patch inference on CPU & Windows + Update README snippets (#2)
- Remove reference_compile; set model max length to avoid warning (0e4036849927b9bac2cbc06f35f7cad173d32145)
- README.md +31 -16
- config.json +0 -1
- tokenizer_config.json +1 -1
README.md
CHANGED
@@ -6,6 +6,8 @@ base_model:
 - answerdotai/ModernBERT-base
 pipeline_tag: sentence-similarity
 library_name: transformers
+tags:
+- sentence-transformers
 ---
 
 # gte-reranker-modernbert-base
@@ -32,28 +34,39 @@ The `gte-modernbert` models demonstrates competitive performance in several text
 
 ## Usage
 
-
+> [!TIP]
+> For `transformers` and `sentence-transformers`, if your GPU supports it, the efficient Flash Attention 2 will be used automatically if you have `flash_attn` installed. It is not mandatory.
+>
+> ```bash
+> pip install flash_attn
+> ```
+
+Use with `transformers`
 ```python
-# Requires transformers>=4.
+# Requires transformers>=4.48.0
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
-model_name_or_path =
+model_name_or_path = "Alibaba-NLP/gte-reranker-modernbert-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
 model = AutoModelForSequenceClassification.from_pretrained(
-    model_name_or_path,
-    torch_dtype=torch.float16
+    model_name_or_path,
+    torch_dtype=torch.float16,
 )
 model.eval()
 
-pairs = [
+pairs = [
+    ["what is the capital of China?", "Beijing"],
+    ["how to implement quick sort in python?", "Introduction of quick sort"],
+    ["how to implement quick sort in python?", "The weather is nice today"],
+]
 
 with torch.no_grad():
     inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
     scores = model(**inputs, return_dict=True).logits.view(-1, ).float()
     print(scores)
 
-# tensor([
+# tensor([ 2.1387, 2.4609, -1.6729])
 ```
 Use with `sentence-transformers`:
 
@@ -63,22 +76,24 @@ pip install sentence-transformers
 ```
 
 ```python
-# Requires
+# Requires transformers>=4.48.0
 from sentence_transformers import CrossEncoder
 
-model_name_or_path = 'Alibaba-NLP/gte-reranker-modernbert-base'
-
 model = CrossEncoder(
-
+    "Alibaba-NLP/gte-reranker-modernbert-base",
     automodel_args={"torch_dtype": "auto"},
-    trust_remote_code=True,
 )
 
-pairs = [
-
-
+pairs = [
+    ["what is the capital of China?", "Beijing"],
+    ["how to implement quick sort in python?","Introduction of quick sort"],
+    ["how to implement quick sort in python?", "The weather is nice today"],
+]
 
-
+scores = model.predict(pairs)
+print(scores)
+# [0.8945664 0.9213594 0.15742092]
+# NOTE: Sentence Transformers calls Softmax over the outputs by default, hence the scores are in [0, 1] range.
 ```
 
 ## Training Details
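The two snippets above score the same pairs on different scales: the `transformers` path prints raw logits, while `CrossEncoder.predict` returns values in [0, 1]. Numerically, the published numbers correspond to an element-wise sigmoid over the logits rather than a softmax across the batch (the three scores do not sum to 1). A minimal sketch to check the correspondence, reusing only the outputs quoted in the diff:

```python
import torch

# Logits printed by the `transformers` snippet in the diff above.
logits = torch.tensor([2.1387, 2.4609, -1.6729])

# An element-wise sigmoid maps each logit into [0, 1] independently;
# the results line up with the CrossEncoder scores quoted above.
print(torch.sigmoid(logits))
# tensor([0.8946, 0.9214, 0.1580])
```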
config.json
CHANGED
@@ -42,7 +42,6 @@
   "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
-  "reference_compile": true,
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,
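For context on the `reference_compile` removal: when this flag is set, the ModernBERT code in `transformers` tries to `torch.compile` parts of the model, which can fail on CPU-only machines and on Windows — the "patch inference on CPU & Windows" in the commit title. If you are pinned to a pre-patch snapshot, a minimal sketch of the equivalent load-time override; it relies on `from_pretrained` forwarding kwargs that match config attributes to the config:

```python
from transformers import AutoModelForSequenceClassification

# Config attributes passed as kwargs override config.json values, so this
# has the same effect as deleting "reference_compile": true from the config.
model = AutoModelForSequenceClassification.from_pretrained(
    "Alibaba-NLP/gte-reranker-modernbert-base",
    reference_compile=False,  # skip torch.compile; safe on CPU/Windows
)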
tokenizer_config.json
CHANGED
@@ -938,7 +938,7 @@
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length":
+  "model_max_length": 8192,
   "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
   "pad_token_type_id": 0,
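The previous `model_max_length` value is truncated in the diff above; whatever it was, it triggered the tokenizer warning mentioned in the commit message, and 8192 matches ModernBERT's native context window. A small sketch of the resulting behavior, assuming the patched revision:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-reranker-modernbert-base")
print(tokenizer.model_max_length)  # 8192

# With truncation enabled and no explicit max_length, encoding is capped
# at model_max_length, i.e. 8192 tokens including special tokens.
encoding = tokenizer("word " * 20_000, truncation=True)
print(len(encoding["input_ids"]))  # 8192
```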