patch inference on CPU & Windows + Update README snippets (#2)

Browse files

- Remove reference_compile; set model max length to avoid warning (0e4036849927b9bac2cbc06f35f7cad173d32145)

Files changed (3) hide show

README.md +31 -16
config.json +0 -1
tokenizer_config.json +1 -1

README.md CHANGED Viewed

@@ -6,6 +6,8 @@ base_model:
 - answerdotai/ModernBERT-base
 pipeline_tag: sentence-similarity
 library_name: transformers
 ---
 # gte-reranker-modernbert-base
@@ -32,28 +34,39 @@ The `gte-modernbert` models demonstrates competitive performance in several text
 ## Usage
-Use with `Transformers`
 ```python
-# Requires transformers>=4.36.0
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
-model_name_or_path = 'Alibaba-NLP/gte-reranker-modernbert-base'
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
 model = AutoModelForSequenceClassification.from_pretrained(
-    model_name_or_path, trust_remote_code=True,
-    torch_dtype=torch.float16
 )
 model.eval()
-pairs = [["what is the capital of China?", "Beijing"], ["how to implement quick sort in python?","Introduction of quick sort"], ["how to implement quick sort in python?", "The weather is nice today"]]
 with torch.no_grad():
     inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
     scores = model(**inputs, return_dict=True).logits.view(-1, ).float()
     print(scores)
-# tensor([1.2315, 0.5923, 0.3041])
 ```
 Use with `sentence-transformers`:
@@ -63,22 +76,24 @@ pip install sentence-transformers
 ```
 ```python
-# Requires sentence_transformers>=2.7.0
 from sentence_transformers import CrossEncoder
-model_name_or_path = 'Alibaba-NLP/gte-reranker-modernbert-base'
 model = CrossEncoder(
-    model_name_or_path,
     automodel_args={"torch_dtype": "auto"},
-    trust_remote_code=True,
 )
-pairs = [["what is the capital of China?", "Beijing"], ["how to implement quick sort in python?","Introduction of quick sort"], ["how to implement quick sort in python?", "The weather is nice today"]]
-scores = model.predict(sentence_pairs, convert_to_tensor=True).tolist()
-print ("scores: ", scores)
 ```
 ## Training Details

 - answerdotai/ModernBERT-base
 pipeline_tag: sentence-similarity
 library_name: transformers
+tags:
+- sentence-transformers
 ---
 # gte-reranker-modernbert-base
 ## Usage
+> [!TIP]
+> For both `transformers` and `sentence-transformers`, the efficient Flash Attention 2 is used automatically when your GPU supports it and `flash_attn` is installed. Installing it is optional.
+>
+> ```bash
+> pip install flash_attn
+> ```
+Use with `transformers`:
 ```python
+# Requires transformers>=4.48.0
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
+model_name_or_path = "Alibaba-NLP/gte-reranker-modernbert-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
 model = AutoModelForSequenceClassification.from_pretrained(
+    model_name_or_path,
+    torch_dtype=torch.float16,
 )
 model.eval()
+pairs = [
+    ["what is the capital of China?", "Beijing"],
+    ["how to implement quick sort in python?", "Introduction of quick sort"],
+    ["how to implement quick sort in python?", "The weather is nice today"],
+]
 with torch.no_grad():
     inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
     scores = model(**inputs, return_dict=True).logits.view(-1, ).float()
     print(scores)
+# tensor([ 2.1387,  2.4609, -1.6729])
 ```
 Use with `sentence-transformers`:
 ```
 ```python
+# Requires transformers>=4.48.0
 from sentence_transformers import CrossEncoder
 model = CrossEncoder(
+    "Alibaba-NLP/gte-reranker-modernbert-base",
     automodel_args={"torch_dtype": "auto"},
 )
+pairs = [
+    ["what is the capital of China?", "Beijing"],
+    ["how to implement quick sort in python?","Introduction of quick sort"],
+    ["how to implement quick sort in python?", "The weather is nice today"],
+]
+scores = model.predict(pairs)
+print(scores)
+# [0.8945664  0.9213594  0.15742092]
+# NOTE: Sentence Transformers applies a Sigmoid to the single output logit by default, hence the scores are in the [0, 1] range.
 ```
 ## Training Details

config.json CHANGED Viewed

@@ -42,7 +42,6 @@
   "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
-  "reference_compile": true,
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,

   "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,

tokenizer_config.json CHANGED Viewed

@@ -938,7 +938,7 @@
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length": 1000000000000000019884624838656,
   "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
   "pad_token_type_id": 0,

     "input_ids",
     "attention_mask"
   ],
+  "model_max_length": 8192,
   "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
   "pad_token_type_id": 0,