zyznull committed
Commit e2c1143 · verified · 1 Parent(s): 04b85dc

Update README.md

Files changed (1)
  1. README.md +35 -32
README.md CHANGED
@@ -34,49 +34,52 @@ The `gte-modernbert` models demonstrate competitive performance in several text
  ## Usage

  Use with `Transformers`
-
  ```python
  # Requires transformers>=4.36.0
- import torch.nn.functional as F
- from transformers import AutoModel, AutoTokenizer
-
- input_texts = [
-     "what is the capital of China?",
-     "how to implement quick sort in python?",
-     "Beijing",
-     "sorting algorithms"
- ]
-
- model_path = 'Alibaba-NLP/gte-modernbert-base'
- tokenizer = AutoTokenizer.from_pretrained(model_path)
- model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
-
- # Tokenize the input texts
- batch_dict = tokenizer(input_texts, max_length=8192, padding=True, truncation=True, return_tensors='pt')
-
- outputs = model(**batch_dict)
- embeddings = outputs.last_hidden_state[:, 0]
-
- # (Optionally) normalize embeddings
- embeddings = F.normalize(embeddings, p=2, dim=1)
- scores = (embeddings[:1] @ embeddings[1:].T) * 100
- print(scores.tolist())
- ```

  Use with `sentence-transformers`:

  ```python
  # Requires sentence_transformers>=2.7.0
- from sentence_transformers import SentenceTransformer
- from sentence_transformers.util import cos_sim
-
- sentences = ['That is a happy person', 'That is a very happy person']
-
- model = SentenceTransformer('Alibaba-NLP/gte-modernbert-base', trust_remote_code=True)
- embeddings = model.encode(sentences)
- print(cos_sim(embeddings[0], embeddings[1]))
  ```

  ## Training Details
 
  ## Usage

  Use with `Transformers`
  ```python
  # Requires transformers>=4.36.0
+ import torch
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+ model_name_or_path = 'Alibaba-NLP/gte-reranker-modernbert-base'
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+ model = AutoModelForSequenceClassification.from_pretrained(
+     model_name_or_path, trust_remote_code=True,
+     torch_dtype=torch.float16
+ )
+ model.eval()
+
+ pairs = [["what is the capital of China?", "Beijing"], ["how to implement quick sort in python?", "Introduction of quick sort"], ["how to implement quick sort in python?", "The weather is nice today"]]
+
+ with torch.no_grad():
+     inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
+     scores = model(**inputs, return_dict=True).logits.view(-1).float()
+     print(scores)
+
+ # tensor([1.2315, 0.5923, 0.3041])
+ ```
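The printed values are raw relevance logits, one per (query, document) pair, with higher values meaning more relevant. If scores in [0, 1] are preferred, here is a minimal sketch of an optional post-processing step that reuses `torch` and the `scores` tensor from the snippet above; the sigmoid mapping is illustrative and not part of the committed model card:

```python
# Optional: map raw relevance logits to (0, 1) with a sigmoid.
# Reuses `torch` and the `scores` tensor from the snippet above.
probabilities = torch.sigmoid(scores)
print(probabilities.tolist())  # larger values = more relevant pair
```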
  Use with `sentence-transformers`:

+ Before you start, install the sentence-transformers library:
+ ```
+ pip install sentence-transformers
+ ```
+
  ```python
  # Requires sentence_transformers>=2.7.0
+ from sentence_transformers import CrossEncoder
+
+ model_name_or_path = 'Alibaba-NLP/gte-reranker-modernbert-base'
+
+ model = CrossEncoder(
+     model_name_or_path,
+     automodel_args={"torch_dtype": "auto"},
+     trust_remote_code=True,
+ )
+
+ pairs = [["what is the capital of China?", "Beijing"], ["how to implement quick sort in python?", "Introduction of quick sort"], ["how to implement quick sort in python?", "The weather is nice today"]]
+
+ scores = model.predict(pairs, convert_to_tensor=True).tolist()
+
+ print("scores: ", scores)
  ```
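When the goal is to rerank a list of candidate passages for a single query, the same model can also be driven through the generic `CrossEncoder.rank` helper from `sentence-transformers`. A minimal sketch reusing the `model` from the snippet above; the query and candidate strings are illustrative, and the returned fields follow the library's CrossEncoder API rather than anything specific to this commit:

```python
# Sketch: rerank a few candidate passages for a single query.
# Reuses `model` (the CrossEncoder) from the snippet above.
query = "how to implement quick sort in python?"
candidates = [
    "Introduction of quick sort",
    "The weather is nice today",
    "Beijing",
]
# `rank` scores every (query, candidate) pair and returns results sorted by score.
results = model.rank(query, candidates, return_documents=True)
for hit in results:
    print(f"{hit['score']:.4f}\t{hit['text']}")
```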
 
  ## Training Details