Update README.md
Browse files
README.md
CHANGED
@@ -9,5 +9,31 @@ This model focuses on retrieval tasks while also performing well on various task
|
|
9 |
|
10 |
## For retrieval tasks
|
11 |
```python
|
|
|
|
|
|
|
|
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
```
|
|
|
9 |
|
10 |
## For retrieval tasks
|
11 |
```python
|
12 |
+
# Example: compute sentence embeddings for retrieval by averaging the CLS
# embedding of each sentence with and without the retrieval instruction prefix.
from transformers import AutoTokenizer, AutoModel
import torch

# Sentences we want sentence embeddings for
sentences = ["this is a test sentence", "this is another test sentence"]

# Prefixing for retrieval tasks
instruction = "Represent this sentence for searching relevant passages: "

# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('Marqo/Slerp_merged_109M')
model = AutoModel.from_pretrained('Marqo/Slerp_merged_109M')
model.eval()

# Tokenize sentences: once as-is, and once with the instruction prepended.
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
encoded_input_with_prefixing = tokenizer(
    [instruction + sentence for sentence in sentences],
    padding=True,
    truncation=True,
    return_tensors='pt',
)

# Compute token embeddings
with torch.no_grad():
    model_output = model(**encoded_input)
    model_output_with_prefixing = model(**encoded_input_with_prefixing)

# Perform pooling. In this case, cls pooling.
# Pool each output *before* averaging: the model outputs are container objects
# that cannot be added directly, and the two batches are padded to different
# sequence lengths, so only the per-sentence CLS vectors have matching shapes.
cls_embeddings = model_output[0][:, 0]
cls_embeddings_with_prefixing = model_output_with_prefixing[0][:, 0]
sentence_embeddings = (cls_embeddings + cls_embeddings_with_prefixing) / 2

# normalize embeddings
sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
print("Sentence embeddings:", sentence_embeddings)
|
39 |
```
|