t0b1as91 committed on
Commit
bdc00f6
·
verified ·
1 Parent(s): c049e26

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +26 -0
README.md CHANGED
@@ -9,5 +9,31 @@ This model focuses on retrieval tasks while also performing well on various task
9
 
10
  ##For retrieval tasks
11
  ```python
 
 
 
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  ```
 
9
 
10
  ##For retrieval tasks
11
  ```python
12
+ from transformers import AutoTokenizer, AutoModel
13
+ import torch
14
+ # Sentences we want sentence embeddings for
15
+ sentences = ["this is a test sentence", "this is another test sentence"]
16
 
17
+ # Prefixing for retrieval tasks
18
+ instruction = "Represent this sentence for searching relevant passages: "
19
+
20
+ # Load model from HuggingFace Hub
21
+ tokenizer = AutoTokenizer.from_pretrained('Marqo/Slerp_merged_109M')
22
+ model = AutoModel.from_pretrained('Marqo/Slerp_merged_109M')
23
+ model.eval()
24
+
25
+ # Tokenize sentences
26
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
27
+ encoded_input_with_prefixing = tokenizer([instruction + q for q in sentences], padding=True, truncation=True, return_tensors='pt')
28
+
29
+ # Compute token embeddings
30
+ with torch.no_grad():
31
+     model_output = model(**encoded_input)
32
+     model_output_with_prefixing = model(**encoded_input_with_prefixing)
33
+ # Perform pooling. In this case, cls pooling, taken per input because the plain and prefixed batches pad to different lengths.
34
+ cls_plain, cls_prefixed = model_output[0][:, 0], model_output_with_prefixing[0][:, 0]
35
+ sentence_embeddings = (cls_plain + cls_prefixed) / 2
36
+ # normalize embeddings
37
+ sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
38
+ print("Sentence embeddings:", sentence_embeddings)
39
  ```