terapyon committed on
Commit
b9061bc
·
1 Parent(s): 59d39d4

added embedding module

Browse files
Files changed (1) hide show
  1. src/embedding.py +20 -0
src/embedding.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sentence_transformers import SentenceTransformer
3
+
4
+ MODEL_NAME = "cl-nagoya/ruri-large" # model identifier handed to SentenceTransformer below
5
+ PREFIX_QUERY = "クエリ: " # "query: " in Japanese; prepended to each text when get_embeddings(query=True)
6
+ PASSAGE_QUERY = "文章: " # "passage: " in Japanese; prepended to each text when get_embeddings(passage=True)
7
+
8
+ model = SentenceTransformer(MODEL_NAME) # created once at import time; get_embeddings encodes with this instance
9
+
10
+
11
def get_embeddings(
    texts: list[str], query: bool = False, passage: bool = False
) -> np.ndarray:
    """Encode ``texts`` with the module-level ``model``, optionally prefixed.

    Args:
        texts: Strings to embed.
        query: If true, ``PREFIX_QUERY`` is prepended to every text before
            encoding.
        passage: If true, ``PASSAGE_QUERY`` is prepended to every text before
            encoding.

    Returns:
        The result of ``model.encode`` on the (possibly prefixed) texts.

    Raises:
        ValueError: If both ``query`` and ``passage`` are true. Previously
            this silently stacked both prefixes onto every text.
    """
    # Validate first: a text is either a query or a passage, never both.
    if query and passage:
        raise ValueError("query and passage are mutually exclusive")

    if query:
        prefix = PREFIX_QUERY
    elif passage:
        prefix = PASSAGE_QUERY
    else:
        prefix = ""

    # Only rebuild the list when a prefix applies, so un-prefixed input is
    # handed to the model exactly as the caller supplied it.
    if prefix:
        texts = [prefix + text for text in texts]

    return model.encode(texts)