Update app.py
Browse files
app.py
CHANGED
@@ -77,9 +77,7 @@ from qdrant_client.models import (
|
|
77 |
Batch,
|
78 |
Filter,
|
79 |
FieldCondition,
|
80 |
-
Datatype
|
81 |
-
BinaryQuantization,
|
82 |
-
BinaryQuantizationConfig
|
83 |
)
|
84 |
|
85 |
class Question(BaseModel):
|
@@ -185,13 +183,17 @@ def collect_files(directory, pattern):
|
|
185 |
|
186 |
for filename in os.listdir(directory):
|
187 |
if pattern in filename:
|
188 |
-
if filename.endswith('.msgpack'):
|
|
|
|
|
|
|
|
|
189 |
with open(os.path.join(directory, filename), "rb") as data_file_payload:
|
190 |
decompressed_payload = data_file_payload.read()
|
191 |
array.extend(msgpack.unpackb(decompressed_payload, raw=False))
|
192 |
elif filename.endswith('.npz') and (pattern == '_dense'):
|
193 |
array.extend(list(np.load(os.path.join(directory, filename)).values()))
|
194 |
-
elif filename.endswith('.npz') and (
|
195 |
sparse_embeddings = []
|
196 |
loaded_sparse_matrix = load_npz(os.path.join(directory, filename))
|
197 |
|
@@ -202,7 +204,21 @@ def collect_files(directory, pattern):
|
|
202 |
embedding = SparseVector(indices=indices, values=values)
|
203 |
sparse_embeddings.append(embedding)
|
204 |
array.extend(sparse_embeddings)
|
205 |
-
elif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
ids_list = np.load(os.path.join(directory, filename), allow_pickle=True).tolist()
|
207 |
array.extend(ids_list)
|
208 |
|
@@ -299,7 +315,7 @@ def load_models_and_documents():
|
|
299 |
embeddings_path = os.path.join(os.getenv('HF_HOME'), 'embeddings')
|
300 |
|
301 |
payload_path = os.path.join(embeddings_path, name + '_payload.msgpack')
|
302 |
-
payload_titles_path = os.path.join(embeddings_path, name + '_payload_titles.
|
303 |
dense_path = os.path.join(embeddings_path, name + '_dense.npz')
|
304 |
sparse_path = os.path.join(embeddings_path, name + '_sparse.npz')
|
305 |
sparse_titles_path = os.path.join(embeddings_path, name + '_sparse_titles.npz')
|
|
|
77 |
Batch,
|
78 |
Filter,
|
79 |
FieldCondition,
|
80 |
+
Datatype
|
|
|
|
|
81 |
)
|
82 |
|
83 |
class Question(BaseModel):
|
|
|
183 |
|
184 |
for filename in os.listdir(directory):
|
185 |
if pattern in filename:
|
186 |
+
if filename.endswith('.msgpack') and (pattern == '_payload'):
|
187 |
+
with open(os.path.join(directory, filename), "rb") as data_file_payload:
|
188 |
+
decompressed_payload = data_file_payload.read()
|
189 |
+
array.extend(msgpack.unpackb(decompressed_payload, raw=False))
|
190 |
+
elif filename.endswith('.msgpack') and (pattern == '_payload_titles'):
|
191 |
with open(os.path.join(directory, filename), "rb") as data_file_payload:
|
192 |
decompressed_payload = data_file_payload.read()
|
193 |
array.extend(msgpack.unpackb(decompressed_payload, raw=False))
|
194 |
elif filename.endswith('.npz') and (pattern == '_dense'):
|
195 |
array.extend(list(np.load(os.path.join(directory, filename)).values()))
|
196 |
+
elif filename.endswith('.npz') and (pattern == '_sparse'):
|
197 |
sparse_embeddings = []
|
198 |
loaded_sparse_matrix = load_npz(os.path.join(directory, filename))
|
199 |
|
|
|
204 |
embedding = SparseVector(indices=indices, values=values)
|
205 |
sparse_embeddings.append(embedding)
|
206 |
array.extend(sparse_embeddings)
|
207 |
+
elif filename.endswith('.npz') and (pattern == '_sparse_titles'):
|
208 |
+
sparse_embeddings = []
|
209 |
+
loaded_sparse_matrix = load_npz(os.path.join(directory, filename))
|
210 |
+
|
211 |
+
for i in range(loaded_sparse_matrix.shape[0]):
|
212 |
+
row = loaded_sparse_matrix.getrow(i)
|
213 |
+
values = row.data.tolist()
|
214 |
+
indices = row.indices.tolist()
|
215 |
+
embedding = SparseVector(indices=indices, values=values)
|
216 |
+
sparse_embeddings.append(embedding)
|
217 |
+
array.extend(sparse_embeddings)
|
218 |
+
elif filename.endswith('.npy') and (pattern == '_ids'):
|
219 |
+
ids_list = np.load(os.path.join(directory, filename), allow_pickle=True).tolist()
|
220 |
+
array.extend(ids_list)
|
221 |
+
elif filename.endswith('.npy') and (pattern == '_ids_titles'):
|
222 |
ids_list = np.load(os.path.join(directory, filename), allow_pickle=True).tolist()
|
223 |
array.extend(ids_list)
|
224 |
|
|
|
315 |
embeddings_path = os.path.join(os.getenv('HF_HOME'), 'embeddings')
|
316 |
|
317 |
payload_path = os.path.join(embeddings_path, name + '_payload.msgpack')
|
318 |
+
payload_titles_path = os.path.join(embeddings_path, name + '_payload_titles.msgpack')
|
319 |
dense_path = os.path.join(embeddings_path, name + '_dense.npz')
|
320 |
sparse_path = os.path.join(embeddings_path, name + '_sparse.npz')
|
321 |
sparse_titles_path = os.path.join(embeddings_path, name + '_sparse_titles.npz')
|