devve1 committed on
Commit
90a4681
1 Parent(s): e15ef1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -7
app.py CHANGED
@@ -77,9 +77,7 @@ from qdrant_client.models import (
77
  Batch,
78
  Filter,
79
  FieldCondition,
80
- Datatype,
81
- BinaryQuantization,
82
- BinaryQuantizationConfig
83
  )
84
 
85
  class Question(BaseModel):
@@ -185,13 +183,17 @@ def collect_files(directory, pattern):
185
 
186
  for filename in os.listdir(directory):
187
  if pattern in filename:
188
- if filename.endswith('.msgpack'):
 
 
 
 
189
  with open(os.path.join(directory, filename), "rb") as data_file_payload:
190
  decompressed_payload = data_file_payload.read()
191
  array.extend(msgpack.unpackb(decompressed_payload, raw=False))
192
  elif filename.endswith('.npz') and (pattern == '_dense'):
193
  array.extend(list(np.load(os.path.join(directory, filename)).values()))
194
- elif filename.endswith('.npz') and ((pattern == '_sparse') or (pattern == '_sparse_titles')):
195
  sparse_embeddings = []
196
  loaded_sparse_matrix = load_npz(os.path.join(directory, filename))
197
 
@@ -202,7 +204,21 @@ def collect_files(directory, pattern):
202
  embedding = SparseVector(indices=indices, values=values)
203
  sparse_embeddings.append(embedding)
204
  array.extend(sparse_embeddings)
205
- elif (filename.endswith('.npy')):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  ids_list = np.load(os.path.join(directory, filename), allow_pickle=True).tolist()
207
  array.extend(ids_list)
208
 
@@ -299,7 +315,7 @@ def load_models_and_documents():
299
  embeddings_path = os.path.join(os.getenv('HF_HOME'), 'embeddings')
300
 
301
  payload_path = os.path.join(embeddings_path, name + '_payload.msgpack')
302
- payload_titles_path = os.path.join(embeddings_path, name + '_payload_titles.npz')
303
  dense_path = os.path.join(embeddings_path, name + '_dense.npz')
304
  sparse_path = os.path.join(embeddings_path, name + '_sparse.npz')
305
  sparse_titles_path = os.path.join(embeddings_path, name + '_sparse_titles.npz')
 
77
  Batch,
78
  Filter,
79
  FieldCondition,
80
+ Datatype
 
 
81
  )
82
 
83
  class Question(BaseModel):
 
183
 
184
  for filename in os.listdir(directory):
185
  if pattern in filename:
186
+ if filename.endswith('.msgpack') and (pattern == '_payload'):
187
+ with open(os.path.join(directory, filename), "rb") as data_file_payload:
188
+ decompressed_payload = data_file_payload.read()
189
+ array.extend(msgpack.unpackb(decompressed_payload, raw=False))
190
+ elif filename.endswith('.msgpack') and (pattern == '_payload_titles'):
191
  with open(os.path.join(directory, filename), "rb") as data_file_payload:
192
  decompressed_payload = data_file_payload.read()
193
  array.extend(msgpack.unpackb(decompressed_payload, raw=False))
194
  elif filename.endswith('.npz') and (pattern == '_dense'):
195
  array.extend(list(np.load(os.path.join(directory, filename)).values()))
196
+ elif filename.endswith('.npz') and (pattern == '_sparse'):
197
  sparse_embeddings = []
198
  loaded_sparse_matrix = load_npz(os.path.join(directory, filename))
199
 
 
204
  embedding = SparseVector(indices=indices, values=values)
205
  sparse_embeddings.append(embedding)
206
  array.extend(sparse_embeddings)
207
+ elif filename.endswith('.npz') and (pattern == '_sparse_titles'):
208
+ sparse_embeddings = []
209
+ loaded_sparse_matrix = load_npz(os.path.join(directory, filename))
210
+
211
+ for i in range(loaded_sparse_matrix.shape[0]):
212
+ row = loaded_sparse_matrix.getrow(i)
213
+ values = row.data.tolist()
214
+ indices = row.indices.tolist()
215
+ embedding = SparseVector(indices=indices, values=values)
216
+ sparse_embeddings.append(embedding)
217
+ array.extend(sparse_embeddings)
218
+ elif filename.endswith('.npy') and (pattern == '_ids'):
219
+ ids_list = np.load(os.path.join(directory, filename), allow_pickle=True).tolist()
220
+ array.extend(ids_list)
221
+ elif filename.endswith('.npy') and (pattern == '_ids_titles'):
222
  ids_list = np.load(os.path.join(directory, filename), allow_pickle=True).tolist()
223
  array.extend(ids_list)
224
 
 
315
  embeddings_path = os.path.join(os.getenv('HF_HOME'), 'embeddings')
316
 
317
  payload_path = os.path.join(embeddings_path, name + '_payload.msgpack')
318
+ payload_titles_path = os.path.join(embeddings_path, name + '_payload_titles.msgpack')
319
  dense_path = os.path.join(embeddings_path, name + '_dense.npz')
320
  sparse_path = os.path.join(embeddings_path, name + '_sparse.npz')
321
  sparse_titles_path = os.path.join(embeddings_path, name + '_sparse_titles.npz')