devve1 committed on
Commit
5481353
1 Parent(s): 3fa2224

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -24
app.py CHANGED
@@ -27,7 +27,7 @@ from streamlit_navigation_bar import st_navbar
27
  from ppt_chunker import ppt_chunk
28
  from unstructured.cleaners.core import clean
29
  from unstructured.partition.pptx import partition_pptx
30
- from fastembed import SparseEmbedding, SparseTextEmbedding
31
  from unstructured.nlp.tokenize import download_nltk_packages
32
  from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
33
  from langchain_experimental.text_splitter import SemanticChunker
@@ -67,23 +67,6 @@ icon_to_types = {
67
  'Excel')
68
  }
69
 
70
- def make_points(texts: List[str], metadatas: List[dict], dense: List[List[float]], sparse: List[SparseEmbedding])-> List[PointStruct]:
71
- points = []
72
- for idx, (text, metadata, sparse_vector, dense_vector) in enumerate(zip(texts, metadatas, sparse, dense)):
73
- sparse_vec = SparseVector(indices=sparse_vector.indices.tolist(), values=sparse_vector.values.tolist())
74
- point = PointStruct(
75
- id=idx,
76
- vector={
77
- "text-sparse": sparse_vec,
78
- "text-dense": dense_vector,
79
- },
80
- payload={
81
- "text": text,
82
- "metadata": metadata
83
- }
84
- )
85
- points.append(point)
86
- return points
87
 
88
  def transform_query(query: str) -> str:
89
  """ For retrieval, add the prompt for query (not for documents).
@@ -325,14 +308,18 @@ def load_models_and_documents():
325
  sparse_embeddings.append(embedding)
326
 
327
  st.write('Ingesting saved documents on disk into our Qdrant Vector Database...')
328
-
329
  client.upsert(
330
  collection_name,
331
- make_points(
332
- docs_texts,
333
- docs_metadatas,
334
- dense_embeddings,
335
- sparse_embeddings
 
 
 
 
336
  )
337
  )
338
 
 
27
  from ppt_chunker import ppt_chunk
28
  from unstructured.cleaners.core import clean
29
  from unstructured.partition.pptx import partition_pptx
30
+ from fastembed import SparseTextEmbedding
31
  from unstructured.nlp.tokenize import download_nltk_packages
32
  from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
33
  from langchain_experimental.text_splitter import SemanticChunker
 
67
  'Excel')
68
  }
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  def transform_query(query: str) -> str:
72
  """ For retrieval, add the prompt for query (not for documents).
 
308
  sparse_embeddings.append(embedding)
309
 
310
  st.write('Ingesting saved documents on disk into our Qdrant Vector Database...')
311
+
312
  client.upsert(
313
  collection_name,
314
+ points=Batch(
315
+ payload={
316
+ 'texts': docs_texts,
317
+ 'metadatas': docs_metadatas
318
+ }
319
+ vectors={
320
+ 'text-dense': dense_embeddings,
321
+ 'text-sparse': sparse_embeddings
322
+ }
323
  )
324
  )
325