devve1 committed
Commit 4e5215c • 1 parent: 1b923e8

Update app.py

Files changed (1):
  app.py (+8 -8)
app.py CHANGED
@@ -152,7 +152,7 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Auto
     else:
         return f'Internal Knowledge :\n\n{answer}' if 'knowledge_topic' in text else f'Documents Based :\n\n{answer_with_metadatas}'
 
-def collect_files(directory, pattern):
+def collect_files(conn, cursor, directory, pattern):
     array = []
 
     for filename in os.listdir(directory):
@@ -177,12 +177,12 @@ def collect_files(directory, pattern):
             array.extend(sparse_embeddings)
         elif (filename.endswith('.npy')):
             ids_list = np.load(os.path.join(directory, filename), allow_pickle=True).tolist()
-            insert_data(os.path.splitext(filename)[0], ids_list)
+            insert_data(conn, cursor, os.path.splitext(filename)[0], ids_list)
             array.extend(ids_list)
 
     return array
 
-def insert_data(name, ids_array):
+def insert_data(conn, cursor, name, ids_array):
     cursor.execute('INSERT INTO table_names (id) VALUES (?)', (name,))
     for ids in ids_array:
         cursor.execute('INSERT INTO table_ids (name, ids_value) VALUES (?, ?)', (name, ids))
@@ -343,7 +343,7 @@ def load_models_and_documents():
             FOREIGN KEY(name) REFERENCES table_names(doc_name)
         )
         ''')
-        insert_data(name, unique_ids)
+        insert_data(conn, cursor, name, unique_ids)
 
         np.save(ids_path, np.array(unique_ids), allow_pickle=True)
     else:
@@ -368,7 +368,7 @@ def load_models_and_documents():
         ''')
 
         unique_ids, payload_docs, dense_embeddings, sparse_embeddings = [
-            collect_files(embeddings_path, pattern) for pattern in patterns
+            collect_files(conn, cursor, embeddings_path, pattern) for pattern in patterns
         ]
 
     st.write('Ingesting saved documents on disk into our Qdrant Vector Database...')
@@ -398,7 +398,7 @@ def load_models_and_documents():
     time.sleep(5)
     container.empty()
 
-    return client, collection_name, tokenizer, model, llm, dense_model, sparse_model, nlp
+    return client, collection_name, tokenizer, model, llm, dense_model, sparse_model, nlp, conn, cursor
 
 def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
     text_splitter = SemanticChunker(
@@ -458,7 +458,7 @@ def on_change_documents_only():
 if __name__ == '__main__':
     st.set_page_config(page_title="Multipurpose AI Agent",layout="wide", initial_sidebar_state='collapsed')
 
-    client, collection_name, tokenizer, model, llm, dense_model, sparse_model, nlp = load_models_and_documents()
+    client, collection_name, tokenizer, model, llm, dense_model, sparse_model, nlp, conn, cursor = load_models_and_documents()
 
     if 'menu_id' not in st.session_state:
         st.session_state.menu_id = 'ChatBot'
@@ -847,7 +847,7 @@ if __name__ == '__main__':
         combined_sparse_matrix = vstack(sparse_matrices)
         save_npz(sparse_path, combined_sparse_matrix)
 
-        insert_data(conn, cursor, base_name, ids)
+        insert_data(conn, cursor, base_name, ids)
         np.save(ids_path, np.array(ids), allow_pickle=True)
 
         st.toast('Document(s) Ingested !', icon='🎉')
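
The pattern behind these +8/-8 lines: the sqlite3 connection and cursor stop being ambient state and become explicit parameters, created once in load_models_and_documents, returned alongside the models, and passed at every call site of collect_files and insert_data. Below is a minimal, self-contained sketch of that pattern; the CREATE TABLE statements, the open_ids_db helper, and the conn.commit() placement are assumptions reconstructed from the INSERT statements and the FOREIGN KEY line visible in the diff, not code taken from app.py.

import sqlite3

def open_ids_db(db_path=':memory:'):
    # Hypothetical helper: the app presumably uses a file-backed database;
    # ':memory:' just keeps this sketch self-contained.
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    # Assumed schema. Note the diff's FOREIGN KEY references table_names(doc_name);
    # here it points at id so the constraint matches the INSERT below.
    cursor.executescript('''
        CREATE TABLE IF NOT EXISTS table_names (
            id TEXT PRIMARY KEY
        );
        CREATE TABLE IF NOT EXISTS table_ids (
            name TEXT,
            ids_value TEXT,
            FOREIGN KEY(name) REFERENCES table_names(id)
        );
    ''')
    return conn, cursor

def insert_data(conn, cursor, name, ids_array):
    # Same statements as the committed version: one row per document name,
    # one row per chunk id. Committing here is an assumption; the diff does
    # not show where conn.commit() is actually called.
    cursor.execute('INSERT INTO table_names (id) VALUES (?)', (name,))
    for ids in ids_array:
        cursor.execute('INSERT INTO table_ids (name, ids_value) VALUES (?, ?)', (name, ids))
    conn.commit()

if __name__ == '__main__':
    conn, cursor = open_ids_db()
    insert_data(conn, cursor, 'report.pdf', ['id-1', 'id-2'])
    print(cursor.execute('SELECT * FROM table_ids').fetchall())
    conn.close()

One caveat worth keeping in mind when pairing this pattern with Streamlit: a sqlite3 connection is bound to the thread that created it unless opened with check_same_thread=False, and Streamlit may rerun the script on a different thread. Passing conn and cursor explicitly at least makes it obvious where that flag, or a per-rerun connection, would have to go.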