Spaces: Running on T4
Update app.py
Browse files
app.py
CHANGED
@@ -152,7 +152,7 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Auto
|
|
152 |
else:
|
153 |
return f'Internal Knowledge :\n\n{answer}' if 'knowledge_topic' in text else f'Documents Based :\n\n{answer_with_metadatas}'
|
154 |
|
155 |
-
def collect_files(directory, pattern):
|
156 |
array = []
|
157 |
|
158 |
for filename in os.listdir(directory):
|
@@ -177,12 +177,12 @@ def collect_files(directory, pattern):
|
|
177 |
array.extend(sparse_embeddings)
|
178 |
elif (filename.endswith('.npy')):
|
179 |
ids_list = np.load(os.path.join(directory, filename), allow_pickle=True).tolist()
|
180 |
-
insert_data(os.path.splitext(filename)[0], ids_list)
|
181 |
array.extend(ids_list)
|
182 |
|
183 |
return array
|
184 |
|
185 |
-
def insert_data(name, ids_array):
|
186 |
cursor.execute('INSERT INTO table_names (id) VALUES (?)', (name,))
|
187 |
for ids in ids_array:
|
188 |
cursor.execute('INSERT INTO table_ids (name, ids_value) VALUES (?, ?)', (name, ids))
|
@@ -343,7 +343,7 @@ def load_models_and_documents():
|
|
343 |
FOREIGN KEY(name) REFERENCES table_names(doc_name)
|
344 |
)
|
345 |
''')
|
346 |
-
insert_data(name, unique_ids)
|
347 |
|
348 |
np.save(ids_path, np.array(unique_ids), allow_pickle=True)
|
349 |
else:
|
@@ -368,7 +368,7 @@ def load_models_and_documents():
|
|
368 |
''')
|
369 |
|
370 |
unique_ids, payload_docs, dense_embeddings, sparse_embeddings = [
|
371 |
-
collect_files(embeddings_path, pattern) for pattern in patterns
|
372 |
]
|
373 |
|
374 |
st.write('Ingesting saved documents on disk into our Qdrant Vector Database...')
|
@@ -398,7 +398,7 @@ def load_models_and_documents():
|
|
398 |
time.sleep(5)
|
399 |
container.empty()
|
400 |
|
401 |
-
return client, collection_name, tokenizer, model, llm, dense_model, sparse_model, nlp
|
402 |
|
403 |
def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
|
404 |
text_splitter = SemanticChunker(
|
@@ -458,7 +458,7 @@ def on_change_documents_only():
|
|
458 |
if __name__ == '__main__':
|
459 |
st.set_page_config(page_title="Multipurpose AI Agent",layout="wide", initial_sidebar_state='collapsed')
|
460 |
|
461 |
-
client, collection_name, tokenizer, model, llm, dense_model, sparse_model, nlp = load_models_and_documents()
|
462 |
|
463 |
if 'menu_id' not in st.session_state:
|
464 |
st.session_state.menu_id = 'ChatBot'
|
@@ -847,7 +847,7 @@ if __name__ == '__main__':
|
|
847 |
combined_sparse_matrix = vstack(sparse_matrices)
|
848 |
save_npz(sparse_path, combined_sparse_matrix)
|
849 |
|
850 |
-
insert_data(base_name, ids)
|
851 |
np.save(ids_path, np.array(ids), allow_pickle=True)
|
852 |
|
853 |
st.toast('Document(s) Ingested !', icon='π')
|
|
|
152 |
else:
|
153 |
return f'Internal Knowledge :\n\n{answer}' if 'knowledge_topic' in text else f'Documents Based :\n\n{answer_with_metadatas}'
|
154 |
|
155 |
+
def collect_files(conn, cursor, directory, pattern):
|
156 |
array = []
|
157 |
|
158 |
for filename in os.listdir(directory):
|
|
|
177 |
array.extend(sparse_embeddings)
|
178 |
elif (filename.endswith('.npy')):
|
179 |
ids_list = np.load(os.path.join(directory, filename), allow_pickle=True).tolist()
|
180 |
+
insert_data(conn, cursor, os.path.splitext(filename)[0], ids_list)
|
181 |
array.extend(ids_list)
|
182 |
|
183 |
return array
|
184 |
|
185 |
+
def insert_data(conn, cursor, name, ids_array):
|
186 |
cursor.execute('INSERT INTO table_names (id) VALUES (?)', (name,))
|
187 |
for ids in ids_array:
|
188 |
cursor.execute('INSERT INTO table_ids (name, ids_value) VALUES (?, ?)', (name, ids))
|
|
|
343 |
FOREIGN KEY(name) REFERENCES table_names(doc_name)
|
344 |
)
|
345 |
''')
|
346 |
+
insert_data(conn, cursor, name, unique_ids)
|
347 |
|
348 |
np.save(ids_path, np.array(unique_ids), allow_pickle=True)
|
349 |
else:
|
|
|
368 |
''')
|
369 |
|
370 |
unique_ids, payload_docs, dense_embeddings, sparse_embeddings = [
|
371 |
+
collect_files(conn, cursor, embeddings_path, pattern) for pattern in patterns
|
372 |
]
|
373 |
|
374 |
st.write('Ingesting saved documents on disk into our Qdrant Vector Database...')
|
|
|
398 |
time.sleep(5)
|
399 |
container.empty()
|
400 |
|
401 |
+
return client, collection_name, tokenizer, model, llm, dense_model, sparse_model, nlp, conn, cursor
|
402 |
|
403 |
def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
|
404 |
text_splitter = SemanticChunker(
|
|
|
458 |
if __name__ == '__main__':
|
459 |
st.set_page_config(page_title="Multipurpose AI Agent",layout="wide", initial_sidebar_state='collapsed')
|
460 |
|
461 |
+
client, collection_name, tokenizer, model, llm, dense_model, sparse_model, nlp, conn, cursor = load_models_and_documents()
|
462 |
|
463 |
if 'menu_id' not in st.session_state:
|
464 |
st.session_state.menu_id = 'ChatBot'
|
|
|
847 |
combined_sparse_matrix = vstack(sparse_matrices)
|
848 |
save_npz(sparse_path, combined_sparse_matrix)
|
849 |
|
850 |
+
insert_data(conn, cursor, base_name, ids)
|
851 |
np.save(ids_path, np.array(ids), allow_pickle=True)
|
852 |
|
853 |
st.toast('Document(s) Ingested !', icon='π')
|