Spaces:

ashok2216
/

pdf-chatbot

Running

ashok2216 committed on Nov 19, 2024

Commit

8d71f5d

verified ·

1 Parent(s): cf2d248

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -91,7 +91,6 @@
 import chromadb
 from chromadb.utils import embedding_functions
 from chromadb.config import Settings
-from sentence_transformers import SentenceTransformer
 from transformers import pipeline
 import streamlit as st
 import fitz  # PyMuPDF for PDF parsing
@@ -115,8 +114,15 @@ def setup_chromadb():
     return client, collection
 # Clear the collection
-def clear_collection(collection):
-    collection.delete(where={})  # Delete all entries in the collection
 def extract_text_from_pdf(uploaded_file):
     with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
@@ -158,7 +164,7 @@ def main():
     if uploaded_file:
         try:
             # Clear existing data
-            clear_collection(collection)
             st.info("Existing data cleared from the database.")
             # Extract and add new data
@@ -186,3 +192,4 @@ def main():
 if __name__ == "__main__":
     main()

 import chromadb
 from chromadb.utils import embedding_functions
 from chromadb.config import Settings
 from transformers import pipeline
 import streamlit as st
 import fitz  # PyMuPDF for PDF parsing
     return client, collection
 # Clear the collection
+def clear_collection(client, collection_name):
+    # Delete the collection and recreate it
+    client.delete_collection(name=collection_name)
+    return client.get_or_create_collection(
+        name=collection_name,
+        embedding_function=chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction(
+            model_name="sentence-transformers/all-MiniLM-L6-v2"
+        ),
+    )
 def extract_text_from_pdf(uploaded_file):
     with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
     if uploaded_file:
         try:
             # Clear existing data
+            collection = clear_collection(client, "pdf_data")
             st.info("Existing data cleared from the database.")
             # Extract and add new data
 if __name__ == "__main__":
     main()