ashok2216 committed on
Commit
8d71f5d
·
verified ·
1 Parent(s): cf2d248

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -91,7 +91,6 @@
91
  import chromadb
92
  from chromadb.utils import embedding_functions
93
  from chromadb.config import Settings
94
- from sentence_transformers import SentenceTransformer
95
  from transformers import pipeline
96
  import streamlit as st
97
  import fitz # PyMuPDF for PDF parsing
@@ -115,8 +114,15 @@ def setup_chromadb():
115
  return client, collection
116
 
117
  # Clear the collection
118
- def clear_collection(collection):
119
- collection.delete(where={}) # Delete all entries in the collection
 
 
 
 
 
 
 
120
 
121
  def extract_text_from_pdf(uploaded_file):
122
  with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
@@ -158,7 +164,7 @@ def main():
158
  if uploaded_file:
159
  try:
160
  # Clear existing data
161
- clear_collection(collection)
162
  st.info("Existing data cleared from the database.")
163
 
164
  # Extract and add new data
@@ -186,3 +192,4 @@ def main():
186
  if __name__ == "__main__":
187
  main()
188
 
 
 
91
  import chromadb
92
  from chromadb.utils import embedding_functions
93
  from chromadb.config import Settings
 
94
  from transformers import pipeline
95
  import streamlit as st
96
  import fitz # PyMuPDF for PDF parsing
 
114
  return client, collection
115
 
116
  # Clear the collection
117
+ def clear_collection(client, collection_name):
118
+ # Delete the collection and recreate it
119
+ client.delete_collection(name=collection_name)
120
+ return client.get_or_create_collection(
121
+ name=collection_name,
122
+ embedding_function=chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction(
123
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
124
+ ),
125
+ )
126
 
127
  def extract_text_from_pdf(uploaded_file):
128
  with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
 
164
  if uploaded_file:
165
  try:
166
  # Clear existing data
167
+ collection = clear_collection(client, "pdf_data")
168
  st.info("Existing data cleared from the database.")
169
 
170
  # Extract and add new data
 
192
  if __name__ == "__main__":
193
  main()
194
 
195
+