Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -91,7 +91,6 @@
|
|
91 |
import chromadb
|
92 |
from chromadb.utils import embedding_functions
|
93 |
from chromadb.config import Settings
|
94 |
-
from sentence_transformers import SentenceTransformer
|
95 |
from transformers import pipeline
|
96 |
import streamlit as st
|
97 |
import fitz # PyMuPDF for PDF parsing
|
@@ -115,8 +114,15 @@ def setup_chromadb():
|
|
115 |
return client, collection
|
116 |
|
117 |
# Clear the collection
|
118 |
-
def clear_collection(
|
119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
def extract_text_from_pdf(uploaded_file):
|
122 |
with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
|
@@ -158,7 +164,7 @@ def main():
|
|
158 |
if uploaded_file:
|
159 |
try:
|
160 |
# Clear existing data
|
161 |
-
clear_collection(
|
162 |
st.info("Existing data cleared from the database.")
|
163 |
|
164 |
# Extract and add new data
|
@@ -186,3 +192,4 @@ def main():
|
|
186 |
if __name__ == "__main__":
|
187 |
main()
|
188 |
|
|
|
|
91 |
import chromadb
|
92 |
from chromadb.utils import embedding_functions
|
93 |
from chromadb.config import Settings
|
|
|
94 |
from transformers import pipeline
|
95 |
import streamlit as st
|
96 |
import fitz # PyMuPDF for PDF parsing
|
|
|
114 |
return client, collection
|
115 |
|
116 |
# Clear the collection
|
117 |
+
def clear_collection(client, collection_name):
    """Drop the named collection and return a freshly created replacement.

    The collection is deleted outright and then re-created under the same
    name, so the returned object is a new handle.

    Args:
        client: Object exposing ``delete_collection`` and
            ``get_or_create_collection`` (presumably the ChromaDB client
            produced by ``setup_chromadb`` -- confirm against the caller).
        collection_name: Name of the collection to reset.

    Returns:
        Whatever ``client.get_or_create_collection`` returns for
        ``collection_name`` after the delete.
    """
    # NOTE(review): delete_collection may raise if the collection does not
    # exist yet -- confirm against the installed chromadb version.
    client.delete_collection(name=collection_name)

    # Re-create with the sentence-transformers embedding function.
    embedder = chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    fresh_collection = client.get_or_create_collection(
        name=collection_name,
        embedding_function=embedder,
    )
    return fresh_collection
|
126 |
|
127 |
def extract_text_from_pdf(uploaded_file):
|
128 |
with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
|
|
|
164 |
if uploaded_file:
|
165 |
try:
|
166 |
# Clear existing data
|
167 |
+
collection = clear_collection(client, "pdf_data")
|
168 |
st.info("Existing data cleared from the database.")
|
169 |
|
170 |
# Extract and add new data
|
|
|
192 |
if __name__ == "__main__":
|
193 |
main()
|
194 |
|
195 |
+
|