GPT-knowledge-management

Runtime error

App Files Community

mandrx committed on Feb 8, 2023

Commit

276d4fb

•

1 Parent(s): deb16c0

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -78

app.py CHANGED Viewed

@@ -181,92 +181,88 @@ Ask any question from the uploaded documents and Pinecone will retrieve the cont
 )
 # Sidebar
-st.sidebar.header("Options")
-st.sidebar.write("## File Upload:")
-data_files = st.sidebar.file_uploader(
-    "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
-)
-print("data_files",data_files)
-ALL_FILES = []
-META_DATA = []
-for data_file in data_files:
-    # Upload file
-    if data_file:
-        file_path = Path(FILE_UPLOAD_PATH) / f"{uuid.uuid4().hex}_{data_file.name}"
-        print("file_path",file_path)
-        print("data_file",data_file)
-        print("data_file.getbuffer()",data_file.getbuffer())
-        with open(file_path, "wb") as f:
-            f.write(data_file.getbuffer())
-        ALL_FILES.append(file_path)
-        st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ✅ ")
-        META_DATA.append({"filename": data_file.name})
-# ALL_FILES = ["./wellous_products.txt"]
-# "wellous_products.txt"
-# text_file = 'wellous_products.txt'
-# file_path = "./" f"{text_file}"
-# print("file_path",file_path)
-# with open(file_path, "wb") as f:
-#     f.write(file_path.getbuffer())
-# ALL_FILES.append(file_path)
-# META_DATA.append({"filename": text_file})
 print("ALL_FILES",ALL_FILES)
 print("META_DATA",META_DATA)
-if len(ALL_FILES) > 0:
-    # document_store.update_embeddings(retriever, update_existing_embeddings=False)
-    docs = indexing_pipeline_with_classification.run(file_paths=ALL_FILES, meta=META_DATA)[
-        "documents"
-    ]
-    index_name = "qa_demo"
-    # we will use batches of 64
-    batch_size = 100
-    # docs  = docs['documents']
-    with st.spinner("🧠 &nbsp;&nbsp; Performing indexing of uplaoded documents... \n "):
-        for i in range(0, len(docs), batch_size):
-            # find end of batch
-            i_end = min(i + batch_size, len(docs))
-            # extract batch
-            batch = [doc.content for doc in docs[i:i_end]]
-            # generate embeddings for batch
-            try:
-                res = openai.Embedding.create(input=batch, engine=embed_model)
-            except Exception as e:
-                done = False
-                count = 0
-                while not done and count < 5:
-                    sleep(5)
-                    try:
-                        res = openai.Embedding.create(input=batch, engine=embed_model)
-                        done = True
-                    except:
-                        count += 1
-                        pass
-                if count >= 5:
-                    res = []
-                    st.error(f"🐞 File indexing failed{str(e)}")
-            if len(res) > 0:
-                embeds = [record["embedding"] for record in res["data"]]
-                # get metadata
-                meta = []
-                for doc in docs[i:i_end]:
-                    meta_dict = doc.meta
-                    meta_dict["text"] = doc.content
-                    meta.append(meta_dict)
-                # create unique IDs
-                ids = [doc.id for doc in docs[i:i_end]]
-                # add all to upsert list
-                to_upsert = list(zip(ids, embeds, meta))
-                # upsert/insert these records to pinecone
-                _ = index.upsert(vectors=to_upsert)
 # top_k_reader = st.sidebar.slider(
 #     "Max. number of answers",

 )
 # Sidebar
+# st.sidebar.header("Options")
+# st.sidebar.write("## File Upload:")
+# data_files = st.sidebar.file_uploader(
+#     "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
+# )
+# print("data_files",data_files)
+# ALL_FILES = []
+# META_DATA = []
+# for data_file in data_files:
+#     # Upload file
+#     if data_file:
+#         file_path = Path(FILE_UPLOAD_PATH) / f"{uuid.uuid4().hex}_{data_file.name}"
+#         print("file_path",file_path)
+#         print("data_file",data_file)
+#         print("data_file.getbuffer()",data_file.getbuffer())
+#         with open(file_path, "wb") as f:
+#             f.write(data_file.getbuffer())
+#         ALL_FILES.append(file_path)
+#         st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ✅ ")
+#         META_DATA.append({"filename": data_file.name})
+text_file = 'wellous_products.txt'
+file_path = "./" f"{text_file}"
+print("file_path",file_path)
+ALL_FILES.append(file_path)
+META_DATA.append({"filename": text_file})
 print("ALL_FILES",ALL_FILES)
 print("META_DATA",META_DATA)
+# if len(ALL_FILES) > 0:
+# document_store.update_embeddings(retriever, update_existing_embeddings=False)
+docs = indexing_pipeline_with_classification.run(file_paths=ALL_FILES, meta=META_DATA)[
+    "documents"
+]
+index_name = "qa_demo"
+# we will use batches of 64
+batch_size = 100
+# docs  = docs['documents']
+with st.spinner("🧠 &nbsp;&nbsp; Performing indexing of uplaoded documents... \n "):
+    for i in range(0, len(docs), batch_size):
+        # find end of batch
+        i_end = min(i + batch_size, len(docs))
+        # extract batch
+        batch = [doc.content for doc in docs[i:i_end]]
+        # generate embeddings for batch
+        try:
+            res = openai.Embedding.create(input=batch, engine=embed_model)
+        except Exception as e:
+            done = False
+            count = 0
+            while not done and count < 5:
+                sleep(5)
+                try:
+                    res = openai.Embedding.create(input=batch, engine=embed_model)
+                    done = True
+                except:
+                    count += 1
+                    pass
+            if count >= 5:
+                res = []
+                st.error(f"🐞 File indexing failed{str(e)}")
+        if len(res) > 0:
+            embeds = [record["embedding"] for record in res["data"]]
+            # get metadata
+            meta = []
+            for doc in docs[i:i_end]:
+                meta_dict = doc.meta
+                meta_dict["text"] = doc.content
+                meta.append(meta_dict)
+            # create unique IDs
+            ids = [doc.id for doc in docs[i:i_end]]
+            # add all to upsert list
+            to_upsert = list(zip(ids, embeds, meta))
+            # upsert/insert these records to pinecone
+            _ = index.upsert(vectors=to_upsert)
 # top_k_reader = st.sidebar.slider(
 #     "Max. number of answers",