mandrx committed on
Commit
276d4fb
•
1 Parent(s): deb16c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -78
app.py CHANGED
@@ -181,92 +181,88 @@ Ask any question from the uploaded documents and Pinecone will retrieve the cont
181
  )
182
 
183
  # Sidebar
184
- st.sidebar.header("Options")
185
- st.sidebar.write("## File Upload:")
186
- data_files = st.sidebar.file_uploader(
187
- "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
188
- )
189
 
190
- print("data_files",data_files)
191
- ALL_FILES = []
192
- META_DATA = []
193
- for data_file in data_files:
194
- # Upload file
195
- if data_file:
196
- file_path = Path(FILE_UPLOAD_PATH) / f"{uuid.uuid4().hex}_{data_file.name}"
197
 
198
- print("file_path",file_path)
199
- print("data_file",data_file)
200
- print("data_file.getbuffer()",data_file.getbuffer())
201
 
202
- with open(file_path, "wb") as f:
203
- f.write(data_file.getbuffer())
204
- ALL_FILES.append(file_path)
205
- st.sidebar.write(str(data_file.name) + "    ✅ ")
206
- META_DATA.append({"filename": data_file.name})
207
- # ALL_FILES = ["./wellous_products.txt"]
208
- # "wellous_products.txt"
209
- # text_file = 'wellous_products.txt'
210
- # file_path = "./" f"{text_file}"
211
- # print("file_path",file_path)
212
-
213
- # with open(file_path, "wb") as f:
214
- # f.write(file_path.getbuffer())
215
 
216
- # ALL_FILES.append(file_path)
217
- # META_DATA.append({"filename": text_file})
218
 
219
  print("ALL_FILES",ALL_FILES)
220
  print("META_DATA",META_DATA)
221
 
222
- if len(ALL_FILES) > 0:
223
- # document_store.update_embeddings(retriever, update_existing_embeddings=False)
224
- docs = indexing_pipeline_with_classification.run(file_paths=ALL_FILES, meta=META_DATA)[
225
- "documents"
226
- ]
227
- index_name = "qa_demo"
228
- # we will use batches of 64
229
- batch_size = 100
230
- # docs = docs['documents']
231
- with st.spinner("🧠    Performing indexing of uplaoded documents... \n "):
232
- for i in range(0, len(docs), batch_size):
233
- # find end of batch
234
- i_end = min(i + batch_size, len(docs))
235
- # extract batch
236
- batch = [doc.content for doc in docs[i:i_end]]
237
- # generate embeddings for batch
238
- try:
239
- res = openai.Embedding.create(input=batch, engine=embed_model)
240
- except Exception as e:
241
- done = False
242
- count = 0
243
- while not done and count < 5:
244
- sleep(5)
245
- try:
246
- res = openai.Embedding.create(input=batch, engine=embed_model)
247
- done = True
248
- except:
249
- count += 1
250
-
251
- pass
252
- if count >= 5:
253
- res = []
254
- st.error(f"🐞 File indexing failed{str(e)}")
255
-
256
- if len(res) > 0:
257
- embeds = [record["embedding"] for record in res["data"]]
258
- # get metadata
259
- meta = []
260
- for doc in docs[i:i_end]:
261
- meta_dict = doc.meta
262
- meta_dict["text"] = doc.content
263
- meta.append(meta_dict)
264
- # create unique IDs
265
- ids = [doc.id for doc in docs[i:i_end]]
266
- # add all to upsert list
267
- to_upsert = list(zip(ids, embeds, meta))
268
- # upsert/insert these records to pinecone
269
- _ = index.upsert(vectors=to_upsert)
270
 
271
  # top_k_reader = st.sidebar.slider(
272
  # "Max. number of answers",
 
181
  )
182
 
183
  # Sidebar
184
+ # st.sidebar.header("Options")
185
+ # st.sidebar.write("## File Upload:")
186
+ # data_files = st.sidebar.file_uploader(
187
+ # "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
188
+ # )
189
 
190
+ # print("data_files",data_files)
191
+ # ALL_FILES = []
192
+ # META_DATA = []
193
+ # for data_file in data_files:
194
+ # # Upload file
195
+ # if data_file:
196
+ # file_path = Path(FILE_UPLOAD_PATH) / f"{uuid.uuid4().hex}_{data_file.name}"
197
 
198
+ # print("file_path",file_path)
199
+ # print("data_file",data_file)
200
+ # print("data_file.getbuffer()",data_file.getbuffer())
201
 
202
+ # with open(file_path, "wb") as f:
203
+ # f.write(data_file.getbuffer())
204
+ # ALL_FILES.append(file_path)
205
+ # st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ✅ ")
206
+ # META_DATA.append({"filename": data_file.name})
207
+ text_file = 'wellous_products.txt'
208
+
209
+ file_path = "./" f"{text_file}"
210
+ print("file_path",file_path)
 
 
 
 
211
 
212
+ ALL_FILES.append(file_path)
213
+ META_DATA.append({"filename": text_file})
214
 
215
  print("ALL_FILES",ALL_FILES)
216
  print("META_DATA",META_DATA)
217
 
218
+ # if len(ALL_FILES) > 0:
219
+ # document_store.update_embeddings(retriever, update_existing_embeddings=False)
220
+ docs = indexing_pipeline_with_classification.run(file_paths=ALL_FILES, meta=META_DATA)[
221
+ "documents"
222
+ ]
223
+ index_name = "qa_demo"
224
+ # we will use batches of 64
225
+ batch_size = 100
226
+ # docs = docs['documents']
227
+ with st.spinner("🧠 &nbsp;&nbsp; Performing indexing of uplaoded documents... \n "):
228
+ for i in range(0, len(docs), batch_size):
229
+ # find end of batch
230
+ i_end = min(i + batch_size, len(docs))
231
+ # extract batch
232
+ batch = [doc.content for doc in docs[i:i_end]]
233
+ # generate embeddings for batch
234
+ try:
235
+ res = openai.Embedding.create(input=batch, engine=embed_model)
236
+ except Exception as e:
237
+ done = False
238
+ count = 0
239
+ while not done and count < 5:
240
+ sleep(5)
241
+ try:
242
+ res = openai.Embedding.create(input=batch, engine=embed_model)
243
+ done = True
244
+ except:
245
+ count += 1
246
+
247
+ pass
248
+ if count >= 5:
249
+ res = []
250
+ st.error(f"🐞 File indexing failed{str(e)}")
251
+
252
+ if len(res) > 0:
253
+ embeds = [record["embedding"] for record in res["data"]]
254
+ # get metadata
255
+ meta = []
256
+ for doc in docs[i:i_end]:
257
+ meta_dict = doc.meta
258
+ meta_dict["text"] = doc.content
259
+ meta.append(meta_dict)
260
+ # create unique IDs
261
+ ids = [doc.id for doc in docs[i:i_end]]
262
+ # add all to upsert list
263
+ to_upsert = list(zip(ids, embeds, meta))
264
+ # upsert/insert these records to pinecone
265
+ _ = index.upsert(vectors=to_upsert)
266
 
267
  # top_k_reader = st.sidebar.slider(
268
  # "Max. number of answers",