devve1 committed on
Commit
942c7ba
1 Parent(s): c335616

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -6
app.py CHANGED
@@ -339,12 +339,16 @@ if __name__ == '__main__':
339
  uploaded_files = st.sidebar.file_uploader("Choose a file", accept_multiple_files=True, type=['docx', 'doc', 'odt', 'pptx', 'ppt', 'xlsx', 'csv', 'tsv', 'eml', 'msg', 'rtf', 'epub', 'html', 'xml', 'pdf', 'png', 'jpg', 'heic','txt'])
340
 
341
  for uploaded_file in uploaded_files:
342
- elements = partition(file=uploaded_file,
343
- strategy='hi_res',
344
- skip_infer_table_types=['png', 'pdf', 'jpg', 'xls', 'xlsx', 'heic'],
345
- hi_res_model_name='yolox',
346
- include_page_breaks=True
347
- )
 
 
 
 
348
  docs = [Document(elem.text) for elem in elements]
349
 
350
  chunks, dense_embeddings, sparse_embeddings = chunk_documents(docs, dense_model, sparse_model)
 
339
  uploaded_files = st.sidebar.file_uploader("Choose a file", accept_multiple_files=True, type=['docx', 'doc', 'odt', 'pptx', 'ppt', 'xlsx', 'csv', 'tsv', 'eml', 'msg', 'rtf', 'epub', 'html', 'xml', 'pdf', 'png', 'jpg', 'heic','txt'])
340
 
341
  for uploaded_file in uploaded_files:
342
+ temp_dir = tempfile.mkdtemp()
343
+ file_path = os.path.join(temp_dir, uploaded_file.name)
344
+ with open(file_path, "wb") as f:
345
+ f.write(uploaded_file.getvalue())
346
+ elements = partition(filename=file_path,
347
+ strategy='hi_res',
348
+ skip_infer_table_types=['png', 'pdf', 'jpg', 'xls', 'xlsx', 'heic'],
349
+ hi_res_model_name='yolox',
350
+ include_page_breaks=True
351
+ )
352
  docs = [Document(elem.text) for elem in elements]
353
 
354
  chunks, dense_embeddings, sparse_embeddings = chunk_documents(docs, dense_model, sparse_model)