Spaces:
Building
on
T4
Building
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -339,12 +339,16 @@ if __name__ == '__main__':
|
|
339 |
uploaded_files = st.sidebar.file_uploader("Choose a file", accept_multiple_files=True, type=['docx', 'doc', 'odt', 'pptx', 'ppt', 'xlsx', 'csv', 'tsv', 'eml', 'msg', 'rtf', 'epub', 'html', 'xml', 'pdf', 'png', 'jpg', 'heic','txt'])
|
340 |
|
341 |
for uploaded_file in uploaded_files:
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
|
|
|
|
|
|
|
|
348 |
docs = [Document(elem.text) for elem in elements]
|
349 |
|
350 |
chunks, dense_embeddings, sparse_embeddings = chunk_documents(docs, dense_model, sparse_model)
|
|
|
339 |
uploaded_files = st.sidebar.file_uploader("Choose a file", accept_multiple_files=True, type=['docx', 'doc', 'odt', 'pptx', 'ppt', 'xlsx', 'csv', 'tsv', 'eml', 'msg', 'rtf', 'epub', 'html', 'xml', 'pdf', 'png', 'jpg', 'heic','txt'])
|
340 |
|
341 |
for uploaded_file in uploaded_files:
|
342 |
+
temp_dir = tempfile.mkdtemp()
|
343 |
+
file_path = os.path.join(temp_dir, uploaded_file.name)
|
344 |
+
with open(file_path, "wb") as f:
|
345 |
+
f.write(uploaded_file.getvalue())
|
346 |
+
elements = partition(filename=file_path,
|
347 |
+
strategy='hi_res',
|
348 |
+
skip_infer_table_types=['png', 'pdf', 'jpg', 'xls', 'xlsx', 'heic'],
|
349 |
+
hi_res_model_name='yolox',
|
350 |
+
include_page_breaks=True
|
351 |
+
)
|
352 |
docs = [Document(elem.text) for elem in elements]
|
353 |
|
354 |
chunks, dense_embeddings, sparse_embeddings = chunk_documents(docs, dense_model, sparse_model)
|