Spaces:
Running
on
T4
Running
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ import streamlit as st
|
|
10 |
from io import BytesIO
|
11 |
from numpy import ndarray
|
12 |
from llama_cpp import Llama
|
|
|
13 |
from langchain_core.documents.base import Document
|
14 |
from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
|
15 |
from qdrant_client import QdrantClient, models
|
@@ -406,8 +407,14 @@ if __name__ == '__main__':
|
|
406 |
hi_res_model_name='yolox',
|
407 |
include_page_breaks=True
|
408 |
)
|
409 |
-
|
410 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
411 |
|
412 |
texts, metadatas = [], []
|
413 |
for elem in elements:
|
|
|
10 |
from io import BytesIO
|
11 |
from numpy import ndarray
|
12 |
from llama_cpp import Llama
|
13 |
+
from charset_normalizer import from_bytes
|
14 |
from langchain_core.documents.base import Document
|
15 |
from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
|
16 |
from qdrant_client import QdrantClient, models
|
|
|
407 |
hi_res_model_name='yolox',
|
408 |
include_page_breaks=True
|
409 |
)
|
410 |
+
bytes_content = uploaded_file.getvalue()
|
411 |
+
encoding = str(
|
412 |
+
from_bytes(
|
413 |
+
bytes_content
|
414 |
+
).best()
|
415 |
+
)
|
416 |
+
uploaded_file_name = bytes_content.decode(encoding)
|
417 |
+
print(uploaded_file_name)
|
418 |
|
419 |
texts, metadatas = [], []
|
420 |
for elem in elements:
|