devve1 committed on
Commit
c82e5da
1 Parent(s): d3c25de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -10,6 +10,7 @@ import streamlit as st
10
  from io import BytesIO
11
  from numpy import ndarray
12
  from llama_cpp import Llama
 
13
  from langchain_core.documents.base import Document
14
  from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
15
  from qdrant_client import QdrantClient, models
@@ -406,8 +407,14 @@ if __name__ == '__main__':
406
  hi_res_model_name='yolox',
407
  include_page_breaks=True
408
  )
409
- name = uploaded_file.getvalue().decode('utf-8')
410
- print(name)
 
 
 
 
 
 
411
 
412
  texts, metadatas = [], []
413
  for elem in elements:
 
10
  from io import BytesIO
11
  from numpy import ndarray
12
  from llama_cpp import Llama
13
+ from charset_normalizer import from_bytes
14
  from langchain_core.documents.base import Document
15
  from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
16
  from qdrant_client import QdrantClient, models
 
407
  hi_res_model_name='yolox',
408
  include_page_breaks=True
409
  )
410
+ bytes_content = uploaded_file.getvalue()
411
+ encoding = str(
412
+ from_bytes(
413
+ bytes_content
414
+ ).best()
415
+ )
416
+ uploaded_file_name = bytes_content.decode(encoding)
417
+ print(uploaded_file_name)
418
 
419
  texts, metadatas = [], []
420
  for elem in elements: