devve1 committed on
Commit
caa9d03
1 Parent(s): 227cdd7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -11
app.py CHANGED
@@ -10,7 +10,6 @@ import streamlit as st
10
  from io import BytesIO
11
  from numpy import ndarray
12
  from llama_cpp import Llama
13
- from charset_normalizer import from_bytes
14
  from langchain_core.documents.base import Document
15
  from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
16
  from qdrant_client import QdrantClient, models
@@ -407,20 +406,12 @@ if __name__ == '__main__':
407
  hi_res_model_name='yolox',
408
  include_page_breaks=True
409
  )
410
- bytes_content = uploaded_file.getvalue()
411
- encoding = str(
412
- from_bytes(
413
- bytes_content
414
- ).best()
415
- )
416
- print(f'Encoding: {encoding}')
417
- uploaded_file_name = bytes_content.decode(encoding)
418
- print(uploaded_file_name)
419
 
420
  texts, metadatas = [], []
421
  for elem in elements:
422
  texts.append(elem.text)
423
- metadatas.append(elem.metadata.to_dict())
424
 
425
  texts, metadatas, dense_embeddings, sparse_embeddings = chunk_documents(texts, metadatas, dense_model, sparse_model)
426
 
 
10
  from io import BytesIO
11
  from numpy import ndarray
12
  from llama_cpp import Llama
 
13
  from langchain_core.documents.base import Document
14
  from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
15
  from qdrant_client import QdrantClient, models
 
406
  hi_res_model_name='yolox',
407
  include_page_breaks=True
408
  )
409
+ metadata_dict = {"source": uploaded_file.name}
 
 
 
 
 
 
 
 
410
 
411
  texts, metadatas = [], []
412
  for elem in elements:
413
  texts.append(elem.text)
414
+ metadatas.append(metadata_dict)
415
 
416
  texts, metadatas, dense_embeddings, sparse_embeddings = chunk_documents(texts, metadatas, dense_model, sparse_model)
417