freQuensy23 committed on
Commit
1370a68
1 Parent(s): c9c4854
Files changed (2) hide show
  1. app.py +11 -9
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,25 +1,26 @@
 
 
 
1
  import translator
2
  from langchain_community.document_loaders import PyMuPDFLoader
3
  from langchain.text_splitter import SentenceTransformersTokenTextSplitter
4
- from langchain_community.embeddings import SentenceTransformerEmbeddings
5
  from langchain_community.retrievers import BM25Retriever
6
  from langchain_community.vectorstores.utils import DistanceStrategy
7
- from langchain_community.vectorstores import FAISS
8
  import gradio as gr
9
  import re
10
 
11
  print('All imports are successful')
12
 
13
  model = "msmarco-distilbert-base-tas-b"
14
- try:
15
- embeddings = SentenceTransformerEmbeddings(model_name=model)
16
- except:
17
- embeddings = SentenceTransformerEmbeddings(model_name=model)
18
  prev_files = None
19
  retriever = None
20
 
21
 
22
- def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256, bm_25_answers=200, translate_to_ru=False):
 
23
  results = ""
24
  global prev_files, retriever
25
  if not (isinstance(files, str) or isinstance(files[0], str)):
@@ -67,11 +68,12 @@ interface = gr.Interface(
67
  gr.Slider(minimum=1, maximum=100, value=50, label="Chunk Overlap"),
68
  gr.Slider(minimum=64, maximum=512, value=256, label="Tokens Per Chunk (чем больше - тем бОльшие куски книги "
69
  "сможем находить)"),
70
- gr.Slider(minimum=1, maximum=1000, value=200, label="BM25 Answers (чем больше - тем больше будем учитывать неявные смысловые сравнения слов)"),
 
71
  gr.Checkbox(label="Translate to Russian", value=False),
72
  ],
73
  outputs="text",
74
  title="Similarity Search for eksmo books"
75
  )
76
 
77
- interface.launch(share=True)
 
1
+ from dotenv import load_dotenv
2
+
3
+ load_dotenv()
4
  import translator
5
  from langchain_community.document_loaders import PyMuPDFLoader
6
  from langchain.text_splitter import SentenceTransformersTokenTextSplitter
7
+ from langchain_community.embeddings import OpenAIEmbeddings
8
  from langchain_community.retrievers import BM25Retriever
9
  from langchain_community.vectorstores.utils import DistanceStrategy
10
+ from langchain_community.vectorstores import FAISS
11
  import gradio as gr
12
  import re
13
 
14
  print('All imports are successful')
15
 
16
  model = "msmarco-distilbert-base-tas-b"
17
+ embeddings = OpenAIEmbeddings()
 
 
 
18
  prev_files = None
19
  retriever = None
20
 
21
 
22
+ def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256, bm_25_answers=200,
23
+ translate_to_ru=False):
24
  results = ""
25
  global prev_files, retriever
26
  if not (isinstance(files, str) or isinstance(files[0], str)):
 
68
  gr.Slider(minimum=1, maximum=100, value=50, label="Chunk Overlap"),
69
  gr.Slider(minimum=64, maximum=512, value=256, label="Tokens Per Chunk (чем больше - тем бОльшие куски книги "
70
  "сможем находить)"),
71
+ gr.Slider(minimum=1, maximum=1000, value=200,
72
+ label="BM25 Answers (чем больше - тем больше будем учитывать неявные смысловые сравнения слов)"),
73
  gr.Checkbox(label="Translate to Russian", value=False),
74
  ],
75
  outputs="text",
76
  title="Similarity Search for eksmo books"
77
  )
78
 
79
+ interface.queue(P=1).launch(share=True)
requirements.txt CHANGED
@@ -164,3 +164,4 @@ xlrd==2.0.1
164
  XlsxWriter==3.1.9
165
  yarl==1.9.2
166
  zipp==3.15.0
 
 
164
  XlsxWriter==3.1.9
165
  yarl==1.9.2
166
  zipp==3.15.0
167
+ python-dotenv~=1.0.1