Spaces:
Runtime error
Runtime error
freQuensy23
committed on
Commit
•
1370a68
1
Parent(s):
c9c4854
[FIX]
Browse files
- app.py +11 -9
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,25 +1,26 @@
|
|
|
|
|
|
|
|
1 |
import translator
|
2 |
from langchain_community.document_loaders import PyMuPDFLoader
|
3 |
from langchain.text_splitter import SentenceTransformersTokenTextSplitter
|
4 |
-
from langchain_community.embeddings import
|
5 |
from langchain_community.retrievers import BM25Retriever
|
6 |
from langchain_community.vectorstores.utils import DistanceStrategy
|
7 |
-
from
|
8 |
import gradio as gr
|
9 |
import re
|
10 |
|
11 |
print('All imports are successful')
|
12 |
|
13 |
model = "msmarco-distilbert-base-tas-b"
|
14 |
-
|
15 |
-
embeddings = SentenceTransformerEmbeddings(model_name=model)
|
16 |
-
except:
|
17 |
-
embeddings = SentenceTransformerEmbeddings(model_name=model)
|
18 |
prev_files = None
|
19 |
retriever = None
|
20 |
|
21 |
|
22 |
-
def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256, bm_25_answers=200,
|
|
|
23 |
results = ""
|
24 |
global prev_files, retriever
|
25 |
if not (isinstance(files, str) or isinstance(files[0], str)):
|
@@ -67,11 +68,12 @@ interface = gr.Interface(
|
|
67 |
gr.Slider(minimum=1, maximum=100, value=50, label="Chunk Overlap"),
|
68 |
gr.Slider(minimum=64, maximum=512, value=256, label="Tokens Per Chunk (чем больше - тем бОльшие куски книги "
|
69 |
"сможем находить)"),
|
70 |
-
gr.Slider(minimum=1, maximum=1000, value=200,
|
|
|
71 |
gr.Checkbox(label="Translate to Russian", value=False),
|
72 |
],
|
73 |
outputs="text",
|
74 |
title="Similarity Search for eksmo books"
|
75 |
)
|
76 |
|
77 |
-
interface.launch(share=True)
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
|
3 |
+
load_dotenv()
|
4 |
import translator
|
5 |
from langchain_community.document_loaders import PyMuPDFLoader
|
6 |
from langchain.text_splitter import SentenceTransformersTokenTextSplitter
|
7 |
+
from langchain_community.embeddings import OpenAIEmbeddings
|
8 |
from langchain_community.retrievers import BM25Retriever
|
9 |
from langchain_community.vectorstores.utils import DistanceStrategy
|
10 |
+
from langchain_community.vectorstores import FAISS
|
11 |
import gradio as gr
|
12 |
import re
|
13 |
|
14 |
print('All imports are successful')
|
15 |
|
16 |
model = "msmarco-distilbert-base-tas-b"
|
17 |
+
embeddings = OpenAIEmbeddings()
|
|
|
|
|
|
|
18 |
prev_files = None
|
19 |
retriever = None
|
20 |
|
21 |
|
22 |
+
def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256, bm_25_answers=200,
|
23 |
+
translate_to_ru=False):
|
24 |
results = ""
|
25 |
global prev_files, retriever
|
26 |
if not (isinstance(files, str) or isinstance(files[0], str)):
|
|
|
68 |
gr.Slider(minimum=1, maximum=100, value=50, label="Chunk Overlap"),
|
69 |
gr.Slider(minimum=64, maximum=512, value=256, label="Tokens Per Chunk (чем больше - тем бОльшие куски книги "
|
70 |
"сможем находить)"),
|
71 |
+
gr.Slider(minimum=1, maximum=1000, value=200,
|
72 |
+
label="BM25 Answers (чем больше - тем больше будем учитывать неявные смысловые сравнения слов)"),
|
73 |
gr.Checkbox(label="Translate to Russian", value=False),
|
74 |
],
|
75 |
outputs="text",
|
76 |
title="Similarity Search for eksmo books"
|
77 |
)
|
78 |
|
79 |
+
interface.queue(P=1).launch(share=True)
|
requirements.txt
CHANGED
@@ -164,3 +164,4 @@ xlrd==2.0.1
|
|
164 |
XlsxWriter==3.1.9
|
165 |
yarl==1.9.2
|
166 |
zipp==3.15.0
|
|
|
|
164 |
XlsxWriter==3.1.9
|
165 |
yarl==1.9.2
|
166 |
zipp==3.15.0
|
167 |
+
python-dotenv~=1.0.1
|