freQuensy23 committed on
Commit
82d9634
1 Parent(s): 6a3c13a

[IMP] translate to russian

Browse files
Files changed (2) hide show
  1. app.py +15 -6
  2. translator.py +14 -0
app.py CHANGED
@@ -2,6 +2,8 @@ import warnings
2
 
3
  from langchain_core._api import LangChainDeprecationWarning
4
 
 
 
5
  warnings.filterwarnings("ignore", category=DeprecationWarning)
6
  warnings.filterwarnings("ignore", category=UserWarning)
7
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
@@ -23,7 +25,7 @@ prev_files = None
23
  retriever = None
24
 
25
 
26
- def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256, bm_25_answers=200):
27
  results = ""
28
  global prev_files, retriever
29
  files = [f.name for f in files]
@@ -51,10 +53,16 @@ def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256,
51
  reranked_results = FAISS.from_documents(search_results, embeddings,
52
  distance_strategy=DistanceStrategy.COSINE).similarity_search(query,
53
  k=25)
54
- results = "\n".join([
55
- f"Source: {re.search(pattern, result.metadata['file_path']).group(0)}\nPage: {result.metadata['page']}\nContent:\n{result.page_content}\n"
56
- for result in reranked_results
57
- ])
 
 
 
 
 
 
58
  return results
59
 
60
 
@@ -66,7 +74,8 @@ interface = gr.Interface(
66
  gr.Slider(minimum=1, maximum=100, value=50, label="Chunk Overlap"),
67
  gr.Slider(minimum=64, maximum=512, value=256, label="Tokens Per Chunk (чем больше - тем бОльшие куски книги "
68
  "сможем находить)"),
69
- gr.Slider(minimum=1, maximum=1000, value=200, label="BM25 Answers (чем больше - тем больше будем учитывать неявные смысловые сравнения слов)")
 
70
  ],
71
  outputs="text",
72
  title="Similarity Search for eksmo books"
 
2
 
3
  from langchain_core._api import LangChainDeprecationWarning
4
 
5
+ import translator
6
+
7
  warnings.filterwarnings("ignore", category=DeprecationWarning)
8
  warnings.filterwarnings("ignore", category=UserWarning)
9
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
 
25
  retriever = None
26
 
27
 
28
+ def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256, bm_25_answers=200, translate_to_ru=False):
29
  results = ""
30
  global prev_files, retriever
31
  files = [f.name for f in files]
 
53
  reranked_results = FAISS.from_documents(search_results, embeddings,
54
  distance_strategy=DistanceStrategy.COSINE).similarity_search(query,
55
  k=25)
56
+ if translate_to_ru:
57
+ results = "\n".join([
58
+ f"Source: {re.search(pattern, result.metadata['file_path']).group(0)}\nPage: {result.metadata['page']}\nContent:\n{translator.translate(result.page_content, 'russian')}\n"
59
+ for result in reranked_results
60
+ ])
61
+ else:
62
+ results = "\n".join([
63
+ f"Source: {re.search(pattern, result.metadata['file_path']).group(0)}\nPage: {result.metadata['page']}\nContent:\n{result.page_content}\n"
64
+ for result in reranked_results
65
+ ])
66
  return results
67
 
68
 
 
74
  gr.Slider(minimum=1, maximum=100, value=50, label="Chunk Overlap"),
75
  gr.Slider(minimum=64, maximum=512, value=256, label="Tokens Per Chunk (чем больше - тем бОльшие куски книги "
76
  "сможем находить)"),
77
+ gr.Slider(minimum=1, maximum=1000, value=200, label="BM25 Answers (чем больше - тем больше будем учитывать неявные смысловые сравнения слов)"),
78
+ gr.Checkbox(label="Translate to Russian", value=False),
79
  ],
80
  outputs="text",
81
  title="Similarity Search for eksmo books"
translator.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+
3
+
4
+ def translate(text, target_lang):
5
+ """Translate text to the target language using OpenAI's chat completions API (gpt-3.5-turbo)."""
6
+ client = openai.Client()
7
+ response = client.chat.completions.create(
8
+ messages=[{"role": "system", "content": f"You are AI-translator and you should translate text to {target_lang}"},
9
+ {'role': 'user', 'content': f'Please translate this text to {target_lang}: {text}. '
10
+ f'Answer with tranlsatrion and no additional information.'},
11
+ ],
12
+ model="gpt-3.5-turbo",
13
+ )
14
+ return response.choices[0].message.content