sergey21000 committed on
Commit
fa7dd9e
1 Parent(s): 18c8c33

Upload 5 files

Files changed (5)
  1. app.py +400 -0
  2. config.py +105 -0
  3. requirements-base.txt +9 -0
  4. requirements-cpu.txt +5 -0
  5. utils.py +495 -0
app.py ADDED
@@ -0,0 +1,400 @@
+ from typing import List, Optional
+
+ import gradio as gr
+ from langchain_core.vectorstores import VectorStore
+
+ from config import (
+     LLM_MODEL_REPOS,
+     EMBED_MODEL_REPOS,
+     SUBTITLES_LANGUAGES,
+     GENERATE_KWARGS,
+ )
+
+ from utils import (
+     load_llm_model,
+     load_embed_model,
+     load_documents_and_create_db,
+     user_message_to_chatbot,
+     update_user_message_with_context,
+     get_llm_response,
+     get_gguf_model_names,
+     add_new_model_repo,
+     clear_llm_folder,
+     clear_embed_folder,
+     get_memory_usage,
+ )
+
+
+ # ============ INTERFACE COMPONENT INITIALIZATION FUNCS ============
+
+ def get_rag_settings(rag_mode: bool, render: bool = True):
+     k = gr.Radio(
+         choices=[1, 2, 3, 4, 5, 'all'],
+         value=2,
+         label='Number of relevant documents for search',
+         visible=rag_mode,
+         render=render,
+     )
+     score_threshold = gr.Slider(
+         minimum=0,
+         maximum=1,
+         value=0.5,
+         step=0.05,
+         label='relevance_scores_threshold',
+         visible=rag_mode,
+         render=render,
+     )
+     return k, score_threshold
+
+
+ def get_user_message_with_context(text: str, rag_mode: bool) -> gr.component:
+     num_lines = len(text.split('\n'))
+     max_lines = 10
+     num_lines = min(num_lines, max_lines)
+     return gr.Textbox(
+         text,
+         visible=rag_mode,
+         interactive=False,
+         label='User Message With Context',
+         lines=num_lines,
+     )
+
+
+ def get_system_prompt_component(interactive: bool) -> gr.Textbox:
+     value = '' if interactive else 'System prompt is not supported by this model'
+     return gr.Textbox(value=value, label='System prompt', interactive=interactive)
+
+
+ def get_generate_args(do_sample: bool) -> List[gr.component]:
+     generate_args = [
+         gr.Slider(minimum=0.1, maximum=3, value=GENERATE_KWARGS['temperature'], step=0.1, label='temperature', visible=do_sample),
+         gr.Slider(minimum=0.1, maximum=1, value=GENERATE_KWARGS['top_p'], step=0.01, label='top_p', visible=do_sample),
+         gr.Slider(minimum=1, maximum=50, value=GENERATE_KWARGS['top_k'], step=1, label='top_k', visible=do_sample),
+         gr.Slider(minimum=1, maximum=5, value=GENERATE_KWARGS['repeat_penalty'], step=0.1, label='repeat_penalty', visible=do_sample),
+     ]
+     return generate_args
+
+
+ def get_rag_mode_component(db: Optional[VectorStore]) -> gr.Checkbox:
+     value = visible = db is not None
+     return gr.Checkbox(value=value, label='RAG Mode', scale=1, visible=visible)
+
+
+ # ================ LOADING AND INITIALIZING MODELS ========================
+
+ start_llm_model, start_support_system_role, load_log = load_llm_model(LLM_MODEL_REPOS[0], 'gemma-2-2b-it-Q8_0.gguf')
+ start_embed_model, load_log = load_embed_model(EMBED_MODEL_REPOS[0])
+
+
+
+ # ================== APPLICATION WEB INTERFACE ============================
+
+ theme = gr.themes.Base(primary_hue='green', secondary_hue='yellow', neutral_hue='zinc').set(
+     loader_color='rgb(0, 255, 0)',
+     slider_color='rgb(0, 200, 0)',
+     body_text_color_dark='rgb(0, 200, 0)',
+     button_secondary_background_fill_dark='green',
+ )
+ css = '''.gradio-container {width: 60% !important}'''
+
+ with gr.Blocks(theme=theme, css=css) as interface:
+
+     # ==================== GRADIO STATES ===============================
+
+     documents = gr.State([])
+     db = gr.State(None)
+     user_message_with_context = gr.State('')
+     support_system_role = gr.State(start_support_system_role)
+     llm_model_repos = gr.State(LLM_MODEL_REPOS)
+     embed_model_repos = gr.State(EMBED_MODEL_REPOS)
+     llm_model = gr.State(start_llm_model)
+     embed_model = gr.State(start_embed_model)
+
+
+
+     # ==================== BOT PAGE =================================
+
+     with gr.Tab(label='Chatbot'):
+         with gr.Row():
+             with gr.Column(scale=3):
+                 chatbot = gr.Chatbot(
+                     show_copy_button=True,
+                     bubble_full_width=False,
+                     height=480,
+                 )
+                 user_message = gr.Textbox(label='User')
+
+                 with gr.Row():
+                     user_message_btn = gr.Button('Send')
+                     stop_btn = gr.Button('Stop')
+                     clear_btn = gr.Button('Clear')
+
+             # ------------- GENERATION PARAMETERS -------------------
+
+             with gr.Column(scale=1, min_width=80):
+                 with gr.Group():
+                     gr.Markdown('History size')
+                     history_len = gr.Slider(
+                         minimum=0,
+                         maximum=5,
+                         value=0,
+                         step=1,
+                         info='Number of previous messages taken into account in history',
+                         label='history_len',
+                         show_label=False,
+                     )
+
+                 with gr.Group():
+                     gr.Markdown('Generation parameters')
+                     do_sample = gr.Checkbox(
+                         value=False,
+                         label='do_sample',
+                         info='Activate random sampling',
+                     )
+                     generate_args = get_generate_args(do_sample.value)
+                     do_sample.change(
+                         fn=get_generate_args,
+                         inputs=do_sample,
+                         outputs=generate_args,
+                         show_progress=False,
+                     )
+
+                 rag_mode = get_rag_mode_component(db=db.value)
+                 k, score_threshold = get_rag_settings(rag_mode=rag_mode.value, render=False)
+                 rag_mode.change(
+                     fn=get_rag_settings,
+                     inputs=[rag_mode],
+                     outputs=[k, score_threshold],
+                 )
+                 with gr.Row():
+                     k.render()
+                     score_threshold.render()
+
+         # ---------------- SYSTEM PROMPT AND USER MESSAGE -----------
+
+         with gr.Accordion('Prompt', open=True):
+             system_prompt = get_system_prompt_component(interactive=support_system_role.value)
+             user_message_with_context = get_user_message_with_context(text='', rag_mode=rag_mode.value)
+
+         # ---------------- SEND, CLEAR AND STOP BUTTONS ------------
+
+         generate_event = gr.on(
+             triggers=[user_message.submit, user_message_btn.click],
+             fn=user_message_to_chatbot,
+             inputs=[user_message, chatbot],
+             outputs=[user_message, chatbot],
+             queue=False,
+         ).then(
+             fn=update_user_message_with_context,
+             inputs=[chatbot, rag_mode, db, k, score_threshold],
+             outputs=[user_message_with_context],
+         ).then(
+             fn=get_user_message_with_context,
+             inputs=[user_message_with_context, rag_mode],
+             outputs=[user_message_with_context],
+         ).then(
+             fn=get_llm_response,
+             inputs=[chatbot, llm_model, user_message_with_context, rag_mode, system_prompt,
+                     support_system_role, history_len, do_sample, *generate_args],
+             outputs=[chatbot],
+         )
+
+         stop_btn.click(
+             fn=None,
+             inputs=None,
+             outputs=None,
+             cancels=generate_event,
+             queue=False,
+         )
+
+         clear_btn.click(
+             fn=lambda: (None, ''),
+             inputs=None,
+             outputs=[chatbot, user_message_with_context],
+             queue=False,
+         )
+
+
+
+     # ================= FILE DOWNLOAD PAGE =========================
+
+     with gr.Tab(label='Load documents'):
+         with gr.Row(variant='compact'):
+             upload_files = gr.File(file_count='multiple', label='Upload text files')
+             web_links = gr.Textbox(lines=6, label='Links to web sites or YouTube')
+
+         with gr.Row(variant='compact'):
+             chunk_size = gr.Slider(50, 2000, value=500, step=50, label='Chunk size')
+             chunk_overlap = gr.Slider(0, 200, value=20, step=10, label='Chunk overlap')
+
+         subtitles_lang = gr.Radio(
+             SUBTITLES_LANGUAGES,
+             value=SUBTITLES_LANGUAGES[0],
+             label='YouTube subtitle language',
+         )
+
+         load_documents_btn = gr.Button(value='Upload documents and initialize database')
+         load_docs_log = gr.Textbox(label='Status of loading and splitting documents', interactive=False)
+
+         load_documents_btn.click(
+             fn=load_documents_and_create_db,
+             inputs=[upload_files, web_links, subtitles_lang, chunk_size, chunk_overlap, embed_model],
+             outputs=[documents, db, load_docs_log],
+         ).success(
+             fn=get_rag_mode_component,
+             inputs=[db],
+             outputs=[rag_mode],
+         )
+
+         gr.HTML("""<h3 style='text-align: center'>
+         <a href="https://github.com/sergey21000/chatbot-rag" target='_blank'>GitHub Repository</a></h3>
+         """)
+
+
+
+     # ================= VIEW PAGE FOR ALL DOCUMENTS =================
+
+     with gr.Tab(label='View documents'):
+         view_documents_btn = gr.Button(value='Show downloaded text chunks')
+         view_documents_textbox = gr.Textbox(
+             lines=1,
+             placeholder='To view chunks, load documents in the Load documents tab',
+             label='Uploaded chunks',
+         )
+         sep = '=' * 20
+         view_documents_btn.click(
+             lambda documents: f'\n{sep}\n\n'.join([doc.page_content for doc in documents]),
+             inputs=[documents],
+             outputs=[view_documents_textbox],
+         )
+
+
+
+     # ============== GGUF MODELS DOWNLOAD PAGE =====================
+
+     with gr.Tab('Load LLM model'):
+         new_llm_model_repo = gr.Textbox(
+             value='',
+             label='Add repository',
+             placeholder='Link to repository of HF models in GGUF format',
+         )
+         new_llm_model_repo_btn = gr.Button('Add repository')
+         curr_llm_model_repo = gr.Dropdown(
+             choices=LLM_MODEL_REPOS,
+             value=None,
+             label='HF Model Repository',
+         )
+         curr_llm_model_path = gr.Dropdown(
+             choices=[],
+             value=None,
+             label='GGUF model file',
+         )
+         load_llm_model_btn = gr.Button('Load and initialize model')
+         load_llm_model_log = gr.Textbox(
+             value=f'Model {LLM_MODEL_REPOS[0]} loaded at application startup',
+             label='Model loading status',
+             lines=6,
+         )
+
+         with gr.Group():
+             gr.Markdown('Free up disk space by deleting all models except the currently selected one')
+             clear_llm_folder_btn = gr.Button('Clear folder')
+
+         new_llm_model_repo_btn.click(
+             fn=add_new_model_repo,
+             inputs=[new_llm_model_repo, llm_model_repos],
+             outputs=[curr_llm_model_repo, load_llm_model_log],
+         ).success(
+             fn=lambda: '',
+             inputs=None,
+             outputs=[new_llm_model_repo],
+         )
+
+         curr_llm_model_repo.change(
+             fn=get_gguf_model_names,
+             inputs=[curr_llm_model_repo],
+             outputs=[curr_llm_model_path],
+         )
+
+         load_llm_model_btn.click(
+             fn=load_llm_model,
+             inputs=[curr_llm_model_repo, curr_llm_model_path],
+             outputs=[llm_model, support_system_role, load_llm_model_log],
+             queue=True,
+         ).success(
+             fn=lambda log: log + get_memory_usage(),
+             inputs=[load_llm_model_log],
+             outputs=[load_llm_model_log],
+         ).then(
+             fn=get_system_prompt_component,
+             inputs=[support_system_role],
+             outputs=[system_prompt],
+         )
+
+         clear_llm_folder_btn.click(
+             fn=clear_llm_folder,
+             inputs=[curr_llm_model_path],
+             outputs=None,
+         ).success(
+             fn=lambda model_path: f'Models other than {model_path} removed',
+             inputs=[curr_llm_model_path],
+             outputs=[load_llm_model_log],
+         )
+
+
+     # ============== EMBEDDING MODELS DOWNLOAD PAGE =============
+
+     with gr.Tab('Load embed model'):
+         new_embed_model_repo = gr.Textbox(
+             value='',
+             label='Add repository',
+             placeholder='Link to HF model repository',
+         )
+         new_embed_model_repo_btn = gr.Button('Add repository')
+         curr_embed_model_repo = gr.Dropdown(
+             choices=EMBED_MODEL_REPOS,
+             value=None,
+             label='HF model repository',
+         )
+
+         load_embed_model_btn = gr.Button('Load and initialize model')
+         load_embed_model_log = gr.Textbox(
+             value=f'Model {EMBED_MODEL_REPOS[0]} loaded at application startup',
+             label='Model loading status',
+             lines=7,
+         )
+         with gr.Group():
+             gr.Markdown('Free up disk space by deleting all models except the currently selected one')
+             clear_embed_folder_btn = gr.Button('Clear folder')
+
+         new_embed_model_repo_btn.click(
+             fn=add_new_model_repo,
+             inputs=[new_embed_model_repo, embed_model_repos],
+             outputs=[curr_embed_model_repo, load_embed_model_log],
+         ).success(
+             fn=lambda: '',
+             inputs=None,
+             outputs=new_embed_model_repo,
+         )
+
+         load_embed_model_btn.click(
+             fn=load_embed_model,
+             inputs=[curr_embed_model_repo],
+             outputs=[embed_model, load_embed_model_log],
+         ).success(
+             fn=lambda log: log + get_memory_usage(),
+             inputs=[load_embed_model_log],
+             outputs=[load_embed_model_log],
+         )
+
+         clear_embed_folder_btn.click(
+             fn=clear_embed_folder,
+             inputs=[curr_embed_model_repo],
+             outputs=None,
+         ).success(
+             fn=lambda model_repo: f'Models other than {model_repo} removed',
+             inputs=[curr_embed_model_repo],
+             outputs=[load_embed_model_log],
+         )
+
+
+ interface.launch(server_name='0.0.0.0', server_port=7860)  # debug=True
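
A note on one pattern above: k and score_threshold are created with render=False and only placed into the layout later via k.render() and score_threshold.render(). A minimal standalone sketch of this Gradio idiom (a hypothetical demo, not code from this commit):

import gradio as gr

with gr.Blocks() as demo:
    # created early so event handlers can reference it, but not yet rendered
    box = gr.Textbox(label='Created early, rendered later', render=False)
    gr.Markdown('Content that appears above the textbox')
    box.render()  # the component is placed here, where render() is called

demo.launch()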
config.py ADDED
@@ -0,0 +1,105 @@
+ from pathlib import Path
+
+ # document loaders
+ from langchain_community.document_loaders import (
+     CSVLoader,
+     PDFMinerLoader,
+     PyPDFLoader,
+     TextLoader,
+     UnstructuredHTMLLoader,
+     UnstructuredMarkdownLoader,
+     UnstructuredPowerPointLoader,
+     UnstructuredWordDocumentLoader,
+     WebBaseLoader,
+     YoutubeLoader,
+     DirectoryLoader,
+ )
+
+
+ # langchain classes for extracting text from various sources
+ LOADER_CLASSES = {
+     '.csv': CSVLoader,
+     '.doc': UnstructuredWordDocumentLoader,
+     '.docx': UnstructuredWordDocumentLoader,
+     '.html': UnstructuredHTMLLoader,
+     '.md': UnstructuredMarkdownLoader,
+     '.pdf': PDFMinerLoader,
+     '.ppt': UnstructuredPowerPointLoader,
+     '.pptx': UnstructuredPowerPointLoader,
+     '.txt': TextLoader,
+     'web': WebBaseLoader,
+     'directory': DirectoryLoader,
+     'youtube': YoutubeLoader,
+ }
+
+ # languages for YouTube subtitles
+ SUBTITLES_LANGUAGES = ['ru', 'en']
+
+ # prompt template that injects the retrieved context
+ CONTEXT_TEMPLATE = '''Answer the question given the following context.
+
+ Context:
+ {context}
+
+ Question:
+ {user_message}
+
+ Answer:'''
+
+ # dictionary with the text generation config
+ GENERATE_KWARGS = dict(
+     temperature=0.2,
+     top_p=0.95,
+     top_k=40,
+     repeat_penalty=1.0,
+ )
+
+ # paths to LLM and embedding models
+ LLM_MODELS_PATH = Path('models')
+ EMBED_MODELS_PATH = Path('embed_models')
+ LLM_MODELS_PATH.mkdir(exist_ok=True)
+ EMBED_MODELS_PATH.mkdir(exist_ok=True)
+
+ # LLM models in GGUF format available at application startup
+ LLM_MODEL_REPOS = [
+     # https://huggingface.co/bartowski/gemma-2-2b-it-GGUF
+     'bartowski/gemma-2-2b-it-GGUF',
+     # https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF
+     'bartowski/Qwen2.5-3B-Instruct-GGUF',
+     # https://huggingface.co/bartowski/Qwen2.5-1.5B-Instruct-GGUF
+     'bartowski/Qwen2.5-1.5B-Instruct-GGUF',
+     # https://huggingface.co/bartowski/openchat-3.6-8b-20240522-GGUF
+     'bartowski/openchat-3.6-8b-20240522-GGUF',
+     # https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF
+     'bartowski/Mistral-7B-Instruct-v0.3-GGUF',
+     # https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF
+     'bartowski/Llama-3.2-3B-Instruct-GGUF',
+ ]
+
+ # embedding models available at application startup
+ EMBED_MODEL_REPOS = [
+     # https://huggingface.co/sergeyzh/rubert-tiny-turbo  # 117 MB
+     'sergeyzh/rubert-tiny-turbo',
+     # https://huggingface.co/cointegrated/rubert-tiny2  # 118 MB
+     'cointegrated/rubert-tiny2',
+     # https://huggingface.co/cointegrated/LaBSE-en-ru  # 516 MB
+     'cointegrated/LaBSE-en-ru',
+     # https://huggingface.co/sergeyzh/LaBSE-ru-turbo  # 513 MB
+     'sergeyzh/LaBSE-ru-turbo',
+     # https://huggingface.co/intfloat/multilingual-e5-large  # 2.24 GB
+     'intfloat/multilingual-e5-large',
+     # https://huggingface.co/intfloat/multilingual-e5-base  # 1.11 GB
+     'intfloat/multilingual-e5-base',
+     # https://huggingface.co/intfloat/multilingual-e5-small  # 471 MB
+     'intfloat/multilingual-e5-small',
+     # https://huggingface.co/intfloat/multilingual-e5-large-instruct  # 1.12 GB
+     'intfloat/multilingual-e5-large-instruct',
+     # https://huggingface.co/sentence-transformers/all-mpnet-base-v2  # 438 MB
+     'sentence-transformers/all-mpnet-base-v2',
+     # https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2  # 1.11 GB
+     'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
+     # https://huggingface.co/ai-forever?search_models=ruElectra  # 356 MB
+     'ai-forever/ruElectra-medium',
+     # https://huggingface.co/ai-forever/sbert_large_nlu_ru  # 1.71 GB
+     'ai-forever/sbert_large_nlu_ru',
+ ]
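
A minimal sketch of how these config objects are consumed (notes.txt is a hypothetical local file; assumes the packages from requirements-base.txt are installed):

from config import LOADER_CLASSES, CONTEXT_TEMPLATE

# pick a loader by file extension and build the RAG prompt from the first document
loader = LOADER_CLASSES['.txt']('notes.txt')
docs = loader.load()
prompt = CONTEXT_TEMPLATE.format(context=docs[0].page_content, user_message='What is this text about?')
print(prompt)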
requirements-base.txt ADDED
@@ -0,0 +1,9 @@
+ gradio==4.44.1
+ huggingface-hub==0.24.7
+ langchain==0.3.1
+ langchain-community==0.3.1
+ langchain-huggingface==0.1.0
+ pdfminer.six==20240706
+ youtube-transcript-api==0.6.2
+ psutil==6.0.0
+ faiss-cpu==1.8.0.post1
requirements-cpu.txt ADDED
@@ -0,0 +1,5 @@
+ --extra-index-url https://download.pytorch.org/whl/cpu
+ --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
+ torch==2.4.1
+ llama_cpp_python==0.2.88
+ -r requirements-base.txt
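
With both files in place, a CPU-only environment can be set up with a single command (assuming a recent pip, since the extra index URLs above supply the CPU wheels):

pip install -r requirements-cpu.txt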
utils.py ADDED
@@ -0,0 +1,495 @@
+ import csv
+ from pathlib import Path
+ from shutil import rmtree
+ from typing import List, Tuple, Dict, Union, Optional, Any, Iterable
+ from tqdm import tqdm
+
+ import psutil
+ import requests
+ from requests.exceptions import MissingSchema
+
+ import torch
+ import gradio as gr
+
+ from llama_cpp import Llama
+ from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled
+ from huggingface_hub import hf_hub_download, list_repo_tree, list_repo_files, repo_info, repo_exists, snapshot_download
+
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
+ from langchain_community.vectorstores import FAISS
+ from langchain_huggingface import HuggingFaceEmbeddings
+
+ # imports for annotations
+ from langchain.docstore.document import Document
+ from langchain_core.embeddings import Embeddings
+ from langchain_core.vectorstores import VectorStore
+
+ from config import (
+     LLM_MODELS_PATH,
+     EMBED_MODELS_PATH,
+     GENERATE_KWARGS,
+     LOADER_CLASSES,
+     CONTEXT_TEMPLATE,
+ )
+
+
+ # type annotations
+ CHAT_HISTORY = List[Tuple[Optional[str], Optional[str]]]
+ LLM_MODEL_DICT = Dict[str, Llama]
+ EMBED_MODEL_DICT = Dict[str, Embeddings]
+
+
+ # ===================== ADDITIONAL FUNCS =======================
+
+ # get the amount of used memory on disk, CPU (RAM) and GPU
+ def get_memory_usage() -> str:
+     print_memory = ''
+
+     memory_type = 'Disk'
+     psutil_stats = psutil.disk_usage('.')
+     memory_total = psutil_stats.total / 1024**3
+     memory_usage = psutil_stats.used / 1024**3
+     print_memory += f'{memory_type} Memory Usage: {memory_usage:.2f} / {memory_total:.2f} GB\n'
+
+     memory_type = 'CPU'
+     psutil_stats = psutil.virtual_memory()
+     memory_total = psutil_stats.total / 1024**3
+     memory_usage = memory_total - (psutil_stats.available / 1024**3)
+     print_memory += f'{memory_type} Memory Usage: {memory_usage:.2f} / {memory_total:.2f} GB\n'
+
+     if torch.cuda.is_available():
+         memory_type = 'GPU'
+         memory_free, memory_total = torch.cuda.mem_get_info()
+         memory_usage = memory_total - memory_free
+         print_memory += f'{memory_type} Memory Usage: {memory_usage / 1024**3:.2f} / {memory_total / 1024**3:.2f} GB\n'
+
+     print_memory = f'---------------\n{print_memory}---------------'
+     return print_memory
+
+
+ # clean up a list of documents, dropping near-empty lines and chunks
+ def clear_documents(documents: Iterable[Document]) -> Iterable[Document]:
+     def clear_text(text: str) -> str:
+         lines = text.split('\n')
+         lines = [line for line in lines if len(line.strip()) > 2]
+         text = '\n'.join(lines).strip()
+         return text
+
+     output_documents = []
+     for document in documents:
+         text = clear_text(document.page_content)
+         if len(text) > 10:
+             document.page_content = text
+             output_documents.append(document)
+     return output_documents
+
+
+ # ===================== INTERFACE FUNCS =============================
+
+
+ # ------------- LLM AND EMBEDDING MODELS LOADING ------------------------
+
+ # download a file from a URL, showing tqdm and Gradio progress bars
+ def download_file(file_url: str, file_path: Union[str, Path]) -> None:
+     response = requests.get(file_url, stream=True)
+     if response.status_code != 200:
+         raise Exception(f'The file is not available for download at the link: {file_url}')
+     total_size = int(response.headers.get('content-length', 0))
+     progress_tqdm = tqdm(desc='Loading GGUF file', total=total_size, unit='iB', unit_scale=True)
+     progress_gradio = gr.Progress()
+     completed_size = 0
+     with open(file_path, 'wb') as file:
+         for data in response.iter_content(chunk_size=4096):
+             size = file.write(data)
+             progress_tqdm.update(size)
+             completed_size += size
+             desc = f'Loading GGUF file, {completed_size/1024**3:.3f}/{total_size/1024**3:.3f} GB'
+             progress_gradio(completed_size/total_size, desc=desc)
+     progress_tqdm.close()
+
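# A minimal usage sketch of download_file: the repo and file below are app.py's
# startup defaults; inside the app this runs under a Gradio event, and gr.Progress
# assumes such an event context.
download_file(
    'https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q8_0.gguf',
    LLM_MODELS_PATH / 'gemma-2-2b-it-Q8_0.gguf',
)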
+
+ # download and initialize the GGUF model
+ def load_llm_model(model_repo: str, model_file: str) -> Tuple[LLM_MODEL_DICT, bool, str]:
+     llm_model = None
+     load_log = ''
+     support_system_role = False
+
+     if isinstance(model_file, list):
+         load_log += 'No model selected\n'
+         return {'model': llm_model}, support_system_role, load_log
+     if '(' in model_file:
+         model_file = model_file.split('(')[0].rstrip()
+
+     progress = gr.Progress()
+     progress(0.3, desc='Step 1/2: Download the GGUF file')
+     model_path = LLM_MODELS_PATH / model_file
+
+     if model_path.is_file():
+         load_log += f'Model {model_file} already loaded, reinitializing\n'
+     else:
+         try:
+             gguf_url = f'https://huggingface.co/{model_repo}/resolve/main/{model_file}'
+             download_file(gguf_url, model_path)
+             load_log += f'Model {model_file} loaded\n'
+         except Exception as ex:
+             model_path = ''
+             load_log += f'Error loading model, error code:\n{ex}\n'
+
+     if model_path:
+         progress(0.7, desc='Step 2/2: Initialize the model')
+         try:
+             llm_model = Llama(model_path=str(model_path), n_gpu_layers=-1, verbose=False)
+             support_system_role = 'System role not supported' not in llm_model.metadata.get('tokenizer.chat_template', '')
+             load_log += f'Model {model_file} initialized, max context size is {llm_model.n_ctx()} tokens\n'
+         except Exception as ex:
+             load_log += f'Error initializing model, error code:\n{ex}\n'
+
+     llm_model = {'model': llm_model}
+     return llm_model, support_system_role, load_log
+
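# A minimal sketch of loading a model outside the UI with the same arguments
# app.py uses at startup; llm_model_dict['model'] is a llama_cpp.Llama
# instance, or None if the download or initialization failed.
llm_model_dict, support_system_role, load_log = load_llm_model(
    'bartowski/gemma-2-2b-it-GGUF', 'gemma-2-2b-it-Q8_0.gguf'
)
print(load_log)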
+
+ # download and initialize the embedding model
+ def load_embed_model(model_repo: str) -> Tuple[Dict[str, HuggingFaceEmbeddings], str]:
+     embed_model = None
+     load_log = ''
+
+     if isinstance(model_repo, list):
+         load_log = 'No model selected'
+         return {'embed_model': embed_model}, load_log
+
+     progress = gr.Progress()
+     folder_name = model_repo.replace('/', '_')
+     folder_path = EMBED_MODELS_PATH / folder_name
+     if Path(folder_path).is_dir():
+         load_log += f'Reinitializing model {model_repo}\n'
+     else:
+         progress(0.5, desc='Step 1/2: Download model repository')
+         snapshot_download(
+             repo_id=model_repo,
+             local_dir=folder_path,
+             ignore_patterns='*.h5',
+         )
+         load_log += f'Model {model_repo} loaded\n'
+
+     progress(0.7, desc='Step 2/2: Initialize the model')
+     model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
+     embed_model = HuggingFaceEmbeddings(
+         model_name=str(folder_path),
+         model_kwargs=model_kwargs,
+         # encode_kwargs={'normalize_embeddings': True},
+     )
+     load_log += f'Embeddings model {model_repo} initialized\n'
+     load_log += 'Please upload documents and initialize the database again\n'
+     embed_model = {'embed_model': embed_model}
+     return embed_model, load_log
+
+
+ # add a new HF repository new_model_repo to the current list model_repos
+ def add_new_model_repo(new_model_repo: str, model_repos: List[str]) -> Tuple[gr.Dropdown, str]:
+     load_log = ''
+     repo = new_model_repo.strip()
+     if repo:
+         repo = repo.split('/')[-2:]
+         if len(repo) == 2:
+             repo = '/'.join(repo).split('?')[0]
+             if repo_exists(repo) and repo not in model_repos:
+                 model_repos.insert(0, repo)
+                 load_log += f'Model repository {repo} successfully added\n'
+             else:
+                 load_log += 'Invalid HF repository name or model already in the list\n'
+         else:
+             load_log += 'Invalid link to HF repository\n'
+     else:
+         load_log += 'Empty line in HF repository field\n'
+     model_repo_dropdown = gr.Dropdown(choices=model_repos, value=model_repos[0])
+     return model_repo_dropdown, load_log
+
+
+ # get the list of GGUF models from an HF repository
+ def get_gguf_model_names(model_repo: str) -> gr.Dropdown:
+     repo_files = list(list_repo_tree(model_repo))
+     repo_files = [file for file in repo_files if file.path.endswith('.gguf')]
+     model_paths = [f'{file.path} ({file.size / 1000 ** 3:.2f}G)' for file in repo_files]
+     model_paths_dropdown = gr.Dropdown(
+         choices=model_paths,
+         value=model_paths[0],
+         label='GGUF model file',
+     )
+     return model_paths_dropdown
+
+
+ # delete model files to free space, keeping only the current model gguf_filename
+ def clear_llm_folder(gguf_filename: str) -> None:
+     if gguf_filename is None:
+         gr.Info('No model file is selected to keep, so nothing was deleted.')
+         return
+     if '(' in gguf_filename:
+         gguf_filename = gguf_filename.split('(')[0].rstrip()
+     for path in LLM_MODELS_PATH.iterdir():
+         if path.name == gguf_filename:
+             continue
+         if path.is_file():
+             path.unlink(missing_ok=True)
+     gr.Info(f'All files removed from directory {LLM_MODELS_PATH} except {gguf_filename}')
+
+
+ # delete model folders to free space, keeping only the current model's folder
+ def clear_embed_folder(model_repo: str) -> None:
+     if model_repo is None:
+         gr.Info('No model is selected to keep, so nothing was deleted.')
+         return
+     model_folder_name = model_repo.replace('/', '_')
+     for path in EMBED_MODELS_PATH.iterdir():
+         if path.name == model_folder_name:
+             continue
+         if path.is_dir():
+             rmtree(path, ignore_errors=True)
+     gr.Info(f'All directories removed from {EMBED_MODELS_PATH} except {model_folder_name}')
+
+
+ # ------------------------ YOUTUBE ------------------------
+
+ # check subtitle availability: returns True with a log if manual or automatic
+ # subtitles are available, and False with a log otherwise
+ def check_subtitles_available(yt_video_link: str, target_lang: str) -> Tuple[bool, str]:
+     video_id = yt_video_link.split('watch?v=')[-1].split('&')[0]
+     load_log = ''
+     available = True
+     try:
+         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+         try:
+             transcript = transcript_list.find_transcript([target_lang])
+             if transcript.is_generated:
+                 load_log += f'Automatic subtitles will be loaded, manual ones are not available for video {yt_video_link}\n'
+             else:
+                 load_log += f'Manual subtitles will be downloaded for the video {yt_video_link}\n'
+         except NoTranscriptFound:
+             load_log += f'Subtitle language {target_lang} is not available for video {yt_video_link}\n'
+             available = False
+     except TranscriptsDisabled:
+         load_log += f'No subtitles for video {yt_video_link}\n'
+         available = False
+     return available, load_log
+
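# A quick sketch of the subtitle check (VIDEO_ID is a hypothetical placeholder):
available, log = check_subtitles_available('https://www.youtube.com/watch?v=VIDEO_ID', 'en')
print(available, log, sep='\n')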
+
+ # ------------- UPLOADING DOCUMENTS FOR RAG ------------------------
+
+ # extract documents (in langchain Document format) from uploaded files
+ def load_documents_from_files(upload_files: List[str]) -> Tuple[List[Document], str]:
+     load_log = ''
+     documents = []
+     for upload_file in upload_files:
+         file_extension = f".{upload_file.split('.')[-1]}"
+         if file_extension in LOADER_CLASSES:
+             loader_class = LOADER_CLASSES[file_extension]
+             loader_kwargs = {}
+             if file_extension == '.csv':
+                 with open(upload_file) as csvfile:
+                     delimiter = csv.Sniffer().sniff(csvfile.read(4096)).delimiter
+                 loader_kwargs = {'csv_args': {'delimiter': delimiter}}
+             try:
+                 load_documents = loader_class(upload_file, **loader_kwargs).load()
+                 documents.extend(load_documents)
+             except Exception as ex:
+                 load_log += f'Error uploading file {upload_file}\n'
+                 load_log += f'Error code: {ex}\n'
+                 continue
+         else:
+             load_log += f'Unsupported file format {upload_file}\n'
+             continue
+     return documents, load_log
+
+
+ # extract documents (in langchain Document format) from web links
+ def load_documents_from_links(
+     web_links: str,
+     subtitles_lang: str,
+ ) -> Tuple[List[Document], str]:
+
+     load_log = ''
+     documents = []
+     loader_class_kwargs = {}
+     web_links = [web_link.strip() for web_link in web_links.split('\n') if web_link.strip()]
+     for web_link in web_links:
+         if 'youtube.com' in web_link:
+             available, log = check_subtitles_available(web_link, subtitles_lang)
+             load_log += log
+             if not available:
+                 continue
+             loader_class = LOADER_CLASSES['youtube'].from_youtube_url
+             loader_class_kwargs = {'language': subtitles_lang}
+         else:
+             loader_class = LOADER_CLASSES['web']
+             loader_class_kwargs = {}
+
+         try:
+             if requests.get(web_link).status_code != 200:
+                 load_log += f'Link is not reachable via Python requests: {web_link}\n'
+                 continue
+             load_documents = loader_class(web_link, **loader_class_kwargs).load()
+             if len(load_documents) == 0:
+                 load_log += f'No text chunks were found at the link: {web_link}\n'
+                 continue
+             documents.extend(load_documents)
+         except MissingSchema:
+             load_log += f'Invalid link: {web_link}\n'
+             continue
+         except Exception as ex:
+             load_log += f'Error loading data by web loader at link: {web_link}\n'
+             load_log += f'Error code: {ex}\n'
+             continue
+     return documents, load_log
+
+
+ # upload files and build the documents and the database
+ def load_documents_and_create_db(
+     upload_files: Optional[List[str]],
+     web_links: str,
+     subtitles_lang: str,
+     chunk_size: int,
+     chunk_overlap: int,
+     embed_model_dict: EMBED_MODEL_DICT,
+ ) -> Tuple[List[Document], Optional[VectorStore], str]:
+
+     load_log = ''
+     all_documents = []
+     db = None
+     progress = gr.Progress()
+
+     embed_model = embed_model_dict.get('embed_model')
+     if embed_model is None:
+         load_log += 'Embeddings model not initialized, DB cannot be created'
+         return all_documents, db, load_log
+
+     if upload_files is None and not web_links:
+         load_log = 'No files or links selected'
+         return all_documents, db, load_log
+
+     if upload_files is not None:
+         progress(0.3, desc='Step 1/2: Upload documents from files')
+         docs, log = load_documents_from_files(upload_files)
+         all_documents.extend(docs)
+         load_log += log
+
+     if web_links:
+         progress(0.3 if upload_files is None else 0.5, desc='Step 1/2: Upload documents via links')
+         docs, log = load_documents_from_links(web_links, subtitles_lang)
+         all_documents.extend(docs)
+         load_log += log
+
+     if len(all_documents) == 0:
+         load_log += 'Download was interrupted because no documents were extracted\n'
+         load_log += 'RAG mode cannot be activated'
+         return all_documents, db, load_log
+
+     load_log += f'Documents loaded: {len(all_documents)}\n'
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=chunk_size,
+         chunk_overlap=chunk_overlap,
+     )
+     documents = text_splitter.split_documents(all_documents)
+     documents = clear_documents(documents)
+     load_log += f'Documents are split, number of text chunks: {len(documents)}\n'
+
+     progress(0.7, desc='Step 2/2: Initialize DB')
+     db = FAISS.from_documents(documents=documents, embedding=embed_model)
+     load_log += 'DB is initialized, RAG mode can now be enabled in the Chatbot tab'
+     return documents, db, load_log
+
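# An end-to-end sketch outside the UI: build a FAISS DB from one local file
# ('README.md' is a hypothetical path; the embed repo is a config default).
embed_model_dict, _ = load_embed_model('sergeyzh/rubert-tiny-turbo')
docs, db, log = load_documents_and_create_db(
    upload_files=['README.md'], web_links='', subtitles_lang='en',
    chunk_size=500, chunk_overlap=20, embed_model_dict=embed_model_dict,
)
print(log)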
+
+ # ------------------ CHATBOT FUNCS ------------------------
+
+ # add the user message to the chatbot window
+ def user_message_to_chatbot(user_message: str, chatbot: CHAT_HISTORY) -> Tuple[str, CHAT_HISTORY]:
+     chatbot.append([user_message, None])
+     return '', chatbot
+
+
+ # format the prompt, adding context if a DB is available and RAG mode is enabled
+ def update_user_message_with_context(
+     chatbot: CHAT_HISTORY,
+     rag_mode: bool,
+     db: VectorStore,
+     k: Union[int, str],
+     score_threshold: float,
+ ) -> str:
+
+     user_message = chatbot[-1][0]
+     user_message_with_context = ''
+     if db is not None and rag_mode and user_message.strip():
+         if k == 'all':
+             k = len(db.docstore._dict)
+         docs_and_distances = db.similarity_search_with_relevance_scores(
+             user_message,
+             k=k,
+             score_threshold=score_threshold,
+         )
+         if len(docs_and_distances) > 0:
+             retriever_context = '\n\n'.join([doc[0].page_content for doc in docs_and_distances])
+             user_message_with_context = CONTEXT_TEMPLATE.format(
+                 user_message=user_message,
+                 context=retriever_context,
+             )
+     return user_message_with_context
+
+
+ # model response generation
+ def get_llm_response(
+     chatbot: CHAT_HISTORY,
+     llm_model_dict: LLM_MODEL_DICT,
+     user_message_with_context: str,
+     rag_mode: bool,
+     system_prompt: str,
+     support_system_role: bool,
+     history_len: int,
+     do_sample: bool,
+     *generate_args,
+ ) -> CHAT_HISTORY:
+
+     user_message = chatbot[-1][0]
+     if not user_message.strip():
+         yield chatbot[:-1]
+         return
+
+     if rag_mode:
+         if user_message_with_context:
+             user_message = user_message_with_context
+         else:
+             gr.Info((
+                 'No documents relevant to the query were found, generation in RAG mode is not possible.\n'
+                 'Try lowering relevance_scores_threshold or disable RAG mode for normal generation'
+             ))
+             yield chatbot[:-1]
+             return
+
+     llm_model = llm_model_dict.get('model')
+     gen_kwargs = dict(zip(GENERATE_KWARGS.keys(), generate_args))
+     gen_kwargs['top_k'] = int(gen_kwargs['top_k'])
+     if not do_sample:
+         gen_kwargs['top_p'] = 0.0
+         gen_kwargs['top_k'] = 1
+         gen_kwargs['repeat_penalty'] = 1.0
+
+     messages = []
+     if support_system_role and system_prompt:
+         messages.append({'role': 'system', 'content': system_prompt})
+
+     if history_len != 0:
+         for user_msg, bot_msg in chatbot[:-1][-history_len:]:
+             messages.append({'role': 'user', 'content': user_msg})
+             messages.append({'role': 'assistant', 'content': bot_msg})
+
+     messages.append({'role': 'user', 'content': user_message})
+     stream_response = llm_model.create_chat_completion(
+         messages=messages,
+         stream=True,
+         **gen_kwargs,
+     )
+     try:
+         chatbot[-1][1] = ''
+         for chunk in stream_response:
+             token = chunk['choices'][0]['delta'].get('content')
+             if token is not None:
+                 chatbot[-1][1] += token
+                 yield chatbot
+     except Exception as ex:
+         gr.Info(f'Error generating response, error code: {ex}')
+         yield chatbot
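
Since get_llm_response is a generator, a minimal sketch of driving the full pipeline outside Gradio (the model arguments are app.py's startup defaults; the positional generate_args follow GENERATE_KWARGS order, with do_sample=False forcing greedy decoding):

from config import GENERATE_KWARGS
from utils import load_llm_model, get_llm_response

llm_model_dict, support_system_role, _ = load_llm_model(
    'bartowski/gemma-2-2b-it-GGUF', 'gemma-2-2b-it-Q8_0.gguf'
)
chat = [['What is RAG in one sentence?', None]]
final_chat = chat
for final_chat in get_llm_response(
    chat, llm_model_dict, '', False, '', support_system_role, 0, False,
    *GENERATE_KWARGS.values(),
):
    pass
print(final_chat[-1][1])  # the streamed assistant reply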