sergey21000 committed
Commit 62283cf · verified · 1 Parent(s): 972b192

Update app.py

Files changed (1)
  1. app.py +303 -175
app.py CHANGED
@@ -1,127 +1,63 @@
-from pathlib import Path
-from shutil import rmtree
-from typing import Union, List, Dict, Tuple, Optional
-from tqdm import tqdm
+from typing import List, Optional
 
-import requests
 import gradio as gr
-from llama_cpp import Llama
-
-
-# ================== ANNOTATIONS ========================
-
-CHAT_HISTORY = List[Optional[Dict[str, Optional[str]]]]
-MODEL_DICT = Dict[str, Llama]
-
-
-# ================== FUNCS =============================
-
-def download_file(file_url: str, file_path: Union[str, Path]) -> None:
-    response = requests.get(file_url, stream=True)
-    if response.status_code != 200:
-        raise Exception(f'File is not available for download at the link: {file_url}')
-    total_size = int(response.headers.get('content-length', 0))
-    progress_tqdm = tqdm(desc='Loading GGUF file', total=total_size, unit='iB', unit_scale=True)
-    progress_gradio = gr.Progress()
-    completed_size = 0
-    with open(file_path, 'wb') as file:
-        for data in response.iter_content(chunk_size=4096):
-            size = file.write(data)
-            progress_tqdm.update(size)
-            completed_size += size
-            desc = f'Loading GGUF file, {completed_size/1024**3:.3f}/{total_size/1024**3:.3f} GB'
-            progress_gradio(completed_size/total_size, desc=desc)
-
-
-def download_gguf_and_init_model(gguf_url: str, model_dict: MODEL_DICT) -> Tuple[MODEL_DICT, bool, str]:
-    log = ''
-    if not gguf_url.endswith('.gguf'):
-        log += 'The link must be a direct link to the GGUF file\n'
-        return model_dict, False, log
-
-    gguf_filename = gguf_url.rsplit('/')[-1]
-    model_path = MODELS_PATH / gguf_filename
-    progress = gr.Progress()
-
-    if not model_path.is_file():
-        progress(0.3, desc='Step 1/2: Loading GGUF model file')
-        try:
-            download_file(gguf_url, model_path)
-            log += f'Model file {gguf_filename} successfully loaded\n'
-        except Exception as ex:
-            log += f'Error loading model from link {gguf_url}, error code:\n{ex}\n'
-            curr_model = model_dict.get('model')
-            if curr_model is None:
-                log += 'Model is missing from dictionary "model_dict"\n'
-                return model_dict, False, log
-            curr_model_filename = Path(curr_model.model_path).name
-            log += f'Current initialized model: {curr_model_filename}\n'
-            return model_dict, False, log
-    else:
-        log += f'Model file {gguf_filename} loaded, initializing model...\n'
-
-    progress(0.7, desc='Step 2/2: Model initialization')
-    model = Llama(model_path=str(model_path), n_gpu_layers=-1, verbose=True)
-    model_dict = {'model': model}
-    support_system_role = 'System role not supported' not in model.metadata['tokenizer.chat_template']
-    log += f'Model {gguf_filename} initialized\n'
-    return model_dict, support_system_role, log
-
-
-def user_message_to_chatbot(user_message: str, chatbot: CHAT_HISTORY) -> Tuple[str, CHAT_HISTORY]:
-    if user_message:
-        chatbot.append({'role': 'user', 'metadata': {'title': None}, 'content': user_message})
-    return '', chatbot
-
-
-def bot_response_to_chatbot(
-    chatbot: CHAT_HISTORY,
-    model_dict: MODEL_DICT,
-    system_prompt: str,
-    support_system_role: bool,
-    history_len: int,
-    do_sample: bool,
-    *generate_args,
-):
-
-    model = model_dict.get('model')
-    if model is None:
-        gr.Info('Model not initialized')
-        yield chatbot
-        return
-
-    if len(chatbot) == 0 or chatbot[-1]['role'] == 'assistant':
-        yield chatbot
-        return
-
-    messages = []
-    if support_system_role and system_prompt:
-        messages.append({'role': 'system', 'metadata': {'title': None}, 'content': system_prompt})
-
-    if history_len != 0:
-        messages.extend(chatbot[:-1][-(history_len*2):])
-
-    messages.append(chatbot[-1])
-
-    gen_kwargs = dict(zip(GENERATE_KWARGS.keys(), generate_args))
-    gen_kwargs['top_k'] = int(gen_kwargs['top_k'])
-    if not do_sample:
-        gen_kwargs['top_p'] = 0.0
-        gen_kwargs['top_k'] = 1
-        gen_kwargs['repeat_penalty'] = 1.0
-
-    stream_response = model.create_chat_completion(
-        messages=messages,
-        stream=True,
-        **gen_kwargs,
-    )
+from langchain_core.vectorstores import VectorStore
+
+from config import (
+    LLM_MODEL_REPOS,
+    EMBED_MODEL_REPOS,
+    SUBTITLES_LANGUAGES,
+    GENERATE_KWARGS,
+)
+
+from utils import (
+    load_llm_model,
+    load_embed_model,
+    load_documents_and_create_db,
+    user_message_to_chatbot,
+    update_user_message_with_context,
+    get_llm_response,
+    get_gguf_model_names,
+    add_new_model_repo,
+    clear_llm_folder,
+    clear_embed_folder,
+    get_memory_usage,
+)
+
 
-    chatbot.append({'role': 'assistant', 'metadata': {'title': None}, 'content': ''})
-    for chunk in stream_response:
-        token = chunk['choices'][0]['delta'].get('content')
-        if token is not None:
-            chatbot[-1]['content'] += token
-            yield chatbot
+# ============ INTERFACE COMPONENT INITIALIZATION FUNCS ============
+
+def get_rag_settings(rag_mode: bool, render: bool = True):
+    k = gr.Radio(
+        choices=[1, 2, 3, 4, 5, 'all'],
+        value=2,
+        label='Number of relevant documents for search',
+        visible=rag_mode,
+        render=render,
+    )
+    score_threshold = gr.Slider(
+        minimum=0,
+        maximum=1,
+        value=0.5,
+        step=0.05,
+        label='relevance_scores_threshold',
+        visible=rag_mode,
+        render=render,
+    )
+    return k, score_threshold
+
+
+def get_user_message_with_context(text: str, rag_mode: bool) -> gr.component:
+    num_lines = len(text.split('\n'))
+    max_lines = 10
+    num_lines = max_lines if num_lines > max_lines else num_lines
+    return gr.Textbox(
+        text,
+        visible=rag_mode,
+        interactive=False,
+        label='User Message With Context',
+        lines=num_lines,
+    )
 
 
 def get_system_prompt_component(interactive: bool) -> gr.Textbox:
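
Note: the new imports pull constants from a config module that this commit does not include. Below is a minimal sketch of what config.py presumably defines; the names are confirmed by the import above, while the values are illustrative assumptions (GENERATE_KWARGS mirrors the inline defaults this diff removes in the next hunk):

# config.py (hypothetical sketch; only the imported names are confirmed by this diff)
LLM_MODEL_REPOS = [
    'bartowski/gemma-2-2b-it-GGUF',  # assumed first entry: startup loads gemma-2-2b-it-Q8_0.gguf from it
]
EMBED_MODEL_REPOS = [
    'sentence-transformers/all-MiniLM-L6-v2',  # illustrative placeholder
]
SUBTITLES_LANGUAGES = ['en', 'ru']  # illustrative placeholder
GENERATE_KWARGS = dict(  # values copied from the inline defaults removed by this commit
    temperature=0.2,
    top_p=0.95,
    top_k=40,
    repeat_penalty=1.0,
)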
@@ -130,32 +66,28 @@ def get_system_prompt_component(interactive: bool) -> gr.Textbox:
 
 
 def get_generate_args(do_sample: bool) -> List[gr.component]:
-    visible = do_sample
     generate_args = [
-        gr.Slider(label='temperature', value=GENERATE_KWARGS['temperature'], minimum=0.1, maximum=3, step=0.1, visible=visible),
-        gr.Slider(label='top_p', value=GENERATE_KWARGS['top_p'], minimum=0.1, maximum=1, step=0.1, visible=visible),
-        gr.Slider(label='top_k', value=GENERATE_KWARGS['top_k'], minimum=1, maximum=50, step=5, visible=visible),
-        gr.Slider(label='repeat_penalty', value=GENERATE_KWARGS['repeat_penalty'], minimum=1, maximum=5, step=0.1, visible=visible),
+        gr.Slider(minimum=0.1, maximum=3, value=GENERATE_KWARGS['temperature'], step=0.1, label='temperature', visible=do_sample),
+        gr.Slider(minimum=0.1, maximum=1, value=GENERATE_KWARGS['top_p'], step=0.01, label='top_p', visible=do_sample),
+        gr.Slider(minimum=1, maximum=50, value=GENERATE_KWARGS['top_k'], step=1, label='top_k', visible=do_sample),
+        gr.Slider(minimum=1, maximum=5, value=GENERATE_KWARGS['repeat_penalty'], step=0.1, label='repeat_penalty', visible=do_sample),
     ]
     return generate_args
 
 
-# ================== VARIABLES =============================
+def get_rag_mode_component(db: Optional[VectorStore]) -> gr.Checkbox:
+    value = visible = db is not None
+    return gr.Checkbox(value=value, label='RAG Mode', scale=1, visible=visible)
+
+
+# ================ LOADING AND INITIALIZING MODELS ========================
 
-MODELS_PATH = Path('models')
-MODELS_PATH.mkdir(exist_ok=True)
-DEFAULT_GGUF_URL = 'https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q8_0.gguf'
+start_llm_model, start_support_system_role, load_log = load_llm_model(LLM_MODEL_REPOS[0], 'gemma-2-2b-it-Q8_0.gguf')
+start_embed_model, load_log = load_embed_model(EMBED_MODEL_REPOS[0])
 
-start_model_dict, start_support_system_role, start_load_log = download_gguf_and_init_model(
-    gguf_url=DEFAULT_GGUF_URL, model_dict={},
-)
 
-GENERATE_KWARGS = dict(
-    temperature=0.2,
-    top_p=0.95,
-    top_k=40,
-    repeat_penalty=1.0,
-)
+
+# ================== APPLICATION WEB INTERFACE ============================
 
 theme = gr.themes.Base(primary_hue='green', secondary_hue='yellow', neutral_hue='zinc').set(
     loader_color='rgb(0, 255, 0)',
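
utils.py is likewise outside this diff. Judging from the startup call above (repo and GGUF filename in; model, system-role flag, and log out) and from the llama_cpp initialization removed in the first hunk, load_llm_model could look roughly like the sketch below. hf_hub_download and Llama are real APIs; the body itself is an assumption, not code from this commit:

# utils.py (sketch): signature inferred from the startup call above, body assumed
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

def load_llm_model(model_repo: str, model_file: str):
    # download the GGUF file from the HF repo (cached between runs)
    model_path = hf_hub_download(repo_id=model_repo, filename=model_file, local_dir='models')
    model = Llama(model_path=model_path, n_gpu_layers=-1, verbose=False)
    # same heuristic the removed code used to detect system-role support
    support_system_role = 'System role not supported' not in model.metadata['tokenizer.chat_template']
    log = f'Model {model_file} initialized\n'
    return model, support_system_role, log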
@@ -165,23 +97,32 @@ theme = gr.themes.Base(primary_hue='green', secondary_hue='yellow', neutral_hue=
 )
 css = '''.gradio-container {width: 60% !important}'''
 
+with gr.Blocks(theme=theme, css=css) as interface:
 
-# ================== INTERFACE =============================
+    # ==================== GRADIO STATES ===============================
 
-with gr.Blocks(theme=theme, css=css) as interface:
-    model_dict = gr.State(start_model_dict)
+    documents = gr.State([])
+    db = gr.State(None)
+    user_message_with_context = gr.State('')
     support_system_role = gr.State(start_support_system_role)
-
-    # ================= CHAT BOT PAGE ======================
-    with gr.Tab('Chatbot'):
+    llm_model_repos = gr.State(LLM_MODEL_REPOS)
+    embed_model_repos = gr.State(EMBED_MODEL_REPOS)
+    llm_model = gr.State(start_llm_model)
+    embed_model = gr.State(start_embed_model)
+
+
+
+    # ==================== BOT PAGE =================================
+
+    with gr.Tab(label='Chatbot'):
         with gr.Row():
             with gr.Column(scale=3):
                 chatbot = gr.Chatbot(
                     type='messages',  # new in gradio 5+
-                    show_copy_button=True,
-                    bubble_full_width=False,
+                    show_copy_button=True,
+                    bubble_full_width=False,
                     height=480,
-                )
+                )
                 user_message = gr.Textbox(label='User')
 
                 with gr.Row():
@@ -189,14 +130,14 @@ with gr.Blocks(theme=theme, css=css) as interface:
                     stop_btn = gr.Button('Stop')
                     clear_btn = gr.Button('Clear')
 
-            system_prompt = get_system_prompt_component(interactive=support_system_role.value)
+            # ------------- GENERATION PARAMETERS -------------------
 
             with gr.Column(scale=1, min_width=80):
                 with gr.Group():
-                    gr.Markdown('Length of message history')
+                    gr.Markdown('History size')
                     history_len = gr.Slider(
                         minimum=0,
-                        maximum=10,
+                        maximum=5,
                         value=0,
                         step=1,
                         info='Number of previous messages taken into account in history',
@@ -217,56 +158,243 @@
                     inputs=do_sample,
                     outputs=generate_args,
                     show_progress=False,
-                )
+                )
+
+                rag_mode = get_rag_mode_component(db=db.value)
+                k, score_threshold = get_rag_settings(rag_mode=rag_mode.value, render=False)
+                rag_mode.change(
+                    fn=get_rag_settings,
+                    inputs=[rag_mode],
+                    outputs=[k, score_threshold],
+                )
+                with gr.Row():
+                    k.render()
+                    score_threshold.render()
+
+        # ---------------- SYSTEM PROMPT AND USER MESSAGE -----------
+
+        with gr.Accordion('Prompt', open=True):
+            system_prompt = get_system_prompt_component(interactive=support_system_role.value)
+            user_message_with_context = get_user_message_with_context(text='', rag_mode=rag_mode.value)
+
+        # ---------------- SEND, CLEAR AND STOP BUTTONS ------------
 
         generate_event = gr.on(
             triggers=[user_message.submit, user_message_btn.click],
             fn=user_message_to_chatbot,
             inputs=[user_message, chatbot],
             outputs=[user_message, chatbot],
+            queue=False,
+        ).then(
+            fn=update_user_message_with_context,
+            inputs=[chatbot, rag_mode, db, k, score_threshold],
+            outputs=[user_message_with_context],
+        ).then(
+            fn=get_user_message_with_context,
+            inputs=[user_message_with_context, rag_mode],
+            outputs=[user_message_with_context],
         ).then(
-            fn=bot_response_to_chatbot,
-            inputs=[chatbot, model_dict, system_prompt, support_system_role, history_len, do_sample, *generate_args],
+            fn=get_llm_response,
+            inputs=[chatbot, llm_model, user_message_with_context, rag_mode, system_prompt,
+                    support_system_role, history_len, do_sample, *generate_args],
             outputs=[chatbot],
         )
+
         stop_btn.click(
             fn=None,
             inputs=None,
             outputs=None,
             cancels=generate_event,
+            queue=False,
         )
+
         clear_btn.click(
-            fn=lambda: None,
+            fn=lambda: (None, ''),
             inputs=None,
-            outputs=[chatbot],
+            outputs=[chatbot, user_message_with_context],
+            queue=False,
+        )
+
+
+
+    # ================= FILE DOWNLOAD PAGE =========================
+
+    with gr.Tab(label='Load documents'):
+        with gr.Row(variant='compact'):
+            upload_files = gr.File(file_count='multiple', label='Loading text files')
+            web_links = gr.Textbox(lines=6, label='Links to Web sites or YouTube')
+
+        with gr.Row(variant='compact'):
+            chunk_size = gr.Slider(50, 2000, value=500, step=50, label='Chunk size')
+            chunk_overlap = gr.Slider(0, 200, value=20, step=10, label='Chunk overlap')
+
+        subtitles_lang = gr.Radio(
+            SUBTITLES_LANGUAGES,
+            value=SUBTITLES_LANGUAGES[0],
+            label='YouTube subtitle language',
+        )
+
+        load_documents_btn = gr.Button(value='Upload documents and initialize database')
+        load_docs_log = gr.Textbox(label='Status of loading and splitting documents', interactive=False)
+
+        load_documents_btn.click(
+            fn=load_documents_and_create_db,
+            inputs=[upload_files, web_links, subtitles_lang, chunk_size, chunk_overlap, embed_model],
+            outputs=[documents, db, load_docs_log],
+        ).success(
+            fn=get_rag_mode_component,
+            inputs=[db],
+            outputs=[rag_mode],
+        )
+
+    gr.HTML("""<h3 style='text-align: center'>
+    <a href="https://github.com/sergey21000/chatbot-rag" target='_blank'>GitHub Repository</a></h3>
+    """)
+
+
+
+    # ================= VIEW PAGE FOR ALL DOCUMENTS =================
+
+    with gr.Tab(label='View documents'):
+        view_documents_btn = gr.Button(value='Show downloaded text chunks')
+        view_documents_textbox = gr.Textbox(
+            lines=1,
+            placeholder='To view chunks, load documents in the Load documents tab',
+            label='Uploaded chunks',
         )
+        sep = '=' * 20
+        view_documents_btn.click(
+            lambda documents: f'\n{sep}\n\n'.join([doc.page_content for doc in documents]),
+            inputs=[documents],
+            outputs=[view_documents_textbox],
+        )
+
 
-    # ================= LOAD MODELS PAGE ======================
-    with gr.Tab('Load model'):
-        gguf_url = gr.Textbox(
+    # ============== GGUF MODELS DOWNLOAD PAGE =====================
+
+    with gr.Tab('Load LLM model'):
+        new_llm_model_repo = gr.Textbox(
             value='',
-            label='Link to GGUF',
-            placeholder='URL link to the model in GGUF format',
+            label='Add repository',
+            placeholder='Link to repository of HF models in GGUF format',
+        )
+        new_llm_model_repo_btn = gr.Button('Add repository')
+        curr_llm_model_repo = gr.Dropdown(
+            choices=LLM_MODEL_REPOS,
+            value=None,
+            label='HF Model Repository',
+        )
+        curr_llm_model_path = gr.Dropdown(
+            choices=[],
+            value=None,
+            label='GGUF model file',
         )
-        load_model_btn = gr.Button('Downloading GGUF and initializing the model')
-        load_log = gr.Textbox(
-            value=start_load_log,
+        load_llm_model_btn = gr.Button('Loading and initializing model')
+        load_llm_model_log = gr.Textbox(
+            value=f'Model {LLM_MODEL_REPOS[0]} loaded at application startup',
             label='Model loading status',
-            lines=3,
+            lines=6,
         )
-
-        load_model_btn.click(
-            fn=download_gguf_and_init_model,
-            inputs=[gguf_url, model_dict],
-            outputs=[model_dict, support_system_role, load_log],
+
+        with gr.Group():
+            gr.Markdown('Free up disk space by deleting all models except the currently selected one')
+            clear_llm_folder_btn = gr.Button('Clear folder')
+
+        new_llm_model_repo_btn.click(
+            fn=add_new_model_repo,
+            inputs=[new_llm_model_repo, llm_model_repos],
+            outputs=[curr_llm_model_repo, load_llm_model_log],
         ).success(
+            fn=lambda: '',
+            inputs=None,
+            outputs=[new_llm_model_repo],
+        )
+
+        curr_llm_model_repo.change(
+            fn=get_gguf_model_names,
+            inputs=[curr_llm_model_repo],
+            outputs=[curr_llm_model_path],
+        )
+
+        load_llm_model_btn.click(
+            fn=load_llm_model,
+            inputs=[curr_llm_model_repo, curr_llm_model_path],
+            outputs=[llm_model, support_system_role, load_llm_model_log],
+        ).success(
+            fn=lambda log: log + get_memory_usage(),
+            inputs=[load_llm_model_log],
+            outputs=[load_llm_model_log],
+        ).then(
             fn=get_system_prompt_component,
             inputs=[support_system_role],
             outputs=[system_prompt],
         )
 
-    gr.HTML("""<h3 style='text-align: center'>
-    <a href="https://github.com/sergey21000/gradio-llamacpp-chatbot" target='_blank'>GitHub Repository</a></h3>
-    """)
-
-interface.launch(server_name='0.0.0.0', server_port=7860)
+        clear_llm_folder_btn.click(
+            fn=clear_llm_folder,
+            inputs=[curr_llm_model_path],
+            outputs=None,
+        ).success(
+            fn=lambda model_path: f'Models other than {model_path} removed',
+            inputs=[curr_llm_model_path],
+            outputs=None,
+        )
+
+
+    # ============== EMBEDDING MODELS DOWNLOAD PAGE =============
+
+    with gr.Tab('Load embed model'):
+        new_embed_model_repo = gr.Textbox(
+            value='',
+            label='Add repository',
+            placeholder='Link to HF model repository',
+        )
+        new_embed_model_repo_btn = gr.Button('Add repository')
+        curr_embed_model_repo = gr.Dropdown(
+            choices=EMBED_MODEL_REPOS,
+            value=None,
+            label='HF model repository',
+        )
+
+        load_embed_model_btn = gr.Button('Loading and initializing model')
+        load_embed_model_log = gr.Textbox(
+            value=f'Model {EMBED_MODEL_REPOS[0]} loaded at application startup',
+            label='Model loading status',
+            lines=7,
+        )
+        with gr.Group():
+            gr.Markdown('Free up disk space by deleting all models except the currently selected one')
+            clear_embed_folder_btn = gr.Button('Clear folder')
+
+        new_embed_model_repo_btn.click(
+            fn=add_new_model_repo,
+            inputs=[new_embed_model_repo, embed_model_repos],
+            outputs=[curr_embed_model_repo, load_embed_model_log],
+        ).success(
+            fn=lambda: '',
+            inputs=None,
+            outputs=new_embed_model_repo,
+        )
+
+        load_embed_model_btn.click(
+            fn=load_embed_model,
+            inputs=[curr_embed_model_repo],
+            outputs=[embed_model, load_embed_model_log],
+        ).success(
+            fn=lambda log: log + get_memory_usage(),
+            inputs=[load_embed_model_log],
+            outputs=[load_embed_model_log],
+        )
+
+        clear_embed_folder_btn.click(
+            fn=clear_embed_folder,
+            inputs=[curr_embed_model_repo],
+            outputs=None,
+        ).success(
+            fn=lambda model_repo: f'Models other than {model_repo} removed',
+            inputs=[curr_embed_model_repo],
+            outputs=None,
+        )
+
+
+interface.launch(server_name='0.0.0.0', server_port=7860)  # debug=True
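
The RAG step wired into the event chain above also lives in utils.py. Assuming db is a LangChain VectorStore, as the get_rag_mode_component type hint suggests, update_user_message_with_context might do something like the following; similarity_search_with_relevance_scores is the real LangChain method, everything else is inferred from the wired inputs [chatbot, rag_mode, db, k, score_threshold]:

# utils.py (sketch): hypothetical retrieval step, not code from this commit
def update_user_message_with_context(chatbot, rag_mode, db, k, score_threshold):
    user_message = chatbot[-1]['content']
    if not rag_mode or db is None:
        return ''
    k = 100 if k == 'all' else int(k)  # 'all' is one of the gr.Radio choices above
    docs_and_scores = db.similarity_search_with_relevance_scores(user_message, k=k)
    # keep only chunks above the user-selected relevance threshold
    context = '\n\n'.join(doc.page_content for doc, score in docs_and_scores if score >= score_threshold)
    if not context:
        return user_message
    return f'Context:\n{context}\n\nUser message:\n{user_message}'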
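
One pattern in the Chatbot tab worth calling out: k and score_threshold are created with render=False and placed later with k.render() and score_threshold.render(), which lets the rag_mode.change event reference the components before they appear in the layout. A minimal standalone illustration of the same Gradio pattern:

# standalone demo of the render=False / .render() pattern used in the Chatbot tab
import gradio as gr

with gr.Blocks() as demo:
    # component is created here but not yet placed in the layout
    slider = gr.Slider(0, 10, value=5, label='Placed later', render=False)
    gr.Markdown('The slider is created above, but rendered below:')
    slider.render()  # inserts the pre-built component at this point in the layout

demo.launch()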