sergey21000 committed
Commit 62283cf · verified · 1 Parent(s): 972b192

Update app.py

Files changed (1)
  1. app.py +303 -175
app.py CHANGED
@@ -1,127 +1,63 @@
-from pathlib import Path
-from shutil import rmtree
-from typing import Union, List, Dict, Tuple, Optional
-from tqdm import tqdm
+from typing import List, Optional
 
-import requests
 import gradio as gr
-from llama_cpp import Llama
-
-
-# ================== ANNOTATIONS ========================
-
-CHAT_HISTORY = List[Optional[Dict[str, Optional[str]]]]
-MODEL_DICT = Dict[str, Llama]
-
-
-# ================== FUNCS =============================
-
-def download_file(file_url: str, file_path: Union[str, Path]) -> None:
-    response = requests.get(file_url, stream=True)
-    if response.status_code != 200:
-        raise Exception(f'File is not available for download at the link: {file_url}')
-    total_size = int(response.headers.get('content-length', 0))
-    progress_tqdm = tqdm(desc='Loading GGUF file', total=total_size, unit='iB', unit_scale=True)
-    progress_gradio = gr.Progress()
-    completed_size = 0
-    with open(file_path, 'wb') as file:
-        for data in response.iter_content(chunk_size=4096):
-            size = file.write(data)
-            progress_tqdm.update(size)
-            completed_size += size
-            desc = f'Loading GGUF file, {completed_size/1024**3:.3f}/{total_size/1024**3:.3f} GB'
-            progress_gradio(completed_size/total_size, desc=desc)
-
-
-def download_gguf_and_init_model(gguf_url: str, model_dict: MODEL_DICT) -> Tuple[MODEL_DICT, bool, str]:
-    log = ''
-    if not gguf_url.endswith('.gguf'):
-        log += 'The link must be a direct link to the GGUF file\n'
-        return model_dict, False, log
-
-    gguf_filename = gguf_url.rsplit('/')[-1]
-    model_path = MODELS_PATH / gguf_filename
-    progress = gr.Progress()
-
-    if not model_path.is_file():
-        progress(0.3, desc='Step 1/2: Loading GGUF model file')
-        try:
-            download_file(gguf_url, model_path)
-            log += f'Model file {gguf_filename} successfully loaded\n'
-        except Exception as ex:
-            log += f'Error loading model from link {gguf_url}, error code:\n{ex}\n'
-            curr_model = model_dict.get('model')
-            if curr_model is None:
-                log += 'Model is missing from dictionary "model_dict"\n'
-                return model_dict, False, log
-            curr_model_filename = Path(curr_model.model_path).name
-            log += f'Current initialized model: {curr_model_filename}\n'
-            return model_dict, False, log
-    else:
-        log += f'Model file {gguf_filename} loaded, initializing model...\n'
-
-    progress(0.7, desc='Step 2/2: Model initialization')
-    model = Llama(model_path=str(model_path), n_gpu_layers=-1, verbose=True)
-    model_dict = {'model': model}
-    support_system_role = 'System role not supported' not in model.metadata['tokenizer.chat_template']
-    log += f'Model {gguf_filename} initialized\n'
-    return model_dict, support_system_role, log
-
-
-def user_message_to_chatbot(user_message: str, chatbot: CHAT_HISTORY) -> Tuple[str, CHAT_HISTORY]:
-    if user_message:
-        chatbot.append({'role': 'user', 'metadata': {'title': None}, 'content': user_message})
-    return '', chatbot
-
-
-def bot_response_to_chatbot(
-    chatbot: CHAT_HISTORY,
-    model_dict: MODEL_DICT,
-    system_prompt: str,
-    support_system_role: bool,
-    history_len: int,
-    do_sample: bool,
-    *generate_args,
-):
-
-    model = model_dict.get('model')
-    if model is None:
-        gr.Info('Model not initialized')
-        yield chatbot
-        return
-
-    if len(chatbot) == 0 or chatbot[-1]['role'] == 'assistant':
-        yield chatbot
-        return
-
-    messages = []
-    if support_system_role and system_prompt:
-        messages.append({'role': 'system', 'metadata': {'title': None}, 'content': system_prompt})
-
-    if history_len != 0:
-        messages.extend(chatbot[:-1][-(history_len*2):])
-
-    messages.append(chatbot[-1])
-
-    gen_kwargs = dict(zip(GENERATE_KWARGS.keys(), generate_args))
-    gen_kwargs['top_k'] = int(gen_kwargs['top_k'])
-    if not do_sample:
-        gen_kwargs['top_p'] = 0.0
-        gen_kwargs['top_k'] = 1
-        gen_kwargs['repeat_penalty'] = 1.0
-
-    stream_response = model.create_chat_completion(
-        messages=messages,
-        stream=True,
-        **gen_kwargs,
-    )
+from langchain_core.vectorstores import VectorStore
+
+from config import (
+    LLM_MODEL_REPOS,
+    EMBED_MODEL_REPOS,
+    SUBTITLES_LANGUAGES,
+    GENERATE_KWARGS,
+)
+
+from utils import (
+    load_llm_model,
+    load_embed_model,
+    load_documents_and_create_db,
+    user_message_to_chatbot,
+    update_user_message_with_context,
+    get_llm_response,
+    get_gguf_model_names,
+    add_new_model_repo,
+    clear_llm_folder,
+    clear_embed_folder,
+    get_memory_usage,
+)
+
 
-    chatbot.append({'role': 'assistant', 'metadata': {'title': None}, 'content': ''})
-    for chunk in stream_response:
-        token = chunk['choices'][0]['delta'].get('content')
-        if token is not None:
-            chatbot[-1]['content'] += token
-            yield chatbot
+# ============ INTERFACE COMPONENT INITIALIZATION FUNCS ============
+
+def get_rag_settings(rag_mode: bool, render: bool = True):
+    k = gr.Radio(
+        choices=[1, 2, 3, 4, 5, 'all'],
+        value=2,
+        label='Number of relevant documents for search',
+        visible=rag_mode,
+        render=render,
+    )
+    score_threshold = gr.Slider(
+        minimum=0,
+        maximum=1,
+        value=0.5,
+        step=0.05,
+        label='relevance_scores_threshold',
+        visible=rag_mode,
+        render=render,
+    )
+    return k, score_threshold
+
+
+def get_user_message_with_context(text: str, rag_mode: bool) -> gr.component:
+    num_lines = len(text.split('\n'))
+    max_lines = 10
+    num_lines = max_lines if num_lines > max_lines else num_lines
+    return gr.Textbox(
+        text,
+        visible=rag_mode,
+        interactive=False,
+        label='User Message With Context',
+        lines=num_lines,
+    )
 
 
 def get_system_prompt_component(interactive: bool) -> gr.Textbox:
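
Note: the new imports pull constants from a config module that this commit does not include. Below is a minimal sketch of what config.py presumably defines; the names are confirmed by the import above, while the values are illustrative assumptions (GENERATE_KWARGS mirrors the inline defaults this diff removes in the next hunk):

# config.py (hypothetical sketch; only the imported names are confirmed by this diff)
LLM_MODEL_REPOS = [
    'bartowski/gemma-2-2b-it-GGUF',  # assumed first entry: startup loads gemma-2-2b-it-Q8_0.gguf from it
]
EMBED_MODEL_REPOS = [
    'sentence-transformers/all-MiniLM-L6-v2',  # illustrative placeholder
]
SUBTITLES_LANGUAGES = ['en', 'ru']  # illustrative placeholder
GENERATE_KWARGS = dict(  # values copied from the inline defaults removed by this commit
    temperature=0.2,
    top_p=0.95,
    top_k=40,
    repeat_penalty=1.0,
)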
@@ -130,32 +66,28 @@ def get_system_prompt_component(interactive: bool) -> gr.Textbox:
 
 
 def get_generate_args(do_sample: bool) -> List[gr.component]:
-    visible = do_sample
     generate_args = [
-        gr.Slider(label='temperature', value=GENERATE_KWARGS['temperature'], minimum=0.1, maximum=3, step=0.1, visible=visible),
-        gr.Slider(label='top_p', value=GENERATE_KWARGS['top_p'], minimum=0.1, maximum=1, step=0.1, visible=visible),
-        gr.Slider(label='top_k', value=GENERATE_KWARGS['top_k'], minimum=1, maximum=50, step=5, visible=visible),
-        gr.Slider(label='repeat_penalty', value=GENERATE_KWARGS['repeat_penalty'], minimum=1, maximum=5, step=0.1, visible=visible),
+        gr.Slider(minimum=0.1, maximum=3, value=GENERATE_KWARGS['temperature'], step=0.1, label='temperature', visible=do_sample),
+        gr.Slider(minimum=0.1, maximum=1, value=GENERATE_KWARGS['top_p'], step=0.01, label='top_p', visible=do_sample),
+        gr.Slider(minimum=1, maximum=50, value=GENERATE_KWARGS['top_k'], step=1, label='top_k', visible=do_sample),
+        gr.Slider(minimum=1, maximum=5, value=GENERATE_KWARGS['repeat_penalty'], step=0.1, label='repeat_penalty', visible=do_sample),
     ]
     return generate_args
 
 
-# ================== VARIABLES =============================
+def get_rag_mode_component(db: Optional[VectorStore]) -> gr.Checkbox:
+    value = visible = db is not None
+    return gr.Checkbox(value=value, label='RAG Mode', scale=1, visible=visible)
+
+
+# ================ LOADING AND INITIALIZING MODELS ========================
 
-MODELS_PATH = Path('models')
-MODELS_PATH.mkdir(exist_ok=True)
-DEFAULT_GGUF_URL = 'https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q8_0.gguf'
+start_llm_model, start_support_system_role, load_log = load_llm_model(LLM_MODEL_REPOS[0], 'gemma-2-2b-it-Q8_0.gguf')
+start_embed_model, load_log = load_embed_model(EMBED_MODEL_REPOS[0])
 
-start_model_dict, start_support_system_role, start_load_log = download_gguf_and_init_model(
-    gguf_url=DEFAULT_GGUF_URL, model_dict={},
-)
 
-GENERATE_KWARGS = dict(
-    temperature=0.2,
-    top_p=0.95,
-    top_k=40,
-    repeat_penalty=1.0,
-)
+
+# ================== APPLICATION WEB INTERFACE ============================
 
 theme = gr.themes.Base(primary_hue='green', secondary_hue='yellow', neutral_hue='zinc').set(
     loader_color='rgb(0, 255, 0)',
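
utils.py is likewise outside this diff. Judging from the startup call above (repo and GGUF filename in; model, system-role flag, and log out) and from the llama_cpp initialization removed in the first hunk, load_llm_model could look roughly like the sketch below. hf_hub_download and Llama are real APIs; the body itself is an assumption, not code from this commit:

# utils.py (sketch): signature inferred from the startup call above, body assumed
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

def load_llm_model(model_repo: str, model_file: str):
    # download the GGUF file from the HF repo (cached between runs)
    model_path = hf_hub_download(repo_id=model_repo, filename=model_file, local_dir='models')
    model = Llama(model_path=model_path, n_gpu_layers=-1, verbose=False)
    # same heuristic the removed code used to detect system-role support
    support_system_role = 'System role not supported' not in model.metadata['tokenizer.chat_template']
    log = f'Model {model_file} initialized\n'
    return model, support_system_role, log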
@@ -165,23 +97,32 @@ theme = gr.themes.Base(primary_hue='green', secondary_hue='yellow', neutral_hue=
 )
 css = '''.gradio-container {width: 60% !important}'''
 
+with gr.Blocks(theme=theme, css=css) as interface:
 
-# ================== INTERFACE =============================
+    # ==================== GRADIO STATES ===============================
 
-with gr.Blocks(theme=theme, css=css) as interface:
-    model_dict = gr.State(start_model_dict)
+    documents = gr.State([])
+    db = gr.State(None)
+    user_message_with_context = gr.State('')
     support_system_role = gr.State(start_support_system_role)
-
-    # ================= CHAT BOT PAGE ======================
-    with gr.Tab('Chatbot'):
+    llm_model_repos = gr.State(LLM_MODEL_REPOS)
+    embed_model_repos = gr.State(EMBED_MODEL_REPOS)
+    llm_model = gr.State(start_llm_model)
+    embed_model = gr.State(start_embed_model)
+
+
+
+    # ==================== BOT PAGE =================================
+
+    with gr.Tab(label='Chatbot'):
         with gr.Row():
             with gr.Column(scale=3):
                 chatbot = gr.Chatbot(
                     type='messages',  # new in gradio 5+
-                    show_copy_button=True,
-                    bubble_full_width=False,
+                    show_copy_button=True,
+                    bubble_full_width=False,
                     height=480,
-                )
+                )
                 user_message = gr.Textbox(label='User')
 
                 with gr.Row():
@@ -189,14 +130,14 @@ with gr.Blocks(theme=theme, css=css) as interface:
                     stop_btn = gr.Button('Stop')
                     clear_btn = gr.Button('Clear')
 
-            system_prompt = get_system_prompt_component(interactive=support_system_role.value)
+            # ------------- GENERATION PARAMETERS -------------------
 
             with gr.Column(scale=1, min_width=80):
                 with gr.Group():
-                    gr.Markdown('Length of message history')
+                    gr.Markdown('History size')
                     history_len = gr.Slider(
                         minimum=0,
-                        maximum=10,
+                        maximum=5,
                         value=0,
                         step=1,
                         info='Number of previous messages taken into account in history',
@@ -217,56 +158,243 @@
                     inputs=do_sample,
                     outputs=generate_args,
                     show_progress=False,
-                )
+                )
+
+                rag_mode = get_rag_mode_component(db=db.value)
+                k, score_threshold = get_rag_settings(rag_mode=rag_mode.value, render=False)
+                rag_mode.change(
+                    fn=get_rag_settings,
+                    inputs=[rag_mode],
+                    outputs=[k, score_threshold],
+                )
+                with gr.Row():
+                    k.render()
+                    score_threshold.render()
+
+        # ---------------- SYSTEM PROMPT AND USER MESSAGE -----------
+
+        with gr.Accordion('Prompt', open=True):
+            system_prompt = get_system_prompt_component(interactive=support_system_role.value)
+            user_message_with_context = get_user_message_with_context(text='', rag_mode=rag_mode.value)
+
+        # ---------------- SEND, CLEAR AND STOP BUTTONS ------------
 
         generate_event = gr.on(
             triggers=[user_message.submit, user_message_btn.click],
             fn=user_message_to_chatbot,
             inputs=[user_message, chatbot],
             outputs=[user_message, chatbot],
+            queue=False,
+        ).then(
+            fn=update_user_message_with_context,
+            inputs=[chatbot, rag_mode, db, k, score_threshold],
+            outputs=[user_message_with_context],
+        ).then(
+            fn=get_user_message_with_context,
+            inputs=[user_message_with_context, rag_mode],
+            outputs=[user_message_with_context],
         ).then(
-            fn=bot_response_to_chatbot,
-            inputs=[chatbot, model_dict, system_prompt, support_system_role, history_len, do_sample, *generate_args],
+            fn=get_llm_response,
+            inputs=[chatbot, llm_model, user_message_with_context, rag_mode, system_prompt,
+                    support_system_role, history_len, do_sample, *generate_args],
             outputs=[chatbot],
         )
+
         stop_btn.click(
             fn=None,
             inputs=None,
             outputs=None,
             cancels=generate_event,
+            queue=False,
         )
+
         clear_btn.click(
-            fn=lambda: None,
+            fn=lambda: (None, ''),
             inputs=None,
-            outputs=[chatbot],
+            outputs=[chatbot, user_message_with_context],
+            queue=False,
+        )
+
+
+
+    # ================= FILE DOWNLOAD PAGE =========================
+
+    with gr.Tab(label='Load documents'):
+        with gr.Row(variant='compact'):
+            upload_files = gr.File(file_count='multiple', label='Loading text files')
+            web_links = gr.Textbox(lines=6, label='Links to Web sites or YouTube')
+
+        with gr.Row(variant='compact'):
+            chunk_size = gr.Slider(50, 2000, value=500, step=50, label='Chunk size')
+            chunk_overlap = gr.Slider(0, 200, value=20, step=10, label='Chunk overlap')
+
+        subtitles_lang = gr.Radio(
+            SUBTITLES_LANGUAGES,
+            value=SUBTITLES_LANGUAGES[0],
+            label='YouTube subtitle language',
+        )
+
+        load_documents_btn = gr.Button(value='Upload documents and initialize database')
+        load_docs_log = gr.Textbox(label='Status of loading and splitting documents', interactive=False)
+
+        load_documents_btn.click(
+            fn=load_documents_and_create_db,
+            inputs=[upload_files, web_links, subtitles_lang, chunk_size, chunk_overlap, embed_model],
+            outputs=[documents, db, load_docs_log],
+        ).success(
+            fn=get_rag_mode_component,
+            inputs=[db],
+            outputs=[rag_mode],
+        )
+
+    gr.HTML("""<h3 style='text-align: center'>
+    <a href="https://github.com/sergey21000/chatbot-rag" target='_blank'>GitHub Repository</a></h3>
+    """)
+
+
+
+    # ================= VIEW PAGE FOR ALL DOCUMENTS =================
+
+    with gr.Tab(label='View documents'):
+        view_documents_btn = gr.Button(value='Show downloaded text chunks')
+        view_documents_textbox = gr.Textbox(
+            lines=1,
+            placeholder='To view chunks, load documents in the Load documents tab',
+            label='Uploaded chunks',
         )
+        sep = '=' * 20
+        view_documents_btn.click(
+            lambda documents: f'\n{sep}\n\n'.join([doc.page_content for doc in documents]),
+            inputs=[documents],
+            outputs=[view_documents_textbox],
+        )
+
 
-    # ================= LOAD MODELS PAGE ======================
-    with gr.Tab('Load model'):
-        gguf_url = gr.Textbox(
+    # ============== GGUF MODELS DOWNLOAD PAGE =====================
+
+    with gr.Tab('Load LLM model'):
+        new_llm_model_repo = gr.Textbox(
             value='',
-            label='Link to GGUF',
-            placeholder='URL link to the model in GGUF format',
+            label='Add repository',
+            placeholder='Link to repository of HF models in GGUF format',
+        )
+        new_llm_model_repo_btn = gr.Button('Add repository')
+        curr_llm_model_repo = gr.Dropdown(
+            choices=LLM_MODEL_REPOS,
+            value=None,
+            label='HF Model Repository',
+        )
+        curr_llm_model_path = gr.Dropdown(
+            choices=[],
+            value=None,
+            label='GGUF model file',
         )
-        load_model_btn = gr.Button('Downloading GGUF and initializing the model')
-        load_log = gr.Textbox(
-            value=start_load_log,
+        load_llm_model_btn = gr.Button('Loading and initializing model')
+        load_llm_model_log = gr.Textbox(
+            value=f'Model {LLM_MODEL_REPOS[0]} loaded at application startup',
             label='Model loading status',
-            lines=3,
+            lines=6,
         )
-
-        load_model_btn.click(
-            fn=download_gguf_and_init_model,
-            inputs=[gguf_url, model_dict],
-            outputs=[model_dict, support_system_role, load_log],
+
+        with gr.Group():
+            gr.Markdown('Free up disk space by deleting all models except the currently selected one')
+            clear_llm_folder_btn = gr.Button('Clear folder')
+
+        new_llm_model_repo_btn.click(
+            fn=add_new_model_repo,
+            inputs=[new_llm_model_repo, llm_model_repos],
+            outputs=[curr_llm_model_repo, load_llm_model_log],
         ).success(
+            fn=lambda: '',
+            inputs=None,
+            outputs=[new_llm_model_repo],
+        )
+
+        curr_llm_model_repo.change(
+            fn=get_gguf_model_names,
+            inputs=[curr_llm_model_repo],
+            outputs=[curr_llm_model_path],
+        )
+
+        load_llm_model_btn.click(
+            fn=load_llm_model,
+            inputs=[curr_llm_model_repo, curr_llm_model_path],
+            outputs=[llm_model, support_system_role, load_llm_model_log],
+        ).success(
+            fn=lambda log: log + get_memory_usage(),
+            inputs=[load_llm_model_log],
+            outputs=[load_llm_model_log],
+        ).then(
             fn=get_system_prompt_component,
             inputs=[support_system_role],
             outputs=[system_prompt],
         )
 
-    gr.HTML("""<h3 style='text-align: center'>
-    <a href="https://github.com/sergey21000/gradio-llamacpp-chatbot" target='_blank'>GitHub Repository</a></h3>
-    """)
-
-interface.launch(server_name='0.0.0.0', server_port=7860)
+        clear_llm_folder_btn.click(
+            fn=clear_llm_folder,
+            inputs=[curr_llm_model_path],
+            outputs=None,
+        ).success(
+            fn=lambda model_path: f'Models other than {model_path} removed',
+            inputs=[curr_llm_model_path],
+            outputs=None,
+        )
+
+
+    # ============== EMBEDDING MODELS DOWNLOAD PAGE =============
+
+    with gr.Tab('Load embed model'):
+        new_embed_model_repo = gr.Textbox(
+            value='',
+            label='Add repository',
+            placeholder='Link to HF model repository',
+        )
+        new_embed_model_repo_btn = gr.Button('Add repository')
+        curr_embed_model_repo = gr.Dropdown(
+            choices=EMBED_MODEL_REPOS,
+            value=None,
+            label='HF model repository',
+        )
+
+        load_embed_model_btn = gr.Button('Loading and initializing model')
+        load_embed_model_log = gr.Textbox(
+            value=f'Model {EMBED_MODEL_REPOS[0]} loaded at application startup',
+            label='Model loading status',
+            lines=7,
+        )
+        with gr.Group():
+            gr.Markdown('Free up disk space by deleting all models except the currently selected one')
+            clear_embed_folder_btn = gr.Button('Clear folder')
+
+        new_embed_model_repo_btn.click(
+            fn=add_new_model_repo,
+            inputs=[new_embed_model_repo, embed_model_repos],
+            outputs=[curr_embed_model_repo, load_embed_model_log],
+        ).success(
+            fn=lambda: '',
+            inputs=None,
+            outputs=new_embed_model_repo,
+        )
+
+        load_embed_model_btn.click(
+            fn=load_embed_model,
+            inputs=[curr_embed_model_repo],
+            outputs=[embed_model, load_embed_model_log],
+        ).success(
+            fn=lambda log: log + get_memory_usage(),
+            inputs=[load_embed_model_log],
+            outputs=[load_embed_model_log],
+        )
+
+        clear_embed_folder_btn.click(
+            fn=clear_embed_folder,
+            inputs=[curr_embed_model_repo],
+            outputs=None,
+        ).success(
+            fn=lambda model_repo: f'Models other than {model_repo} removed',
+            inputs=[curr_embed_model_repo],
+            outputs=None,
+        )
+
+
+interface.launch(server_name='0.0.0.0', server_port=7860)  # debug=True
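
The RAG step wired into the event chain above also lives in utils.py. Assuming db is a LangChain VectorStore, as the get_rag_mode_component type hint suggests, update_user_message_with_context might do something like the following; similarity_search_with_relevance_scores is the real LangChain method, everything else is inferred from the wired inputs [chatbot, rag_mode, db, k, score_threshold]:

# utils.py (sketch): hypothetical retrieval step, not code from this commit
def update_user_message_with_context(chatbot, rag_mode, db, k, score_threshold):
    user_message = chatbot[-1]['content']
    if not rag_mode or db is None:
        return ''
    k = 100 if k == 'all' else int(k)  # 'all' is one of the gr.Radio choices above
    docs_and_scores = db.similarity_search_with_relevance_scores(user_message, k=k)
    # keep only chunks above the user-selected relevance threshold
    context = '\n\n'.join(doc.page_content for doc, score in docs_and_scores if score >= score_threshold)
    if not context:
        return user_message
    return f'Context:\n{context}\n\nUser message:\n{user_message}'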
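
One pattern in the Chatbot tab worth calling out: k and score_threshold are created with render=False and placed later with k.render() and score_threshold.render(), which lets the rag_mode.change event reference the components before they appear in the layout. A minimal standalone illustration of the same Gradio pattern:

# standalone demo of the render=False / .render() pattern used in the Chatbot tab
import gradio as gr

with gr.Blocks() as demo:
    # component is created here but not yet placed in the layout
    slider = gr.Slider(0, 10, value=5, label='Placed later', render=False)
    gr.Markdown('The slider is created above, but rendered below:')
    slider.render()  # inserts the pre-built component at this point in the layout

demo.launch()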