Create app.py
app.py
ADDED
@@ -0,0 +1,139 @@
import spaces
import os
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download

token_huggingface = os.getenv("HUGGINGFACE_TOKEN")

# Download the GGUF build of Gemma 2 2B it; the token grants access to the gated repo.
hf_hub_download(
    repo_id="google/gemma-2-2b-it-GGUF",
    filename="2b_it_v2.gguf",
    local_dir="./modelos",
    token=token_huggingface
)

# Loaded lazily inside the GPU-decorated handler so the model lives in the GPU worker.
llm = None

@spaces.GPU(duration=120)
def responder(
    mensaje,
    historial: list[tuple[str, str]],
    mensaje_sistema,
    max_tokens,
    temperatura,
    top_p,
    top_k,
    penalizacion_repeticion,
):
    plantilla_chat = MessagesFormatterType.GEMMA_2

    global llm

    # Load the model on the first request only.
    if llm is None:
        llm = Llama(
            model_path="modelos/2b_it_v2.gguf",
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )

    proveedor = LlamaCppPythonProvider(llm)

    agente = LlamaCppAgent(
        proveedor,
        system_prompt=mensaje_sistema,
        predefined_messages_formatter_type=plantilla_chat,
        debug_output=True
    )

    configuracion = proveedor.get_provider_default_settings()
    configuracion.temperature = temperatura
    configuracion.top_k = top_k
    configuracion.top_p = top_p
    configuracion.max_tokens = max_tokens
    configuracion.repeat_penalty = penalizacion_repeticion
    configuracion.stream = True

    # Rebuild the chat history from Gradio's (user, assistant) tuples.
    mensajes = BasicChatHistory()
    for msj in historial:
        usuario = {
            'role': Roles.user,
            'content': msj[0]
        }
        asistente = {
            'role': Roles.assistant,
            'content': msj[1]
        }
        mensajes.add_message(usuario)
        mensajes.add_message(asistente)

    flujo = agente.get_chat_response(
        mensaje,
        llm_sampling_settings=configuracion,
        chat_history=mensajes,
        returns_streaming_generator=True,
        print_output=False
    )

    # Stream the accumulated partial response back to the UI.
    salida = ""
    for fragmento in flujo:
        salida += fragmento
        yield salida

descripcion = """<p align="center">Chat with Gemma 2B using llama.cpp</p>
<p><center>
<a href="https://huggingface.co/google/gemma-2-2b-it" target="_blank">[Gemma 2B it model]</a>
<a href="https://huggingface.co/google/gemma-2-2b-it-GGUF" target="_blank">[Gemma 2B it GGUF model]</a>
</center></p>
"""

demo = gr.ChatInterface(
    responder,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p",
        ),
        gr.Slider(
            minimum=0,
            maximum=100,
            value=40,
            step=1,
            label="Top-k",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=1.1,
            step=0.1,
            label="Repetition penalty",
        ),
    ],
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Submit",
    title="Chat with Gemma 2B using llama.cpp",
    description=descripcion,
    chatbot=gr.Chatbot(
        scale=1,
        likeable=False,
        show_copy_button=True
    )
)

if __name__ == "__main__":
    demo.launch()
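
For this Space to build, a requirements.txt next to app.py would need roughly the packages below, matching the imports in the file. This is a sketch; the exact entries and versions are an assumption, not taken from this commit:

spaces
gradio
huggingface_hub
llama-cpp-python
llama-cpp-agent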