Llama-3.2-1b-CPU

Running

App Files Community

KingNish committed on Sep 26

Commit

66b33d0

•

1 Parent(s): 55b49ed

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -22

app.py CHANGED Viewed

@@ -8,27 +8,18 @@ from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 from huggingface_hub import hf_hub_download
-# Global variables
-MODEL_PATH = "models/llama-3.2-1b-instruct-q4_k_m.gguf"
-CHAT_TEMPLATE = MessagesFormatterType.LLAMA_3
-# Download the model (if not already present)
 hf_hub_download(
     repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
     filename="llama-3.2-1b-instruct-q4_k_m.gguf",
     local_dir="./models"
 )
-# Initialize the model globally
-llm = Llama(
-    model_path=MODEL_PATH,
-    n_gpu_layers=0,
-    n_batch=32000,
-    n_ctx=2048,
-)
-# Initialize the provider globally
-provider = LlamaCppPythonProvider(llm)
 def respond(
     message,
@@ -41,10 +32,26 @@ def respond(
     top_k,
     repeat_penalty,
 ):
     agent = LlamaCppAgent(
         provider,
-        system_prompt=system_message,
-        predefined_messages_formatter_type=CHAT_TEMPLATE,
         debug_output=True
     )
@@ -94,9 +101,10 @@ Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for c
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Dropdown(
-            [MODEL_PATH.split("/")[-1]],
-            value=MODEL_PATH.split("/")[-1],
             label="Model"
         ),
         gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
@@ -112,9 +120,27 @@ Always strive for accuracy, clarity, and helpfulness in your responses. If you'r
 """, label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=2.0, value=0.95, step=0.05, label="Top-p"),
-        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
-        gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
     ],
     theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
         body_background_fill_dark="#16141c",

 import gradio as gr
 from huggingface_hub import hf_hub_download
+llm = None
+llm_model = None
+# Download the new model
 hf_hub_download(
     repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
     filename="llama-3.2-1b-instruct-q4_k_m.gguf",
     local_dir="./models"
 )
+def get_messages_formatter_type(model_name):
+    return MessagesFormatterType.LLAMA_3
 def respond(
     message,
     top_k,
     repeat_penalty,
 ):
+    global llm
+    global llm_model
+    chat_template = get_messages_formatter_type(model)
+    if llm is None or llm_model != model:
+        llm = Llama(
+            model_path=f"models/{model}",
+            n_gpu_layers=0,
+            n_batch=32000,
+            n_ctx=2048,
+        )
+        llm_model = model
+    provider = LlamaCppPythonProvider(llm)
     agent = LlamaCppAgent(
         provider,
+        system_prompt=f"{system_message}",
+        predefined_messages_formatter_type=chat_template,
         debug_output=True
     )
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Dropdown([
+                "llama-3.2-1b-instruct-q4_k_m.gguf"
+            ],
+            value="llama-3.2-1b-instruct-q4_k_m.gguf",
             label="Model"
         ),
         gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
 """, label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=2.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p",
+        ),
+        gr.Slider(
+            minimum=0,
+            maximum=100,
+            value=40,
+            step=1,
+            label="Top-k",
+        ),
+        gr.Slider(
+            minimum=0.0,
+            maximum=2.0,
+            value=1.1,
+            step=0.1,
+            label="Repetition penalty",
+        ),
     ],
     theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
         body_background_fill_dark="#16141c",