KingNish committed on
Commit
66b33d0
1 Parent(s): 55b49ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -22
app.py CHANGED
@@ -8,27 +8,18 @@ from llama_cpp_agent.chat_history.messages import Roles
8
  import gradio as gr
9
  from huggingface_hub import hf_hub_download
10
 
11
- # Global variables
12
- MODEL_PATH = "models/llama-3.2-1b-instruct-q4_k_m.gguf"
13
- CHAT_TEMPLATE = MessagesFormatterType.LLAMA_3
14
 
15
- # Download the model (if not already present)
16
  hf_hub_download(
17
  repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
18
  filename="llama-3.2-1b-instruct-q4_k_m.gguf",
19
  local_dir="./models"
20
  )
21
 
22
- # Initialize the model globally
23
- llm = Llama(
24
- model_path=MODEL_PATH,
25
- n_gpu_layers=0,
26
- n_batch=32000,
27
- n_ctx=2048,
28
- )
29
-
30
- # Initialize the provider globally
31
- provider = LlamaCppPythonProvider(llm)
32
 
33
  def respond(
34
  message,
@@ -41,10 +32,26 @@ def respond(
41
  top_k,
42
  repeat_penalty,
43
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  agent = LlamaCppAgent(
45
  provider,
46
- system_prompt=system_message,
47
- predefined_messages_formatter_type=CHAT_TEMPLATE,
48
  debug_output=True
49
  )
50
 
@@ -94,9 +101,10 @@ Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for c
94
  demo = gr.ChatInterface(
95
  respond,
96
  additional_inputs=[
97
- gr.Dropdown(
98
- [MODEL_PATH.split("/")[-1]],
99
- value=MODEL_PATH.split("/")[-1],
 
100
  label="Model"
101
  ),
102
  gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
@@ -112,9 +120,27 @@ Always strive for accuracy, clarity, and helpfulness in your responses. If you'r
112
  """, label="System message"),
113
  gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
114
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
115
- gr.Slider(minimum=0.1, maximum=2.0, value=0.95, step=0.05, label="Top-p"),
116
- gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
117
- gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  ],
119
  theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
120
  body_background_fill_dark="#16141c",
 
8
  import gradio as gr
9
  from huggingface_hub import hf_hub_download
10
 
11
+ llm = None
12
+ llm_model = None
 
13
 
14
+ # Download the new model
15
  hf_hub_download(
16
  repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
17
  filename="llama-3.2-1b-instruct-q4_k_m.gguf",
18
  local_dir="./models"
19
  )
20
 
21
+ def get_messages_formatter_type(model_name):
22
+ return MessagesFormatterType.LLAMA_3
 
 
 
 
 
 
 
 
23
 
24
  def respond(
25
  message,
 
32
  top_k,
33
  repeat_penalty,
34
  ):
35
+ global llm
36
+ global llm_model
37
+
38
+ chat_template = get_messages_formatter_type(model)
39
+
40
+ if llm is None or llm_model != model:
41
+ llm = Llama(
42
+ model_path=f"models/{model}",
43
+ n_gpu_layers=0,
44
+ n_batch=32000,
45
+ n_ctx=2048,
46
+ )
47
+ llm_model = model
48
+
49
+ provider = LlamaCppPythonProvider(llm)
50
+
51
  agent = LlamaCppAgent(
52
  provider,
53
+ system_prompt=f"{system_message}",
54
+ predefined_messages_formatter_type=chat_template,
55
  debug_output=True
56
  )
57
 
 
101
  demo = gr.ChatInterface(
102
  respond,
103
  additional_inputs=[
104
+ gr.Dropdown([
105
+ "llama-3.2-1b-instruct-q4_k_m.gguf"
106
+ ],
107
+ value="llama-3.2-1b-instruct-q4_k_m.gguf",
108
  label="Model"
109
  ),
110
  gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
 
120
  """, label="System message"),
121
  gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
122
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
123
+ gr.Slider(
124
+ minimum=0.1,
125
+ maximum=2.0,
126
+ value=0.95,
127
+ step=0.05,
128
+ label="Top-p",
129
+ ),
130
+ gr.Slider(
131
+ minimum=0,
132
+ maximum=100,
133
+ value=40,
134
+ step=1,
135
+ label="Top-k",
136
+ ),
137
+ gr.Slider(
138
+ minimum=0.0,
139
+ maximum=2.0,
140
+ value=1.1,
141
+ step=0.1,
142
+ label="Repetition penalty",
143
+ ),
144
  ],
145
  theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
146
  body_background_fill_dark="#16141c",