KingNish committed on
Commit 4fde3b2
1 Parent(s): 7ebbc54

Update app.py

Files changed (1)
  1. app.py +39 -38
app.py CHANGED
@@ -9,8 +9,10 @@ from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 from huggingface_hub import hf_hub_download
 
+# Global variables to store the model and agent
 llm = None
 llm_model = None
+agent = None
 
 # Download the new model
 hf_hub_download(
@@ -22,6 +24,31 @@ hf_hub_download(
 def get_messages_formatter_type(model_name):
     return MessagesFormatterType.LLAMA_3
 
+def load_model(model_path):
+    global llm
+    global llm_model
+    if llm is None or llm_model != model_path:
+        llm = Llama(
+            model_path=model_path,
+            n_gpu_layers=0, # Adjust based on your GPU
+            n_batch=32398, # Adjust based on your RAM
+            n_ctx=512, # Adjust based on your RAM and desired context length
+        )
+        llm_model = model_path
+    return llm
+
+def load_agent(llm, system_message, chat_template):
+    global agent
+    if agent is None:
+        provider = LlamaCppPythonProvider(llm)
+        agent = LlamaCppAgent(
+            provider,
+            system_prompt=system_message,
+            predefined_messages_formatter_type=chat_template,
+            debug_output=True
+        )
+    return agent
+
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -34,29 +61,13 @@ def respond(
     repeat_penalty,
 ):
     global llm
-    global llm_model
-
+    global agent
+
     chat_template = get_messages_formatter_type(model)
-
-    if llm is None or llm_model != model:
-        llm = Llama(
-            model_path=f"models/{model}",
-            n_gpu_layers=0, # Adjust based on your GPU
-            n_batch=32398, # Adjust based on your RAM
-            n_ctx=512, # Adjust based on your RAM and desired context length
-        )
-        llm_model = model
-
-    provider = LlamaCppPythonProvider(llm)
-
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=chat_template,
-        debug_output=True
-    )
-
-    settings = provider.get_provider_default_settings()
+    llm = load_model(f"models/{model}")
+    agent = load_agent(llm, system_message, chat_template)
+
+    settings = agent.provider.get_provider_default_settings()
     settings.temperature = temperature
     settings.top_k = top_k
     settings.top_p = top_p
@@ -118,30 +129,20 @@ demo = gr.ChatInterface(
         value="llama-3.2-1b-instruct-q4_k_m.gguf",
         label="Model"
     ),
-    gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
-
-1. Complex reasoning and problem-solving
-2. Multilingual understanding and generation
-3. Creative and analytical writing
-4. Code understanding and generation
-5. Task decomposition and step-by-step guidance
-6. Summarization and information extraction
-
-Always strive for accuracy, clarity, and helpfulness in your responses. If you're unsure about something, express your uncertainty. Use the following format for your responses:
-""", label="System message"),
+    gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta.""", label="System message"),
     gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
-    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+    gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
     gr.Slider(
         minimum=0.1,
         maximum=2.0,
-        value=0.95,
+        value=0.9,
         step=0.05,
         label="Top-p",
     ),
     gr.Slider(
         minimum=0,
         maximum=100,
-        value=40,
+        value=1,
         step=1,
         label="Top-k",
     ),
@@ -181,11 +182,11 @@ Always strive for accuracy, clarity, and helpfulness in your responses. If you'r
         ["Explain the difference between machine learning and deep learning."],
         ["Summarize the key points of climate change and its global impact."],
         ["Explain quantum computing to a 10-year-old."],
-        ["Design a step-by-step meal plan for someone trying to lose weight and build muscle."]
+        ["Design a step-by-Step Meal Plan for Weight Loss and Muscle Gain."],
     ],
     cache_examples=False,
-    autofocus=False,
-    concurrency_limit=None
+    autofocus=False,
+    concurrency_limit=None,
 )
 
 if __name__ == "__main__":
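For reference, the new load_model helper is a plain module-level cache: the GGUF file is reloaded only when the requested path changes, while load_agent builds the agent once and then reuses it, so only the first system_message is ever baked in. The sketch below isolates that pattern with a hypothetical FakeLlama stand-in (not part of the app), so the caching behavior can be checked without llama-cpp-python or a model download:

# Standalone sketch of the caching pattern used by load_model/load_agent.
# FakeLlama is a hypothetical stand-in for llama_cpp.Llama.

llm = None
llm_model = None
agent = None

class FakeLlama:
    def __init__(self, model_path):
        self.model_path = model_path  # pretend this is an expensive load

def load_model(model_path):
    global llm, llm_model
    if llm is None or llm_model != model_path:
        llm = FakeLlama(model_path)  # only runs when the path changes
        llm_model = model_path
    return llm

def load_agent(llm, system_message):
    global agent
    if agent is None:  # built once; later system messages are ignored
        agent = {"llm": llm, "system_prompt": system_message}
    return agent

a = load_model("models/a.gguf")
b = load_model("models/a.gguf")
c = load_model("models/b.gguf")
assert a is b       # same path: cached instance is reused
assert c is not a   # different path: model is reloaded

agent_one = load_agent(a, "first prompt")
agent_two = load_agent(c, "second prompt")
assert agent_one is agent_two  # cached: second prompt and new llm are ignored

The assertions document the cache contract: same path, same instance; new path, fresh load; and the agent, once built, is never rebuilt.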
 
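The reworked respond() also pulls sampler settings from the cached agent's provider rather than from a local provider variable. A minimal sketch of that flow, assuming llama-cpp-python and llama-cpp-agent are installed; the model path is a hypothetical local file (the Space itself fetches it with hf_hub_download):

from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider

# Hypothetical local path; the app downloads its model at startup.
llm = Llama(
    model_path="models/llama-3.2-1b-instruct-q4_k_m.gguf",
    n_gpu_layers=0,
    n_ctx=512,
)
provider = LlamaCppPythonProvider(llm)
agent = LlamaCppAgent(
    provider,
    system_prompt="You are a helpful assistant.",
    predefined_messages_formatter_type=MessagesFormatterType.LLAMA_3,
)

# Same pattern as the new respond(): take the provider defaults via
# agent.provider, then override them with the UI values (new defaults shown).
settings = agent.provider.get_provider_default_settings()
settings.temperature = 0.7
settings.top_p = 0.9
settings.top_k = 1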