KingNish committed on
Commit ebdfef4
1 Parent(s): b47259c

Update app.py

Files changed (1)
  1. app.py +37 -38
app.py CHANGED
@@ -9,10 +9,8 @@ from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 from huggingface_hub import hf_hub_download
 
-# Global variables to store the model and agent
 llm = None
 llm_model = None
-agent = None
 
 # Download the new model
 hf_hub_download(
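Note on the context above: the arguments of hf_hub_download(...) sit outside this hunk, so they are not shown. As a rough sketch of what such a call typically looks like when it has to drop the GGUF file where respond() later loads it (models/<filename>), with a placeholder repo_id that is not taken from this commit:

from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="example-org/Llama-3.2-1B-Instruct-GGUF",  # placeholder, the real repo_id is not shown in this diff
    filename="llama-3.2-1b-instruct-q4_k_m.gguf",      # matches the Model dropdown default further down
    local_dir="./models",                              # so that model_path=f"models/{model}" resolves
)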
@@ -24,31 +22,6 @@ hf_hub_download(
 def get_messages_formatter_type(model_name):
     return MessagesFormatterType.LLAMA_3
 
-def load_model(model_path):
-    global llm
-    global llm_model
-    if llm is None or llm_model != model_path:
-        llm = Llama(
-            model_path=model_path,
-            n_gpu_layers=0, # Adjust based on your GPU
-            n_batch=32398, # Adjust based on your RAM
-            n_ctx=512, # Adjust based on your RAM and desired context length
-        )
-        llm_model = model_path
-    return llm
-
-def load_agent(llm, system_message, chat_template):
-    global agent
-    if agent is None:
-        provider = LlamaCppPythonProvider(llm)
-        agent = LlamaCppAgent(
-            provider,
-            system_prompt=system_message,
-            predefined_messages_formatter_type=chat_template,
-            debug_output=True
-        )
-    return agent
-
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -61,13 +34,29 @@ def respond(
     repeat_penalty,
 ):
     global llm
-    global agent
-
+    global llm_model
+
     chat_template = get_messages_formatter_type(model)
-    llm = load_model(f"models/{model}")
-    agent = load_agent(llm, system_message, chat_template)
+
+    if llm is None or llm_model != model:
+        llm = Llama(
+            model_path=f"models/{model}",
+            n_gpu_layers=0, # Adjust based on your GPU
+            n_batch=32398, # Adjust based on your RAM
+            n_ctx=512, # Adjust based on your RAM and desired context length
+        )
+        llm_model = model
+
+    provider = LlamaCppPythonProvider(llm)
 
-    settings = agent.provider.get_provider_default_settings()
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=f"{system_message}",
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True
+    )
+
+    settings = provider.get_provider_default_settings()
     settings.temperature = temperature
     settings.top_k = top_k
     settings.top_p = top_p
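This hunk is the core of the commit: the deleted load_model/load_agent helpers are inlined into respond(), which now reloads the GGUF weights only when the selected model changes and builds a fresh provider/agent pair on every request. A condensed, self-contained sketch of that flow (the helper name build_agent is only for illustration; the constructor arguments are the ones visible in the diff):

from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider

llm = None
llm_model = None

def build_agent(model, system_message):
    # Reload the GGUF weights only when the selected model changes.
    global llm, llm_model
    if llm is None or llm_model != model:
        llm = Llama(
            model_path=f"models/{model}",
            n_gpu_layers=0,  # CPU-only
            n_batch=32398,
            n_ctx=512,
        )
        llm_model = model
    # The provider/agent pair is rebuilt on every call, so a changed system message takes effect immediately.
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=system_message,
        predefined_messages_formatter_type=MessagesFormatterType.LLAMA_3,
        debug_output=True,
    )
    return provider, agent

The sampler settings obtained from provider.get_provider_default_settings() are then filled with the slider values (temperature, top_k, top_p, and so on); the streaming call that consumes them lies outside the changed region of this diff.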
@@ -129,9 +118,19 @@ demo = gr.ChatInterface(
             value="llama-3.2-1b-instruct-q4_k_m.gguf",
             label="Model"
         ),
-        gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta.""", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
+        gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
+
+1. Complex reasoning and problem-solving
+2. Multilingual understanding and generation
+3. Creative and analytical writing
+4. Code understanding and generation
+5. Task decomposition and step-by-step guidance
+6. Summarization and information extraction
+
+Always strive for accuracy, clarity, and helpfulness in your responses. If you're unsure about something, express your uncertainty. Use the following format for your responses:
+""", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=2.0,
@@ -182,11 +181,11 @@ demo = gr.ChatInterface(
         ["Explain the difference between machine learning and deep learning."],
         ["Summarize the key points of climate change and its global impact."],
         ["Explain quantum computing to a 10-year-old."],
-        ["Design a step-by-Step Meal Plan for Weight Loss and Muscle Gain."],
+        ["Design a step-by-step meal plan for someone trying to lose weight and build muscle."]
     ],
     cache_examples=False,
-    autofocus = False,
-    concurrency_limit = None
+    autofocus=False,
+    concurrency_limit=None
 )
 
 if __name__ == "__main__":
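For context on how the inputs edited above reach respond(): gr.ChatInterface passes every component in additional_inputs to the callback positionally, after (message, history). A hedged sketch of that wiring using the defaults visible in this diff; the exact parameter order, the dropdown choices, and the three trailing sliders (Top-p, Top-k, Repetition penalty) are assumptions, since those lines sit outside the changed region:

import gradio as gr

def respond(message, history, model, system_message, max_tokens,
            temperature, top_p, top_k, repeat_penalty):
    # Placeholder body; the real app streams tokens from the LlamaCppAgent
    # built as shown in the diff above.
    return f"[{model}] {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # Passed to respond() in this order, after (message, history).
        gr.Dropdown(choices=["llama-3.2-1b-instruct-q4_k_m.gguf"],
                    value="llama-3.2-1b-instruct-q4_k_m.gguf", label="Model"),
        gr.TextArea(value="You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta.",
                    label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),             # assumed
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),                    # assumed
        gr.Slider(minimum=0.1, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),  # assumed
    ],
    cache_examples=False,
    autofocus=False,
    concurrency_limit=None,
)

if __name__ == "__main__":
    demo.launch()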
 