Nymbo committed
Commit 0ef95ea · verified · 1 Parent(s): 8d5a7cf

Update app.py

Files changed (1):
  app.py +64 -78
app.py CHANGED
@@ -3,23 +3,13 @@ from openai import OpenAI
import os

ACCESS_TOKEN = os.getenv("HF_TOKEN")
+ print("Access token loaded.")

- def show_loading_status(msg):
-
-     try:
-         gr.toast(msg)
-     except:
-         pass
-     print(msg)
-
- show_loading_status("Access token loaded.")
-
- # Initialize the Hugging Face Inference-based OpenAI client
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
- show_loading_status("OpenAI client initialized.")
+ print("OpenAI client initialized.")


def respond(
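Note: the client above points the OpenAI SDK at Hugging Face's OpenAI-compatible inference endpoint, so ordinary chat-completion calls work unchanged. A minimal non-streaming sanity check (a sketch, not part of the commit; assumes HF_TOKEN is valid and the app's default model is available):

    resp = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct",
        messages=[{"role": "user", "content": "Say hello."}],
        max_tokens=16,
    )
    print(resp.choices[0].message.content)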
@@ -33,18 +23,20 @@ def respond(
    seed,
    custom_model
):
-     show_loading_status(f"Received message: {message}")
-     show_loading_status(f"History: {history}")
-     show_loading_status(f"System message: {system_message}")
-     show_loading_status(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
-     show_loading_status(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-     show_loading_status(f"Selected model (custom_model): {custom_model}")
+
+     print(f"Received message: {message}")
+     print(f"History: {history}")
+     print(f"System message: {system_message}")
+     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
+     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+     print(f"Selected model (custom_model): {custom_model}")

    # Convert seed to None if -1 (meaning random)
-     seed = seed if seed != -1 else random.randint(1, 1000000000),
+     if seed == -1:
+         seed = None

    messages = [{"role": "system", "content": system_message}]
+     print("Initial messages array constructed.")

    # Add conversation history to the context
    for val in history:
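Note: the deleted seed line ended with a stray trailing comma, which made `seed` a one-element tuple rather than an int; the replacement maps -1 to None so the backend picks a random seed instead. The same normalization as a one-liner (a sketch, equivalent to the two-line version above):

    seed = None if seed == -1 else seed  # None defers seed choice to the server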
@@ -52,62 +44,46 @@ def respond(
        assistant_part = val[1]
        if user_part:
            messages.append({"role": "user", "content": user_part})
-             show_loading_status(f"Added user message to context: {user_part}")
+             print(f"Added user message to context: {user_part}")
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
-             show_loading_status(f"Added assistant message to context: {assistant_part}")
+             print(f"Added assistant message to context: {assistant_part}")

    # Append the latest user message
    messages.append({"role": "user", "content": message})
-     show_loading_status("Latest user message appended.")
+     print("Latest user message appended.")

-     # If user provided a model, use that; otherwise, fall back to a default
+     # If user provided a model, use that; otherwise, fall back to a default model
    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
-     show_loading_status(f"Model selected for inference: {model_to_use}")
-
-     response_text = ""
-     show_loading_status("Sending request to OpenAI API.")
-
-     try:
-         for message_chunk in client.chat.completions.create(
-             model=model_to_use,
-             max_tokens=max_tokens,
-             stream=True,
-             temperature=temperature,
-             top_p=top_p,
-             frequency_penalty=frequency_penalty,
-             seed=seed,
-             messages=messages,
-         ):
-             # Each chunk is a piece of the streaming text
-             token_text = message_chunk.choices[0].delta.content
-             show_loading_status(f"Received token: {token_text}")
-             response_text += token_text
-             yield response_text
-
-         show_loading_status("Completed response generation.")
-
-     except Exception as e:
-         show_loading_status("Error encountered during completion streaming.")
-         raise gr.Error(f"An unexpected error occurred: {str(e)}")
-
+     print(f"Model selected for inference: {model_to_use}")
+
+     # Start with an empty string to build the response as tokens stream in
+     response = ""
+     print("Sending request to OpenAI API.")
+
+     for message_chunk in client.chat.completions.create(
+         model=model_to_use,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+         frequency_penalty=frequency_penalty,
+         seed=seed,
+         messages=messages,
+     ):
+         token_text = message_chunk.choices[0].delta.content
+         print(f"Received token: {token_text}")
+         response += token_text
+         yield response
+
+     print("Completed response generation.")

# GRADIO UI

- chatbot = gr.Chatbot(
-     height=600,
-     show_copy_button=True,
-     placeholder="Select a model and begin chatting",
-     likeable=True,
-     layout="panel"
- )
- show_loading_status("Chatbot interface created.")
+ chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", likeable=True, layout="panel")
+ print("Chatbot interface created.")

- system_message_box = gr.Textbox(
-     value="",
-     placeholder="You are a helpful assistant.",
-     label="System Prompt"
- )
+ system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")

max_tokens_slider = gr.Slider(
    minimum=1,
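Note: in the new streaming loop, `message_chunk.choices[0].delta.content` is optional in the OpenAI SDK and can be None (for example on a role-only or final chunk), in which case `response += token_text` raises a TypeError. A defensive variant of the loop (a sketch, not what the commit ships; otherwise identical):

    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        token_text = message_chunk.choices[0].delta.content
        if token_text is None:  # skip chunks that carry no text
            continue
        response += token_text
        yield response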
@@ -139,21 +115,26 @@ frequency_penalty_slider = gr.Slider(
)
seed_slider = gr.Slider(
    minimum=-1,
-     maximum=1000000000,
+     maximum=65535,
    value=-1,
    step=1,
    label="Seed (-1 for random)"
)

+ # The custom_model_box is what the respond function sees as "custom_model"
custom_model_box = gr.Textbox(
    value="",
    label="Custom Model",
-     info="(Optional) Provide a custom Hugging Face model path. Supports Warm and Cold models.",
+     info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
    placeholder="meta-llama/Llama-3.3-70B-Instruct"
)

def set_custom_model_from_radio(selected):
-     show_loading_status(f"Featured model selected: {selected}")
+     """
+     This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
+     We will update the Custom Model text box with that selection automatically.
+     """
+     print(f"Featured model selected: {selected}")
    return selected

demo = gr.ChatInterface(
@@ -171,7 +152,7 @@ demo = gr.ChatInterface(
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)
- show_loading_status("ChatInterface object created.")
+ print("ChatInterface object created.")

with demo:
    with gr.Accordion("Model Selection", open=False):
@@ -180,7 +161,7 @@ with demo:
            placeholder="Search for a featured model...",
            lines=1
        )
-         show_loading_status("Model search box created.")
+         print("Model search box created.")

        models_list = [
            "meta-llama/Llama-3.3-70B-Instruct",
@@ -188,15 +169,20 @@ with demo:
            "meta-llama/Llama-3.2-1B-Instruct",
            "meta-llama/Llama-3.1-8B-Instruct",
            "NousResearch/Hermes-3-Llama-3.1-8B",
+             "google/gemma-2-27b-it",
+             "google/gemma-2-9b-it",
+             "google/gemma-2-2b-it",
            "mistralai/Mistral-Nemo-Instruct-2407",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "Qwen/Qwen2.5-72B-Instruct",
            "Qwen/QwQ-32B-Preview",
+             "PowerInfer/SmallThinker-3B-Preview",
            "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+             "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "microsoft/Phi-3.5-mini-instruct",
        ]
-         show_loading_status("Models list initialized.")
+         print("Models list initialized.")

        featured_model_radio = gr.Radio(
            label="Select a model below",
@@ -204,12 +190,12 @@ with demo:
            value="meta-llama/Llama-3.3-70B-Instruct",
            interactive=True
        )
-         show_loading_status("Featured models radio button created.")
+         print("Featured models radio button created.")

        def filter_models(search_term):
-             show_loading_status(f"Filtering models with search term: {search_term}")
+             print(f"Filtering models with search term: {search_term}")
            filtered = [m for m in models_list if search_term.lower() in m.lower()]
-             show_loading_status(f"Filtered models: {filtered}")
+             print(f"Filtered models: {filtered}")
            return gr.update(choices=filtered)

        model_search_box.change(
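Note: `filter_models` is a plain case-insensitive substring match over `models_list`, and returning `gr.update(choices=filtered)` swaps the radio's options in place as the user types. For instance, with the list above, a search for "qwen" would narrow the choices to (illustration only):

    ['Qwen/Qwen2.5-72B-Instruct', 'Qwen/QwQ-32B-Preview']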
@@ -217,17 +203,17 @@ with demo:
            inputs=model_search_box,
            outputs=featured_model_radio
        )
-         show_loading_status("Model search box change event linked.")
+         print("Model search box change event linked.")

        featured_model_radio.change(
            fn=set_custom_model_from_radio,
            inputs=featured_model_radio,
            outputs=custom_model_box
        )
-         show_loading_status("Featured model radio button change event linked.")
+         print("Featured model radio button change event linked.")

-     show_loading_status("Gradio interface initialized.")
+     print("Gradio interface initialized.")

if __name__ == "__main__":
-     show_loading_status("Launching the demo application.")
+     print("Launching the demo application.")
    demo.launch()
 
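Note: the commit does not change how the Space is run; a local launch only needs the HF_TOKEN environment variable that `os.getenv` reads at the top of the file (a sketch; the token value is a placeholder, and the gradio and openai packages must be installed):

    export HF_TOKEN=hf_xxxxxxxxxxxxxxxx
    python app.py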