crystal99 committed on
Commit
5e6a1ff
·
verified ·
1 Parent(s): 7184cce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -27
app.py CHANGED
@@ -1,60 +1,46 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
9
 
10
  def respond(
11
  message,
12
- history: [],
13
  system_message,
14
  max_tokens,
15
  temperature,
16
  top_p,
17
  ):
 
18
  messages = [{"role": "system", "content": system_message}]
19
-
20
-
21
  messages.append({"role": "user", "content": message})
22
 
23
- response = ""
24
-
25
- for message in client.chat_completion(
26
- messages,
27
  max_tokens=max_tokens,
28
- stream=False,
29
  temperature=temperature,
30
  top_p=top_p,
31
- ):
32
- print(message.choices)
33
- # token = message.choices[0].delta.content
34
 
35
- response = "helo" # += token
36
- yield response
 
37
 
38
 
39
- """
40
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
41
- """
42
  demo = gr.ChatInterface(
43
  respond,
44
  additional_inputs=[
45
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
46
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
47
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
48
- gr.Slider(
49
- minimum=0.1,
50
- maximum=1.0,
51
- value=0.95,
52
- step=0.05,
53
- label="Top-p (nucleus sampling)",
54
- ),
55
  ],
56
  )
57
 
58
-
59
  if __name__ == "__main__":
60
  demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
# Hugging Face Inference API client, pinned to the Zephyr-7B chat model.
# NOTE(review): the client is created at import time, so any credential or
# network misconfiguration surfaces at startup — confirm that is acceptable.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
6
 
7
 
8
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate one chat reply for gr.ChatInterface via the HF Inference API.

    Args:
        message: The latest user message (str).
        history: Prior conversation turns supplied by gr.ChatInterface as
            (user, assistant) pairs. NOTE(review): assumes the classic
            tuple-pair history format — confirm the installed Gradio version
            does not use the newer ``type="messages"`` dict format.
        system_message: System prompt placed first in the message list.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The assistant's reply text (str).
    """
    # Prepare messages for the API call.
    messages = [{"role": "system", "content": system_message}]

    # Replay earlier turns so the model sees the whole conversation —
    # previously `history` was accepted but ignored, so the bot had no memory.
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    # Make the API call without streaming: one complete response object.
    response = client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=False,  # streaming disabled
    )

    # Attribute access is the documented interface for the returned
    # ChatCompletionOutput dataclass; dict-style indexing is fragile
    # across huggingface_hub versions.
    return response.choices[0].message.content
32
 
33
 
34
# Gradio chat UI: the extra widgets map one-to-one onto respond()'s
# system_message, max_tokens, temperature and top_p parameters.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
44
 
 
if __name__ == "__main__":
    # Start the Gradio server only when run as a script, not on import.
    demo.launch()