Files changed (1)
  1. app.py +20 -29
app.py CHANGED
@@ -1,22 +1,17 @@
 from huggingface_hub import InferenceClient
 import gradio as gr
 
-client = InferenceClient(
-    "mistralai/Mistral-7B-Instruct-v0.3"
-)
-
+client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
 
 def format_prompt(message, history):
-    prompt = "<s>"
-    for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
+    prompt = "<s>"
+    for user_prompt, bot_response in history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"
+    return prompt
 
-def generate(
-    prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
-):
+def generate(prompt, history, temperature=1.0, max_new_tokens=1048, top_p=1.0, repetition_penalty=1.0):
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
@@ -41,55 +36,51 @@ def generate(
         yield output
     return output
 
-
 additional_inputs=[
     gr.Slider(
         label="Temperature",
-        value=0.9,
+        value=1.0,
         minimum=0.0,
         maximum=1.0,
-        step=0.05,
+        step=0.01,
         interactive=True,
         info="Higher values produce more diverse outputs",
     ),
     gr.Slider(
         label="Max new tokens",
-        value=256,
+        value=1048,
         minimum=0,
-        maximum=1048,
-        step=64,
+        maximum=2048,
+        step=128,
         interactive=True,
         info="The maximum numbers of new tokens",
     ),
     gr.Slider(
         label="Top-p (nucleus sampling)",
-        value=0.90,
+        value=1.0,
         minimum=0.0,
-        maximum=1,
-        step=0.05,
+        maximum=1.0,
+        step=0.01,
         interactive=True,
         info="Higher values sample more low-probability tokens",
     ),
     gr.Slider(
         label="Repetition penalty",
-        value=1.2,
+        value=1.0,
         minimum=1.0,
         maximum=2.0,
-        step=0.05,
+        step=0.01,
         interactive=True,
         info="Penalize repeated tokens",
     )
 ]
 
-
 gr.ChatInterface(
     fn=generate,
     chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
     additional_inputs=additional_inputs,
-    title="""Mistral 7B v0.3"""
+    title="Mistral 7B v0.3"
 ).launch(show_api=False)
 
-
 gr.load("models/ehristoforu/dalle-3-xl-v2").launch()
-
-gr.load("models/microsoft/Phi-3-mini-4k-instruct").launch()
+gr.load("models/microsoft/Phi-3-mini-4k-instruct").launch()