xsa-dev committed
Commit f8615ed • 1 Parent(s): 90bf432

files added

Files changed (2):
  1. app.py +236 -4
  2. home-banner.jpg +0 -0
app.py CHANGED
@@ -1,7 +1,239 @@
+import argparse
+import os
+
 import gradio as gr
-
-def greet(name):
-    return "Hello " + name + "!!"
-
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+from text_generation import Client
+
+TITLE = """<h2 align="center">🦅 Falcon-7B-instruct Chat demo 🚀🤘</h2>"""
+USER_NAME = "User"
+BOT_NAME = "Falcon7B-instruct"
+
+DEFAULT_INSTRUCTIONS = f"""
+The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions.
+Falcon was built to be respectful, polite and inclusive.
+Falcon was built by the Technology Innovation Institute in Abu Dhabi.
+Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with.
+Falcon knows a lot, and always tells the truth. The conversation begins.
+"""
+
+RETRY_COMMAND = "/retry"
+STOP_STR = f"\n{USER_NAME}:"
+STOP_SUSPECT_LIST = [":", "\n", "User"]
+
+INFERENCE_ENDPOINT = os.environ.get("INFERENCE_ENDPOINT")
+INFERENCE_AUTH = os.environ.get("INFERENCE_AUTH")
+
+
+def chat_accordion():
+    with gr.Accordion("Parameters", open=False):
+        temperature = gr.Slider(
+            minimum=0.1,
+            maximum=2.0,
+            value=0.8,
+            step=0.1,
+            interactive=True,
+            label="Temperature",
+        )
+        top_p = gr.Slider(
+            minimum=0.1,
+            maximum=0.99,
+            value=0.9,
+            step=0.01,
+            interactive=True,
+            label="p (nucleus sampling)",
+        )
+    return temperature, top_p
+
+
+def format_chat_prompt(message: str, chat_history, instructions: str) -> str:
+    instructions = instructions.strip(" ").strip("\n")
+    prompt = instructions
+    for turn in chat_history:
+        user_message, bot_message = turn
+        prompt = f"{prompt}\n{USER_NAME}: {user_message}\n{BOT_NAME}: {bot_message}"
+    prompt = f"{prompt}\n{USER_NAME}: {message}\n{BOT_NAME}:"
+    return prompt
+
+
+def chat(client: Client):
+    with gr.Column(elem_id="chat_container"):
+        with gr.Row():
+            chatbot = gr.Chatbot(elem_id="chatbot")
+        with gr.Row():
+            inputs = gr.Textbox(
+                placeholder=f"Hello {BOT_NAME} !!",
+                label="Type an input and press Enter",
+                max_lines=3,
+            )
+        gr.Examples(
+            [
+                ["Hey Falcon! Any recommendations for my holidays in Abu Dhabi?"],
+                ["What's the Everett interpretation of quantum mechanics?"],
+                [
+                    "Give me a list of the top 10 dive sites you would recommend around the world."
+                ],
+                ["Can you tell me more about deep-water soloing?"],
+                [
+                    "Can you write a short tweet about the Apache 2.0 release of our latest AI model, Falcon LLM?"
+                ],
+            ],
+            inputs=inputs,
+            label="Click on any example and press Enter in the input textbox!",
+        )
+
+    with gr.Row(elem_id="button_container"):
+        with gr.Column():
+            retry_button = gr.Button("♻️ Retry last turn")
+        with gr.Column():
+            delete_turn_button = gr.Button("🧽 Delete last turn")
+        with gr.Column():
+            clear_chat_button = gr.Button("✨ Delete all history")
+
+    with gr.Row(elem_id="param_container"):
+        with gr.Column():
+            temperature, top_p = chat_accordion()
+        with gr.Column():
+            with gr.Accordion("Instructions", open=False):
+                instructions = gr.Textbox(
+                    placeholder="LLM instructions",
+                    value=DEFAULT_INSTRUCTIONS,
+                    lines=10,
+                    interactive=True,
+                    label="Instructions",
+                    max_lines=16,
+                    show_label=False,
+                )
+
+    def run_chat(
+        message: str, chat_history, instructions: str, temperature: float, top_p: float
+    ):
+        if not message or (message == RETRY_COMMAND and len(chat_history) == 0):
+            yield chat_history
+            return
+
+        if message == RETRY_COMMAND and chat_history:
+            prev_turn = chat_history.pop(-1)
+            user_message, _ = prev_turn
+            message = user_message
+
+        prompt = format_chat_prompt(message, chat_history, instructions)
+        chat_history = chat_history + [[message, ""]]
+        stream = client.generate_stream(
+            prompt,
+            do_sample=True,
+            max_new_tokens=1024,
+            stop_sequences=[STOP_STR, "<|endoftext|>"],
+            temperature=temperature,
+            top_p=top_p,
+        )
+        acc_text = ""
+        for idx, response in enumerate(stream):
+            text_token = response.token.text
+
+            if response.details:
+                return
+
+            if text_token in STOP_SUSPECT_LIST:
+                acc_text += text_token
+                continue
+
+            if idx == 0 and text_token.startswith(" "):
+                text_token = text_token[1:]
+
+            acc_text += text_token
+            last_turn = list(chat_history.pop(-1))
+            last_turn[-1] += acc_text
+            chat_history = chat_history + [last_turn]
+            yield chat_history
+            acc_text = ""
+
+    def delete_last_turn(chat_history):
+        if chat_history:
+            chat_history.pop(-1)
+        return {chatbot: gr.update(value=chat_history)}
+
+    def run_retry(
+        message: str, chat_history, instructions: str, temperature: float, top_p: float
+    ):
+        yield from run_chat(
+            RETRY_COMMAND, chat_history, instructions, temperature, top_p
+        )
+
+    def clear_chat():
+        return []
+
+    inputs.submit(
+        run_chat,
+        [inputs, chatbot, instructions, temperature, top_p],
+        outputs=[chatbot],
+        show_progress=False,
+    )
+    inputs.submit(lambda: "", inputs=None, outputs=inputs)
+    delete_turn_button.click(delete_last_turn, inputs=[chatbot], outputs=[chatbot])
+    retry_button.click(
+        run_retry,
+        [inputs, chatbot, instructions, temperature, top_p],
+        outputs=[chatbot],
+        show_progress=False,
+    )
+    clear_chat_button.click(clear_chat, [], chatbot)
+
+
+def get_demo(client: Client):
+    with gr.Blocks(
+        # css=None
+        # css="""#chat_container {width: 700px; margin-left: auto; margin-right: auto;}
+        # #button_container {width: 700px; margin-left: auto; margin-right: auto;}
+        # #param_container {width: 700px; margin-left: auto; margin-right: auto;}"""
+        css="""#chatbot {
+            font-size: 14px;
+            min-height: 300px;
+        }"""
+    ) as demo:
+        gr.HTML(TITLE)
+
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown(
+                    """**Chat with [Falcon-7B-Instruct](https://huggingface.co/tiiuae/falcon-7b-instruct)!**
+
+✨ This demo is powered by [Falcon-7B-Instruct](https://huggingface.co/tiiuae/falcon-7b-instruct) and running with [Text Generation Inference](https://github.com/huggingface/text-generation-inference) ✨
+
+👀 **Learn more about Falcon LLM:** [falconllm.tii.ae](https://falconllm.tii.ae/)
+
+Why use Falcon-7B-Instruct?
+
+Are you looking for a ready-to-use chat/instruct model based on Falcon-7B?
+
+Falcon-7B is a strong base model, outperforming comparable open-source models (e.g., MPT-7B, StableLM, RedPajama), thanks to being trained on 1,500B tokens of RefinedWeb enhanced with curated corpora. See the OpenLLM Leaderboard.
+It features an architecture optimized for inference, with FlashAttention (Dao et al., 2022) and multiquery (Shazeer et al., 2019).
+💬 This is an instruct model, which may not be ideal for further finetuning. If you are interested in building your own instruct/chat model, we recommend starting from Falcon-7B.
+
+🔥 Looking for an even more powerful model? Falcon-40B-Instruct is Falcon-7B-Instruct's big brother!
+
+🚜 **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. Finally, this demo is limited to a session length of about 1,000 words.
+🚜 **Recommendation**: we recommend that users of Falcon-7B-Instruct develop guardrails and take appropriate precautions for any production use.
+"""
+                )
+
+            with gr.Column():
+                gr.Image("home-banner.jpg", elem_id="banner-image", show_label=False)
+
+        chat(client)
+
+    return demo
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser("Playground Demo")
+    parser.add_argument(
+        "--addr",
+        type=str,
+        required=False,
+        default=INFERENCE_ENDPOINT,
+    )
+    args = parser.parse_args()
+    client = Client(args.addr, headers={"Authorization": f"Bearer {INFERENCE_AUTH}"})
+    demo = get_demo(client)
+    demo.queue(max_size=128, concurrency_count=16)
+    demo.launch()
home-banner.jpg ADDED
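
Reviewer note: as a minimal sketch of how the endpoint wired up in this commit could be exercised outside Gradio, the snippet below uses only the text_generation.Client calls that app.py itself makes (Client(...) and generate_stream(...)). It assumes the same INFERENCE_ENDPOINT / INFERENCE_AUTH environment variables the Space reads; the prompt string is purely illustrative and not part of the committed files.

# Standalone sketch: stream a completion from the same TGI endpoint app.py targets.
import os

from text_generation import Client

endpoint = os.environ["INFERENCE_ENDPOINT"]  # same env var app.py reads
token = os.environ["INFERENCE_AUTH"]         # bearer token, if the endpoint requires one

client = Client(endpoint, headers={"Authorization": f"Bearer {token}"})

# Illustrative prompt in the User/Falcon7B-instruct format app.py builds.
prompt = "User: Hey Falcon! Any recommendations for my holidays in Abu Dhabi?\nFalcon7B-instruct:"

text = ""
for response in client.generate_stream(
    prompt,
    do_sample=True,
    max_new_tokens=256,
    stop_sequences=["\nUser:", "<|endoftext|>"],
    temperature=0.8,
    top_p=0.9,
):
    if response.details:  # final chunk carries generation details; stop, as run_chat() does
        break
    text += response.token.text

print(text)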