Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,8 +15,10 @@ from gradio.themes.utils import (
|
|
15 |
|
16 |
# ================================================================================================================================
|
17 |
TOKEN = os.getenv("HF_TOKEN")
|
18 |
-
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta" , token=TOKEN)
|
19 |
system_message ="You are a capable and freindly assistant."
|
|
|
|
|
20 |
|
21 |
no_change_btn = gr.Button()
|
22 |
enable_btn = gr.Button(interactive=True)
|
@@ -125,18 +127,31 @@ def chat(
|
|
125 |
messages.append({"role": "user", "content": run_rag(message)})
|
126 |
response = "This is a response to the question"
|
127 |
chatbot.append((question,""))
|
128 |
-
for msg in client.chat_completion(
|
129 |
-
|
130 |
-
|
131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
temperature=temperature,
|
|
|
133 |
top_p=top_p,
|
|
|
134 |
):
|
135 |
-
|
136 |
-
token = msg.choices[0].delta.content
|
137 |
-
response += str(
|
138 |
-
|
139 |
-
|
|
|
140 |
chatbot.clear()
|
141 |
chatbot.append((question , response))
|
142 |
state.save_response(response)
|
@@ -230,7 +245,7 @@ with gr.Blocks(title="RAG", theme=theme, css=block_css , fill_height=True) as de
|
|
230 |
with gr.Accordion("Parameters", open=False) as parameter_row:
|
231 |
temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature",)
|
232 |
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",)
|
233 |
-
max_output_tokens = gr.Slider(minimum=0, maximum=4096, value=
|
234 |
|
235 |
|
236 |
# ================================================================================================================================
|
|
|
15 |
|
16 |
# ================================================================================================================================
|
17 |
TOKEN = os.getenv("HF_TOKEN")
|
18 |
+
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta" , token=TOKEN)
|
19 |
system_message ="You are a capable and freindly assistant."
|
20 |
+
Endpoint_URL = "https://gx986bv0z1k42aqe.us-east-1.aws.endpoints.huggingface.cloud/"
|
21 |
+
client = InferenceClient(Endpoint_URL, token=TOKEN)
|
22 |
|
23 |
no_change_btn = gr.Button()
|
24 |
enable_btn = gr.Button(interactive=True)
|
|
|
127 |
messages.append({"role": "user", "content": run_rag(message)})
|
128 |
response = "This is a response to the question"
|
129 |
chatbot.append((question,""))
|
130 |
+
# for msg in client.chat_completion(
|
131 |
+
# messages,
|
132 |
+
# max_tokens=max_tokens,
|
133 |
+
# stream=True,
|
134 |
+
# temperature=temperature,
|
135 |
+
# top_p=top_p,
|
136 |
+
# ):
|
137 |
+
|
138 |
+
# token = msg.choices[0].delta.content
|
139 |
+
# response += str(token)
|
140 |
+
# # chatbot.append(( response, response))
|
141 |
+
# # yield "" , chatbot
|
142 |
+
for msg in client.text_generation(
|
143 |
+
prompt=run_rag(message),
|
144 |
temperature=temperature,
|
145 |
+
max_new_tokens=max_tokens,
|
146 |
top_p=top_p,
|
147 |
+
stream=False,
|
148 |
):
|
149 |
+
|
150 |
+
# token = msg.choices[0].delta.content
|
151 |
+
response += str(msg)
|
152 |
+
chatbot.append(( response, response))
|
153 |
+
|
154 |
+
|
155 |
chatbot.clear()
|
156 |
chatbot.append((question , response))
|
157 |
state.save_response(response)
|
|
|
245 |
with gr.Accordion("Parameters", open=False) as parameter_row:
|
246 |
temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature",)
|
247 |
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",)
|
248 |
+
max_output_tokens = gr.Slider(minimum=0, maximum=4096, value=480, step=64, interactive=True, label="Max output tokens",)
|
249 |
|
250 |
|
251 |
# ================================================================================================================================
|