islam23 committed on
Commit
e5bacb6
1 Parent(s): 79abb0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -11
app.py CHANGED
@@ -15,8 +15,10 @@ from gradio.themes.utils import (
15
 
16
  # ================================================================================================================================
17
  TOKEN = os.getenv("HF_TOKEN")
18
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta" , token=TOKEN)
19
  system_message ="You are a capable and freindly assistant."
 
 
20
 
21
  no_change_btn = gr.Button()
22
  enable_btn = gr.Button(interactive=True)
@@ -125,18 +127,31 @@ def chat(
125
  messages.append({"role": "user", "content": run_rag(message)})
126
  response = "This is a response to the question"
127
  chatbot.append((question,""))
128
- for msg in client.chat_completion(
129
- messages,
130
- max_tokens=max_tokens,
131
- stream=True,
 
 
 
 
 
 
 
 
 
 
132
  temperature=temperature,
 
133
  top_p=top_p,
 
134
  ):
135
-
136
- token = msg.choices[0].delta.content
137
- response += str(token)
138
- # chatbot.append(( response, response))
139
- # yield "" , chatbot
 
140
  chatbot.clear()
141
  chatbot.append((question , response))
142
  state.save_response(response)
@@ -230,7 +245,7 @@ with gr.Blocks(title="RAG", theme=theme, css=block_css , fill_height=True) as de
230
  with gr.Accordion("Parameters", open=False) as parameter_row:
231
  temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature",)
232
  top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",)
233
- max_output_tokens = gr.Slider(minimum=0, maximum=4096, value=1024, step=64, interactive=True, label="Max output tokens",)
234
 
235
 
236
  # ================================================================================================================================
 
15
 
16
  # ================================================================================================================================
17
  TOKEN = os.getenv("HF_TOKEN")
18
+ # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta" , token=TOKEN)
19
  system_message ="You are a capable and freindly assistant."
20
+ Endpoint_URL = "https://gx986bv0z1k42aqe.us-east-1.aws.endpoints.huggingface.cloud/"
21
+ client = InferenceClient(Endpoint_URL, token=TOKEN)
22
 
23
  no_change_btn = gr.Button()
24
  enable_btn = gr.Button(interactive=True)
 
127
  messages.append({"role": "user", "content": run_rag(message)})
128
  response = "This is a response to the question"
129
  chatbot.append((question,""))
130
+ # for msg in client.chat_completion(
131
+ # messages,
132
+ # max_tokens=max_tokens,
133
+ # stream=True,
134
+ # temperature=temperature,
135
+ # top_p=top_p,
136
+ # ):
137
+
138
+ # token = msg.choices[0].delta.content
139
+ # response += str(token)
140
+ # # chatbot.append(( response, response))
141
+ # # yield "" , chatbot
142
+ for msg in client.text_generation(
143
+ prompt=run_rag(message),
144
  temperature=temperature,
145
+ max_new_tokens=max_tokens,
146
  top_p=top_p,
147
+ stream=False,
148
  ):
149
+
150
+ # token = msg.choices[0].delta.content
151
+ response += str(msg)
152
+ chatbot.append(( response, response))
153
+
154
+
155
  chatbot.clear()
156
  chatbot.append((question , response))
157
  state.save_response(response)
 
245
  with gr.Accordion("Parameters", open=False) as parameter_row:
246
  temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature",)
247
  top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",)
248
+ max_output_tokens = gr.Slider(minimum=0, maximum=4096, value=480, step=64, interactive=True, label="Max output tokens",)
249
 
250
 
251
  # ================================================================================================================================