import codecs
import os

import gradio as gr
import requests
from huggingface_hub import InferenceClient
# Backend chat-completion endpoint (hard-coded server address).
# NOTE(review): consider reading this from an environment variable instead.
url = "http://59.110.170.104:8085/chat_completion"
def respond(
    message,
    history: list[tuple[str, str]],
    do_sample: bool,
    seed: int,
    max_new_tokens,
    temperature,
    top_p,
    top_k,
    repetition_penalty
):
    """Stream a chat completion from the backend server.

    Rebuilds the (user, assistant) turn history into role-tagged messages,
    POSTs them with the sampling parameters to ``url``, and yields the
    accumulated response text as chunks arrive, so the Gradio chat UI can
    render the reply incrementally.

    Args:
        message: The new user message.
        history: Prior turns as (user_text, assistant_text) pairs.
        do_sample, seed, max_new_tokens, temperature, top_p, top_k,
        repetition_penalty: Generation parameters forwarded verbatim
            to the backend.

    Yields:
        str: The full response text accumulated so far.

    Raises:
        requests.HTTPError: If the backend returns an error status.
        KeyError: If the HF_TOKEN environment variable is not set.
    """
    # Rebuild the conversation as role-tagged messages for the API.
    messages = []
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    request_data = dict(
        messages=messages,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        seed=seed,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        repetition_penalty=repetition_penalty
    )
    print(request_data)

    # Use an incremental decoder: a fixed 1024-byte chunk can split a
    # multibyte UTF-8 sequence, and decoding each chunk independently
    # would raise UnicodeDecodeError mid-stream.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    response = ""
    with requests.post(
        url,
        json=request_data,
        stream=True,
        headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}"},
    ) as r:
        # Fail fast on HTTP errors instead of streaming an error page as text.
        r.raise_for_status()
        for chunk in r.iter_content(1024):
            if chunk:  # skip keep-alive empty chunks
                response += decoder.decode(chunk)
                yield response
    # Flush any bytes still buffered in the decoder.
    tail = decoder.decode(b"", final=True)
    if tail:
        response += tail
        yield response
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""

# Generation controls shown under the chat box. Their order must match the
# parameters of respond() after (message, history).
_generation_controls = [
    # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Checkbox(True, label="do sample"),
    gr.Number(42, precision=0, label="seed"),
    gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.01, maximum=4.0, value=0.7, step=0.01, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=1.0, step=0.05, label="Top-p (nucleus sampling)"),
    gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Top-K (Top-K sampling)"),
    gr.Slider(minimum=1, maximum=2, value=1.03, step=0.01, label="repetition penalty"),
]

# Chat UI: streams respond()'s partial outputs into a 600px-tall chatbot pane.
demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(height=600),
    additional_inputs=_generation_controls,
)
if __name__ == "__main__":
    # At most 2 generations run concurrently; up to 10 extra requests queue.
    demo.queue(default_concurrency_limit=2, max_size=10)
    demo.launch()