File size: 2,393 Bytes
02358ab
 
bd26bed
 
 
136c9d6
02358ab
 
 
 
 
bd26bed
 
 
02358ab
 
bd26bed
 
02358ab
bd26bed
02358ab
0b474ef
 
 
 
 
02358ab
 
 
 
bd26bed
 
 
 
 
02358ab
bd26bed
 
 
 
 
 
 
 
 
 
02358ab
 
 
 
136c9d6
02358ab
 
136c9d6
02358ab
bd26bed
 
 
136c9d6
bd26bed
02358ab
 
 
bd26bed
02358ab
 
 
bd26bed
 
 
 
 
 
 
 
 
 
 
 
 
 
02358ab
 
 
 
 
136c9d6
02358ab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gradio as gr
from huggingface_hub import InferenceClient
import requests
import os

# Chat-completion endpoint that `respond` POSTs its request payload to.
# NOTE(review): this is plain HTTP to a raw IP address, and `respond` sends the
# HF_TOKEN bearer token in the Authorization header — confirm this is intended.
url = "http://59.110.170.104:8085/chat_completion"


def respond(
    message,
    history: list[tuple[str, str]],
    do_sample: bool,
    seed: int,
    max_new_tokens,
    temperature,
    top_p,
    top_k,
    repetition_penalty
):
    """Stream a chat completion for ``message`` from the backend at ``url``.

    The conversation is rebuilt as a list of ``{"role", "content"}`` dicts from
    ``history`` (pairs of user text and assistant text; empty entries are
    skipped), followed by the new user ``message``. That list and the sampling
    settings are POSTed as JSON, and the response body is read as a stream.

    Args:
        message: The new user message.
        history: Previous turns as ``(user_text, assistant_text)`` pairs.
        do_sample, seed, max_new_tokens, temperature, top_p, top_k,
        repetition_penalty: Forwarded unchanged in the request payload.

    Yields:
        The response text accumulated so far, each time new text arrives.

    Raises:
        KeyError: If the ``HF_TOKEN`` environment variable is not set.
    """
    # Function-scope import so this block does not depend on a file-level edit.
    import codecs

    messages = []
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    request_data = dict(
        messages=messages,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        seed=seed,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        repetition_penalty=repetition_penalty
    )
    print(request_data)

    headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

    # Chunks are cut at arbitrary byte offsets, so a multi-byte UTF-8 character
    # can straddle two chunks. Decoding each chunk on its own would raise
    # UnicodeDecodeError; the incremental decoder buffers the partial sequence
    # until the remaining bytes arrive.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

    response = ""
    with requests.post(url, json=request_data, stream=True, headers=headers) as r:
        for chunk in r.iter_content(1024):
            piece = decoder.decode(chunk)
            if piece:
                response += piece
                yield response

        # Flush bytes still buffered when the stream ends (a truncated trailing
        # sequence becomes U+FFFD instead of being dropped silently).
        tail = decoder.decode(b"", final=True)
        if tail:
            response += tail
            yield response

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""

# Chat UI wired to `respond`. The entries of `additional_inputs` are listed in
# the same order as the extra parameters of `respond` (those after `message`
# and `history`): do_sample, seed, max_new_tokens, temperature, top_p, top_k,
# repetition_penalty.
demo = gr.ChatInterface(
    fn=respond,
    chatbot=gr.Chatbot(height=600),
    additional_inputs=[
        # System-message input is currently disabled:
        # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Checkbox(value=True, label="do sample"),
        gr.Number(value=42, precision=0, label="seed"),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=1024,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.01,
            maximum=4.0,
            step=0.01,
            value=0.7,
        ),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, step=0.05, value=1.0),
        gr.Slider(label="Top-K (Top-K sampling)", minimum=0, maximum=100, step=1, value=0),
        gr.Slider(label="repetition penalty", minimum=1, maximum=2, step=0.01, value=1.03),
    ],
)


if __name__ == "__main__":
    # Configure the request queue, then start the app; `queue` returns the
    # same app object, so the two calls can be chained.
    demo.queue(max_size=10, default_concurrency_limit=2).launch()