File size: 2,376 Bytes
28c6f95
 
548b6a7
b3a9230
1234221
28c6f95
d801608
28c6f95
3c7a61c
28c6f95
 
1234221
 
 
 
 
 
28c6f95
1234221
28c6f95
1234221
 
 
 
 
28c6f95
1234221
28c6f95
1234221
28c6f95
44fc6dd
1234221
44fc6dd
1234221
bbcb5dc
1234221
 
 
 
28c6f95
1234221
 
d801608
ccf21d2
aedd53f
 
 
 
d801608
 
 
1234221
 
d801608
ccf21d2
6706d71
28c6f95
1234221
 
0d7cb73
f871c53
1234221
 
ab2fe8c
1234221
 
 
 
 
28c6f95
 
1234221
3c7a61c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
from huggingface_hub import InferenceClient
import os

# Hugging Face Inference API token; read from the environment (e.g. a Space
# secret) under the key 'qwen_API_KEY'. May be None if the secret is unset.
api_key = os.environ.get('qwen_API_KEY')
"""
For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co./docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Serverless Inference API client; the model is chosen per-request in respond().
client = InferenceClient( token=api_key)

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p
):
    """Stream a chat completion from Qwen2.5-72B-Instruct.

    Args:
        message: The latest user message.
        history: Prior (user, assistant) turn pairs; empty halves are skipped.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated assistant reply so far (Gradio re-renders each yield).
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # NOTE: the loop variable must not be named `message` — the original code
    # shadowed the user-input parameter with each streamed chunk.
    for chunk in client.chat_completion(
        messages,
        model="Qwen/Qwen2.5-72B-Instruct",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p
    ):
        token = chunk.choices[0].delta.content

        # Fix: role-only and final stream chunks carry delta.content == None;
        # concatenating None to a str raised TypeError and aborted the stream.
        if token:
            response += token
            yield response
    
# Example (prompt, system-message) pairs shown beneath the chat box.
example_prompts = [
    [question, "你是一个歌词助手"]
    for question in (
        "泰语的起源?",
        "你是谁开发的?",
        "写一篇关于青春的五言绝句",
        "你是谁?",
    )
]

# Delimiter pairs Gradio scans for when rendering LaTeX in chatbot messages;
# `display` marks block-level (True) vs inline (False) math.
latex_delimiters = [
    {"left": left, "right": right, "display": display}
    for left, right, display in (
        ("$$", "$$", True),
        ("\\[", "\\]", True),
        ("$", "$", False),
        ("\\(", "\\)", False),
    )
]

# Chat UI: wires `respond` to a streaming chatbot, exposing the generation
# settings as extra inputs that map positionally onto respond()'s parameters
# after (message, history): system_message, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    fn=respond,
    examples=example_prompts,
    # Examples would hit the live Inference API, so don't pre-run them.
    cache_examples=False,
    title="Qwen2.5-72B-Instruct",
    description="千问2.5-72B聊天机器人",
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=8888, value=2048, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    # Custom chatbot so LaTeX rendering and the copy button are enabled.
    chatbot=gr.Chatbot(show_label=True, latex_delimiters=latex_delimiters, show_copy_button=True)
)

if __name__ == "__main__":
    # Queue requests so up to 60 users can stream responses concurrently.
    demo.queue(default_concurrency_limit=60)
    demo.launch(max_threads=60)