File size: 2,376 Bytes
28c6f95 548b6a7 b3a9230 1234221 28c6f95 d801608 28c6f95 3c7a61c 28c6f95 1234221 28c6f95 1234221 28c6f95 1234221 28c6f95 1234221 28c6f95 1234221 28c6f95 44fc6dd 1234221 44fc6dd 1234221 bbcb5dc 1234221 28c6f95 1234221 d801608 ccf21d2 aedd53f d801608 1234221 d801608 ccf21d2 6706d71 28c6f95 1234221 0d7cb73 f871c53 1234221 ab2fe8c 1234221 28c6f95 1234221 3c7a61c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import gradio as gr
from huggingface_hub import InferenceClient
import os
# Access token for the Hugging Face Inference API, read from the
# `qwen_API_KEY` environment variable (None when the variable is unset).
api_key = os.getenv('qwen_API_KEY')

# For more information on huggingface_hub Inference API support, see:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
client = InferenceClient(token=api_key)
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream the model's reply to ``message`` given the prior conversation.

    The request is built as: the system prompt, then every earlier
    user/assistant turn from ``history`` (empty sides are skipped), then the
    new user message. It is sent through the module-level ``client`` with
    streaming enabled.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.

    Yields:
        The reply text accumulated so far, once for every streamed chunk
        that actually carries text.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        user_turn, assistant_turn = turn[0], turn[1]
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is deliberately not called `message`, so the
    # function parameter is no longer shadowed.
    for chunk in client.chat_completion(
        messages,
        model="Qwen/Qwen2.5-72B-Instruct",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some streamed chunks carry no choices at all; nothing to append.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # `content` may be None or empty (e.g. on the closing chunk);
        # concatenating None would raise TypeError.
        if not token:
            continue
        response += token
        yield response
# Every example pairs a user question with the same system prompt.
_EXAMPLE_SYSTEM_PROMPT = "你是一个歌词助手"
example_prompts = [
    [question, _EXAMPLE_SYSTEM_PROMPT]
    for question in (
        "泰语的起源?",
        "你是谁开发的?",
        "写一篇关于青春的五言绝句",
        "你是谁?",
    )
]
# Delimiter pairs handed to the chatbot for LaTeX rendering:
# (opening marker, closing marker, value of the "display" flag).
latex_delimiters = [
    {"left": opener, "right": closer, "display": display}
    for opener, closer, display in (
        ("$$", "$$", True),
        ("\\[", "\\]", True),
        ("$", "$", False),
        ("\\(", "\\)", False),
    )
]
# Extra controls for the chat UI. Their order mirrors the extra parameters
# of `respond`: system_message, max_tokens, temperature, top_p.
_system_message_box = gr.Textbox(value="You are a helpful assistant.", label="System message")
_max_tokens_slider = gr.Slider(minimum=1, maximum=8888, value=2048, step=1, label="Max new tokens")
_temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
_top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

# Chat display configured with the LaTeX delimiters and a copy button.
_chat_window = gr.Chatbot(
    show_label=True,
    latex_delimiters=latex_delimiters,
    show_copy_button=True,
)

# The chat application: `respond` produces the replies.
demo = gr.ChatInterface(
    fn=respond,
    examples=example_prompts,
    cache_examples=False,
    title="Qwen2.5-72B-Instruct",
    description="千问2.5-72B聊天机器人",
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    chatbot=_chat_window,
)
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    # Enable queuing with a default concurrency limit of 60, then launch
    # the server with a matching thread cap.
    demo.queue(default_concurrency_limit=60)
    demo.launch(max_threads=60)