import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the OpenVINO model and tokenizer
model_id = "hsuwill000/SmolLM2-135M-openvino"
# OpenVINO runs on CPU by default; device_map is a transformers concept
# and is not passed to OVModelForCausalLM
model = OVModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Build the text-generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def respond(message, history):
    # Prepend the previous turn so the model sees some context
    input_text = message if not history else history[-1]["content"] + " " + message

    # Generate a reply; return_full_text=False strips the echoed prompt
    # so the chat window shows only the new text
    response = pipe(
        input_text,
        max_length=500,
        truncation=True,
        num_return_sequences=1,
        return_full_text=False,
    )
    reply = response[0]["generated_text"]

    print(f"Message: {message}")
    print(f"Reply: {reply}")
    return reply

# Gradio chat UI; type='messages' passes history as a list of
# {"role": ..., "content": ...} dicts
demo = gr.ChatInterface(
    fn=respond,
    title="Chat with SmolLM2-135M-openvino",
    description="Chat with SmolLM2-135M-openvino!",
    type="messages",
)

if __name__ == "__main__":
    demo.launch()
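
# --- Sketch: chat-templated multi-turn prompting (commented out) ---------
# respond() above only folds in the single previous turn. If the checkpoint
# ships a chat template (true for the SmolLM2 Instruct variants; whether
# this base 135M export has one is an assumption to verify), the full
# 'messages'-style Gradio history could be rendered into the prompt:
#
#     def respond_with_history(message, history):
#         turns = history + [{"role": "user", "content": message}]
#         prompt = tokenizer.apply_chat_template(
#             turns, tokenize=False, add_generation_prompt=True
#         )
#         out = pipe(prompt, max_length=500, truncation=True,
#                    return_full_text=False)
#         return out[0]["generated_text"]
#
# Swap fn=respond for fn=respond_with_history in gr.ChatInterface to try it.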