"""Gradio chat demo for the Qwen/Qwen-1_8B-Chat model."""

import os

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

MODEL_NAME = "Qwen/Qwen-1_8B-Chat"

# Initialize the Qwen model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, device_map="auto", trust_remote_code=True
).eval()


class ChatBot:
    """Stateful wrapper around ``model.chat`` that remembers one conversation."""

    def __init__(self):
        # Passed to ``model.chat`` unchanged on the first turn.
        self.history = None

    def predict(self, user_input: str, system_prompt: str = "") -> str:
        """Send one user turn to the model and return its reply.

        Args:
            user_input: The text typed by the user for this turn.
            system_prompt: Text forwarded to ``model.chat`` as ``system``.

        Returns:
            The response produced by ``model.chat``. The history returned
            alongside it replaces ``self.history`` for the next call.
        """
        response, self.history = model.chat(
            tokenizer, user_input, history=self.history, system=system_prompt
        )
        return response


# Kept for backward compatibility with code that imports ``bot``. The UI no
# longer calls it directly, because a single module-level instance would make
# every visitor read and extend the same conversation history.
bot = ChatBot()


def respond(user_input, system_prompt="", history=None):
    """Answer one turn using the history that belongs to the calling session.

    Args:
        user_input: The text typed by the user for this turn.
        system_prompt: Text forwarded to the model as the system prompt.
        history: The history value returned by the previous call for this
            session, or ``None`` on the first turn.

    Returns:
        A ``(reply, history)`` pair: the model's reply and the updated history
        to hand back on the next call.
    """
    # A throwaway ChatBot per call keeps the model-calling logic in one place
    # while the history itself travels through Gradio's "state" component.
    session = ChatBot()
    session.history = history
    reply = session.predict(user_input, system_prompt)
    return reply, session.history


title = "👋🏻Welcome to🌟Tonic's🦆Qwen📲On-Device🗣️Chat!"

# Adjacent literals are concatenated into one string; the pieces are split
# only to keep source lines readable.
description = (
    " You can use this Space to test out the current model "
    "[Qwen/Qwen-1_8B-Chat](https://huggingface.co./Qwen/Qwen-1_8B-Chat) "
    "You can also use 🧑🏻‍🚀Qwen/Qwen-1_8B-Chat🚀 by cloning this space. "
    "🧬🔬🔍 Simply click here: Duplicate Space "
    "Join us : 🌟TeamTonic🌟 is always making cool demos! "
    "Join our active builder's🛠️community on 👻Discord: "
    "[Discord](https://discord.gg/nXx5wbX9) "
    "On 🤗Huggingface: [TeamTonic](https://huggingface.co./TeamTonic) & "
    "[MultiTransformer](https://huggingface.co./MultiTransformer) "
    "On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 "
    "[PolyGPT](https://github.com/tonic-ai/polygpt-alpha) "
)

examples = [["Hello, how are you today?"]]

iface = gr.Interface(
    fn=respond,
    title=title,
    description=description,
    examples=examples,
    # Examples only pre-fill the text boxes. They are not run through
    # ``respond`` ahead of time, since it also returns the session history.
    cache_examples=False,
    # User input, system prompt, and the per-session conversation history
    # (see Gradio's documentation on session state for ``"state"``).
    inputs=["text", "text", "state"],
    outputs=["text", "state"],
    theme="default",
)
iface.launch()