"""Minimal Gradio demo for the microsoft/Phi-3.5-MoE-instruct chat model."""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "microsoft/Phi-3.5-MoE-instruct"

# Loaded once at startup; downloading/instantiating a MoE model is expensive,
# so it must not happen per-request inside the handler.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


def generate_response(instruction):
    """Generate a model reply for a single instruction.

    Args:
        instruction: The user's instruction or question (plain text).

    Returns:
        The model's reply with the prompt scaffolding stripped.
    """
    input_text = f"Human: {instruction}\n\nAssistant:"
    inputs = tokenizer(input_text, return_tensors="pt")
    # Use max_new_tokens rather than max_length: max_length counts the prompt
    # tokens too, so a long instruction would silently leave little or no
    # budget for the generated reply.
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        num_return_sequences=1,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; keep only what follows the last
    # "Assistant:" marker.
    return response.split("Assistant:")[-1].strip()


iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Phi-3.5-MoE-instruct Demo",
    description="Enter an instruction or question to get a response from the model.",
)

# Guarded so importing this module (e.g. for testing) does not start a server.
if __name__ == "__main__":
    iface.launch()