Spaces:
Runtime error
Runtime error
from transformers import AutoModelForCausalLM, AutoTokenizer | |
from transformers import pipeline | |
import torch | |
import gradio as gr | |
# Hub checkpoint shared by the tokenizer and the model weights.
base_model_name = "microsoft/Phi-3-mini-4k-instruct"

# Load the causal LM on CPU in float32. `trust_remote_code=True` permits
# custom modelling code published with the checkpoint to be used.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

# Tokenizer from the same checkpoint, so its vocabulary matches the model.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    trust_remote_code=True,
)
def format_prompt(message, history):
    """Build a Phi-3 chat prompt from the new message and the prior turns.

    Args:
        message: The latest user message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs for the
            earlier turns, oldest first.

    Returns:
        A single prompt string: the system turn, every past user/assistant
        turn, the new user turn, and finally an open ``<|assistant|>`` tag
        for the model to complete.
    """
    system_prompt = "You are Phi3, a highly knowledgeable and friendly super intelligent AI assistant equipped with extensive information across various domains."
    # Every role tag is followed by a newline and every turn is closed with
    # "<|end|>\n", so user/assistant turns use the same layout as the system
    # turn (the layout described for the Phi-3 chat format).
    parts = [f"<|system|>\n{system_prompt}<|end|>\n"]
    for user_prompt, bot_response in history:
        parts.append(f"<|user|>\n{user_prompt}<|end|>\n")
        parts.append(f"<|assistant|>\n{bot_response}<|end|>\n")
    # Leave the assistant turn open so generation continues from here.
    parts.append(f"<|user|>\n{message}<|end|>\n<|assistant|>\n")
    return "".join(parts)
def generate(prompt, history, max_new_tokens=128, temperature=0.6):
    """Generate the assistant's reply to ``prompt`` given the chat history.

    Args:
        prompt: The latest user message.
        history: Prior turns, passed through to ``format_prompt``.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature; values below 1e-2 are raised
            to 1e-2.

    Returns:
        The generated reply text, without the prompt.
    """
    # Floor the temperature at 1e-2 so it stays strictly positive.
    temperature = max(float(temperature), 1e-2)
    formatted_prompt = format_prompt(prompt, history)

    # The model and tokenizer are already loaded at module level; the
    # pipeline only wraps them for this call.
    textgen = pipeline("text-generation", model=model, tokenizer=tokenizer)
    output = textgen(
        formatted_prompt,
        # Bound the reply length directly instead of deriving a total
        # max_length from a separate tokenization of the prompt.
        max_new_tokens=int(max_new_tokens),
        # Temperature only takes effect when sampling is enabled.
        do_sample=True,
        temperature=temperature,
        # Return just the continuation rather than stripping the prompt
        # from the full text with str.replace afterwards.
        return_full_text=False,
    )
    return output[0]["generated_text"]
# Chat display widget: custom avatars, copy and like controls, no label.
mychatbot = gr.Chatbot(
    avatar_images=["user.png", "botp.png"],
    bubble_full_width=False,
    show_label=False,
    show_copy_button=True,
    likeable=True,
)

# Chat UI that routes each user message through `generate`; the retry and
# undo buttons are disabled.
demo = gr.ChatInterface(
    fn=generate,
    chatbot=mychatbot,
    title="Phi-3 Mini Chat Demo",
    retry_btn=None,
    undo_btn=None,
)

# Enable request queuing, then start the app without the API page.
demo.queue().launch(show_api=False)