# phi-4 / app.py
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import torch
from threading import Thread
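
# Load both models and tokenizers once at startup. torch_dtype="auto" keeps the
# dtype stored in each checkpoint rather than defaulting to float32.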
phi4_model_path = "microsoft/phi-4"
phi4_mini_model_path = "microsoft/Phi-4-mini-instruct"

device = "cuda:0" if torch.cuda.is_available() else "cpu"

phi4_model = AutoModelForCausalLM.from_pretrained(phi4_model_path, torch_dtype="auto").to(device)
phi4_tokenizer = AutoTokenizer.from_pretrained(phi4_model_path)
phi4_mini_model = AutoModelForCausalLM.from_pretrained(phi4_mini_model_path, torch_dtype="auto").to(device)
phi4_mini_tokenizer = AutoTokenizer.from_pretrained(phi4_mini_model_path)
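
# On ZeroGPU Spaces, @spaces.GPU requests a GPU for the duration of the call;
# duration=60 allows up to roughly 60 seconds per generation.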
@spaces.GPU(duration=60)
def generate_response(user_message, model_name, max_tokens, temperature, top_k, top_p, repetition_penalty, history_state):
    # Ignore empty submissions; yield (rather than return a value) so this
    # generator still emits the unchanged history to both outputs.
    if not user_message.strip():
        yield history_state, history_state
        return
    # Select the model, tokenizer, and chat-format tags
    if model_name == "Phi-4":
        model = phi4_model
        tokenizer = phi4_tokenizer
        start_tag = "<|im_start|>"
        sep_tag = "<|im_sep|>"
        end_tag = "<|im_end|>"
    elif model_name == "Phi-4-mini-instruct":
        model = phi4_mini_model
        tokenizer = phi4_mini_tokenizer
        start_tag = ""
        sep_tag = ""
        end_tag = "<|end|>"
    else:
        raise ValueError(f"Unknown model selection: {model_name}")
    # System prompt recommended by Microsoft
    system_message = "You are a friendly and knowledgeable assistant, here to help with any questions or tasks."

    # Build the prompt in the selected model's chat format:
    # Phi-4 uses <|im_start|>role<|im_sep|>content<|im_end|>;
    # Phi-4-mini-instruct uses <|role|>content<|end|>.
    if model_name == "Phi-4":
        prompt = f"{start_tag}system{sep_tag}{system_message}{end_tag}"
        for message in history_state:
            if message["role"] == "user":
                prompt += f"{start_tag}user{sep_tag}{message['content']}{end_tag}"
            elif message["role"] == "assistant" and message["content"]:
                prompt += f"{start_tag}assistant{sep_tag}{message['content']}{end_tag}"
        prompt += f"{start_tag}user{sep_tag}{user_message}{end_tag}{start_tag}assistant{sep_tag}"
    else:
        prompt = f"<|system|>{system_message}{end_tag}"
        for message in history_state:
            if message["role"] == "user":
                prompt += f"<|user|>{message['content']}{end_tag}"
            elif message["role"] == "assistant" and message["content"]:
                prompt += f"<|assistant|>{message['content']}{end_tag}"
        prompt += f"<|user|>{user_message}{end_tag}<|assistant|>"
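
    # Illustrative shape of the finished Phi-4 prompt (hypothetical one-turn history):
    # <|im_start|>system<|im_sep|>You are a friendly...<|im_end|>
    # <|im_start|>user<|im_sep|>Hi<|im_end|><|im_start|>assistant<|im_sep|>Hello!<|im_end|>
    # <|im_start|>user<|im_sep|><current message><|im_end|><|im_start|>assistant<|im_sep|>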
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Fall back to greedy decoding when the sampling knobs are all neutral
    do_sample = not (temperature == 1.0 and top_k >= 100 and top_p == 1.0)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": int(max_tokens),
        "do_sample": do_sample,
        "temperature": temperature,
        "top_k": int(top_k),
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "streamer": streamer,
    }

    # Run generation in a background thread so the streamer can be consumed here
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    # Stream the response, stripping any chat-format special tags as they arrive
    special_tags = (
        "<|im_start|>", "<|im_sep|>", "<|im_end|>",
        "<|end|>", "<|system|>", "<|user|>", "<|assistant|>",
    )
    assistant_response = ""
    new_history = history_state + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ""},
    ]
    for new_token in streamer:
        cleaned_token = new_token
        for tag in special_tags:
            cleaned_token = cleaned_token.replace(tag, "")
        assistant_response += cleaned_token
        new_history[-1]["content"] = assistant_response.strip()
        yield new_history, new_history
    yield new_history, new_history
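
# Example prompts surfaced as one-click buttons under the chat box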
example_messages = {
"Learn about physics": "Explain Newton’s laws of motion.",
"Discover space facts": "What are some interesting facts about black holes?",
"Write a factorial function": "Write a Python function to calculate the factorial of a number."
}
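
# Build the Gradio interface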
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # Phi-4 Models Chatbot
        Welcome to the Phi-4 Chatbot! You can chat with Microsoft's Phi-4 or Phi-4-mini-instruct models. Adjust the settings on the left to customize the model's responses.
        """
    )
    history_state = gr.State([])
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Settings")
            model_dropdown = gr.Dropdown(
                choices=["Phi-4", "Phi-4-mini-instruct"],
                label="Select Model",
                value="Phi-4"
            )
            max_tokens_slider = gr.Slider(
                minimum=64,
                maximum=4096,
                step=50,
                value=512,
                label="Max Tokens"
            )
            with gr.Accordion("Advanced Settings", open=False):
                temperature_slider = gr.Slider(
                    minimum=0.1,
                    maximum=2.0,
                    value=1.0,
                    label="Temperature"
                )
                top_k_slider = gr.Slider(
                    minimum=1,
                    maximum=100,
                    step=1,
                    value=50,
                    label="Top-k"
                )
                top_p_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.9,
                    label="Top-p"
                )
                repetition_penalty_slider = gr.Slider(
                    minimum=1.0,
                    maximum=2.0,
                    value=1.0,
                    label="Repetition Penalty"
                )
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(label="Chat", type="messages")
            with gr.Row():
                user_input = gr.Textbox(
                    label="Your message",
                    placeholder="Type your message here...",
                    scale=3
                )
                submit_button = gr.Button("Send", variant="primary", scale=1)
                clear_button = gr.Button("Clear", scale=1)
            gr.Markdown("**Try these examples:**")
            with gr.Row():
                example1_button = gr.Button("Learn about physics")
                example2_button = gr.Button("Discover space facts")
                example3_button = gr.Button("Write a factorial function")
    submit_button.click(
        fn=generate_response,
        inputs=[user_input, model_dropdown, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider, history_state],
        outputs=[chatbot, history_state]
    ).then(
        fn=lambda: gr.update(value=""),
        inputs=None,
        outputs=user_input
    )
    clear_button.click(
        fn=lambda: ([], []),
        inputs=None,
        outputs=[chatbot, history_state]
    )
    example1_button.click(
        fn=lambda: gr.update(value=example_messages["Learn about physics"]),
        inputs=None,
        outputs=user_input
    )
    example2_button.click(
        fn=lambda: gr.update(value=example_messages["Discover space facts"]),
        inputs=None,
        outputs=user_input
    )
    example3_button.click(
        fn=lambda: gr.update(value=example_messages["Write a factorial function"]),
        inputs=None,
        outputs=user_input
    )
demo.launch(ssr_mode=False)