Spaces:
Sleeping
Sleeping
import gradio as gr # type: ignore | |
import spaces # type: ignore | |
# Load model directly | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
# The tokenizer and the weights are both fetched from the same Hub checkpoint.
# trust_remote_code=True lets transformers run Python code shipped with that
# repository, so only point this at a checkpoint you trust.
_CHECKPOINT = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT, trust_remote_code=True)
def greet(name, sliderint):
    """Return the demo greeting built from a name and a slider value.

    Args:
        name: Text to greet; must be a ``str``.
        sliderint: Slider value; it is converted with ``str()`` and appended
            directly after the two exclamation marks.

    Returns:
        The greeting string, e.g. ``"Hellonyaaaaa Ada!!7"``.
    """
    pieces = ("Hellonyaaaaa ", name, "!!", str(sliderint))
    return "".join(pieces)
# Jinja chat template handed to tokenizer.apply_chat_template().
# Every message is rendered as "<|role|>content" followed by a newline; when a
# generation prompt is requested, a trailing "<|model|>" line is appended.
# NOTE(review): the "<|model|>" marker and the absence of an end-of-turn token
# look unusual for Phi-3 - confirm against the model card's chat format.
chat_template = "".join(
    [
        # One line per message in the conversation.
        "{% for message in messages %}",
        "{{'<|' + message['role'] + '|>' + message['content'] + '\n'}}",
        "{% endfor %}",
        # Optional cue telling the model it is its turn to speak.
        "{% if add_generation_prompt %}",
        "{{ '<|model|>\n' }}",
        "{% endif %}",
    ]
)
# @spaces.GPU(duration=45) | |
def chatinterface_fn(message, history):
    """Generate the model's reply for one turn of the chat UI.

    Args:
        message: The newest user message.
        history: Earlier turns as ``(user, assistant)`` pairs, which is the
            shape the loop below unpacks. ``None`` is treated as no history.

    Returns:
        The text of the newly generated reply only. The prompt (history plus
        the new message) is not echoed back.
    """
    prompt = []
    for human, assistant in history or []:
        # The chat template concatenates message['content'] with strings, so a
        # missing (None) side of a pair would make rendering fail; skip it.
        if human is not None:
            prompt.append({"role": "user", "content": human})
        if assistant is not None:
            prompt.append({"role": "model", "content": assistant})
    prompt.append({"role": "user", "content": message})

    token_ids = tokenizer.apply_chat_template(
        prompt,
        tokenize=True,
        add_generation_prompt=True,
        chat_template=chat_template,
        return_tensors="pt",
    )
    print("token_ids:", token_ids)  # added for debugging

    output_ids = model.generate(
        token_ids.to(model.device),
        temperature=0.1,
        do_sample=True,
        top_p=0.95,
        top_k=40,
        max_new_tokens=256,
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Decode only what comes after the prompt so the chat window
    # shows the reply instead of the whole conversation again.
    prompt_length = token_ids.shape[-1]
    reply_ids = output_ids[0][prompt_length:]
    text = tokenizer.decode(reply_ids, skip_special_tokens=True)
    print(text)
    return text
def infer(message: str) -> str:
    """Continue ``message`` using the model's default generation settings.

    Args:
        message: Text to continue. When it is empty or only whitespace, the
            previous hard-coded demo prompt ``"hello, this is"`` is used so
            that generation never receives an empty input.

    Returns:
        The decoded output of ``model.generate``, with special tokens removed.
    """
    # The original always encoded a fixed string and ignored the argument;
    # that string is kept only as the fallback for blank input.
    fallback_prompt = "hello, this is"
    prompt = message if message and message.strip() else fallback_prompt

    input_ids = tokenizer.encode(
        prompt, add_special_tokens=False, return_tensors="pt"
    ).to(model.device)
    print(model.device)
    outputs = model.generate(input_ids)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text
# Build the demo page. Components created inside the Blocks context are added
# to the page in the order they are constructed.
with gr.Blocks() as demo:
    # Stand-alone text box labelled "name".
    name = gr.Textbox(label="name")

    # Greeting demo: a text input and a slider feed greet().
    output = gr.Interface(fn=greet, inputs=["text", "slider"], outputs="text")

    # Chat front-end backed by chatinterface_fn().
    a = gr.ChatInterface(chatinterface_fn, title="microsoft/Phi-3-mini-4k-instruct")

    # Single-shot text generation backed by infer().
    b = gr.Interface(fn=infer, inputs="text", outputs="text")

# Start the web server once the layout has been fully declared.
demo.launch()