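"""Gradio demo for microsoft/Phi-3-mini-4k-instruct on a Hugging Face Space."""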
import gradio as gr # type: ignore
import spaces # type: ignore
import torch
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True
)
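# Assumes a ZeroGPU Space: the model is moved to CUDA at startup, and the GPU
# is actually attached while a @spaces.GPU-decorated handler runs.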
model.to("cuda")
def greet(name, sliderint):
return "Hellonyaaaaa " + name + "!!" + str(sliderint)
chat_template = (
"{% for message in messages %}"
"{{'<|' + message['role'] + '|>' + message['content'] + '\n'}}"
"{% endfor %}"
"{% if add_generation_prompt %}"
"{{ '<|model|>\n' }}"
"{% endif %}"
)
# @spaces.GPU(duration=45)
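# history arrives as a list of (user, assistant) pairs (Gradio's tuples format).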
def chatinterface_fn(message, history):
    prompt = []
    for human, assistant in history:
        prompt.append({"role": "user", "content": human})
        prompt.append({"role": "model", "content": assistant})
    prompt.append({"role": "user", "content": message})
    token_ids = tokenizer.apply_chat_template(
        prompt,
        tokenize=True,
        add_generation_prompt=True,
        chat_template=chat_template,
        return_tensors="pt",
    )
print("token_ids:", token_ids) # デバッグ用に追加
    output_ids = model.generate(
        token_ids.to(model.device),
        temperature=0.1,
        do_sample=True,
        top_p=0.95,
        top_k=40,
        max_new_tokens=256,
    )
    # Decode only the newly generated tokens so the reply does not echo the prompt
    text = tokenizer.decode(
        output_ids[0][token_ids.shape[-1]:], skip_special_tokens=True
    )
    print(text)
    return text
@spaces.GPU(duration=45)
def infer(message: str) -> str:
    # NOTE: `message` is ignored; a fixed prompt serves as a GPU smoke test.
    input_ids = tokenizer.encode(
        "hello, this is", add_special_tokens=False, return_tensors="pt"
    ).to(model.device)
    print(model.device)
    outputs = model.generate(input_ids)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text
with gr.Blocks() as demo:
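    # Instantiating Interface/ChatInterface inside a Blocks context renders
    # each of them onto this page.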
    name = gr.Textbox(label="name")
    output = gr.Interface(fn=greet, inputs=["text", "slider"], outputs="text")
    a = gr.ChatInterface(chatinterface_fn, title="microsoft/Phi-3-mini-4k-instruct")
    b = gr.Interface(fn=infer, inputs="text", outputs="text")
demo.launch()