import gradio as gr  # type: ignore
import spaces  # type: ignore
import torch

# Load the model and tokenizer directly from the Hugging Face Hub.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True
)

# On a ZeroGPU Space the `spaces` package defers the actual CUDA allocation
# until a @spaces.GPU-decorated function runs, so calling .to("cuda") at
# import time is safe there; on a plain machine this needs a CUDA device.
model.to("cuda")

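# Optional (a sketch, not what this script does by default): loading in half
# precision roughly halves GPU memory use.
#   model = AutoModelForCausalLM.from_pretrained(
#       "microsoft/Phi-3-mini-4k-instruct",
#       trust_remote_code=True,
#       torch_dtype=torch.float16,
#   )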

def greet(name, sliderint):
    # Smoke-test handler wired to the first Interface below.
    return f"Hellonyaaaaa {name}!!{sliderint}"


# Custom Jinja chat template (not Phi-3's built-in one): each turn renders as
# "<|role|>content\n", and add_generation_prompt appends "<|model|>\n" so the
# model continues as the assistant.
chat_template = (
    "{% for message in messages %}"
    "{{ '<|' + message['role'] + '|>' + message['content'] + '\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|model|>\n' }}"
    "{% endif %}"
)
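
# For example, with one prior exchange the template above renders:
#   <|user|>Hi
#   <|model|>Hello!
#   <|user|>How are you?
#   <|model|>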


@spaces.GPU(duration=45)  # request a GPU slot for up to 45 s per call (ZeroGPU)
def chatinterface_fn(message, history):
    # Rebuild the conversation in the role/content format the template expects.
    prompt = []
    for human, assistant in history:
        prompt.append({"role": "user", "content": human})
        prompt.append({"role": "model", "content": assistant})

    prompt.append({"role": "user", "content": message})

    token_ids = tokenizer.apply_chat_template(
        prompt,
        tokenize=True,
        add_generation_prompt=True,
        chat_template=chat_template,
        return_tensors="pt",
    )
    print("token_ids:", token_ids)  # added for debugging
    output_ids = model.generate(
        token_ids.to(model.device),
        temperature=0.1,
        do_sample=True,
        top_p=0.95,
        top_k=40,
        max_new_tokens=256,
    )
    # Decode only the newly generated tokens, not the echoed prompt.
    text = tokenizer.decode(
        output_ids[0][token_ids.shape[1]:], skip_special_tokens=True
    )

    print(text)
    return text
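
# Quick smoke test without launching the UI (hypothetical example call):
#   chatinterface_fn("What is the capital of France?", [])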


@spaces.GPU(duration=45)
def infer(message: str) -> str:
    # Fixed-prompt sanity check that generation works inside the GPU context;
    # the `message` argument is intentionally ignored.
    input_ids = tokenizer.encode(
        "hello, this is", add_special_tokens=False, return_tensors="pt"
    ).to(model.device)
    print(model.device)  # should report a CUDA device inside @spaces.GPU
    outputs = model.generate(input_ids, max_new_tokens=32)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text


# Assemble the page: a standalone textbox plus three sub-apps stacked in one
# Blocks layout.
with gr.Blocks() as demo:
    name = gr.Textbox(label="name")
    greet_ui = gr.Interface(fn=greet, inputs=["text", "slider"], outputs="text")
    chat_ui = gr.ChatInterface(
        chatinterface_fn, title="microsoft/Phi-3-mini-4k-instruct"
    )
    infer_ui = gr.Interface(fn=infer, inputs="text", outputs="text")

demo.launch()
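
# A typical local run (the `spaces` decorators should be no-ops outside
# Hugging Face Spaces, but model.to("cuda") above still needs a CUDA device):
#   $ pip install gradio spaces torch transformers
#   $ python app.py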