import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import torch

BASE_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
PEFT_MODEL = "Aleks84/autotrain-5zwfh-fm328"
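
# Load the base tokenizer and model weights in float32; low_cpu_mem_usage reduces peak RAM during loading.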
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,
    trust_remote_code=True,
    low_cpu_mem_usage=True
)
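
# Resize the embedding matrix to the tokenizer's vocabulary size and reuse the same matrix
# for the LM head, presumably so the shapes line up with the adapter loaded below.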
model.resize_token_embeddings(len(tokenizer))
original_embeddings = model.get_input_embeddings().weight.data
new_embeddings = original_embeddings[:len(tokenizer), :]
model.get_input_embeddings().weight.data = new_embeddings
model.lm_head.weight.data = new_embeddings
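
# Attach the fine-tuned PEFT adapter on top of the prepared base model.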
peft_config = PeftConfig.from_pretrained(PEFT_MODEL)
model = PeftModel.from_pretrained(
    model,
    PEFT_MODEL,
    config=peft_config,
    adapter_name="default",
    strict=False
)
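
# Fold the adapter weights into the base model and switch to evaluation mode for inference.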
model = model.merge_and_unload()
model.eval()
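

# Chat handler: rebuilds the conversation as a ChatML-style prompt (the <|im_start|>/<|im_end|>
# format below) on every turn and returns only the newly generated assistant text.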
def generate_response(message, history):
    prompt = "<|im_start|>system\nAnswer as an assistant<|im_end|>\n"
    for user, assistant in history:
        prompt += f"<|im_start|>user\n{user}<|im_end|>\n"
        prompt += f"<|im_start|>assistant\n{assistant}<|im_end|>\n"
    prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=128,
            do_sample=True,  # temperature only takes effect when sampling is enabled
            temperature=0.7,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id
        )
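    # Decode only the tokens generated after the prompt, dropping special tokens.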
    return tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
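
# Build the Gradio chat UI and launch the app.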
gr.ChatInterface(
    fn=generate_response,
    title="DeepSeek Assistant",
    examples=["How does AI work?", "Write code in Python"],
    theme=gr.themes.Soft()
).launch()