# Load a PEFT adapter on top of the base Gemma model and generate text.
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)
from peft import PeftModel
import torch

# Base checkpoint on the Hugging Face Hub; the trained adapter lives in
# the current directory.
orig_checkpoint = 'google/gemma-2b'
checkpoint = '.'
HF_TOKEN = ''  # Hugging Face access token (Gemma weights are gated)
# Romanian prompt: "Hi, to get my ID card changed, can I..."
PROMPT = 'Salut, ca sa imi schimb buletinul pot sa'

# Maximum number of new tokens to generate.
seq_len = 256

# Load the tokenizer and the base model weights.
tokenizer = AutoTokenizer.from_pretrained(orig_checkpoint, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(orig_checkpoint, token=HF_TOKEN)
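
# Optional sketch (assumption, values illustrative): load the base model in
# bfloat16 to roughly halve memory use; torch_dtype is a standard
# from_pretrained keyword argument.
# model = AutoModelForCausalLM.from_pretrained(
#     orig_checkpoint, token=HF_TOKEN, torch_dtype=torch.bfloat16
# )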

# Attach the trained adapter, then fold its weights into the base model.
# merge_and_unload() returns the merged model, so assign the result back.
model = PeftModel.from_pretrained(model, checkpoint)
model = model.merge_and_unload()
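
# Optional sketch: save the merged weights as a plain transformers
# checkpoint so later runs can load them without peft installed;
# 'gemma-2b-merged' is a placeholder directory name.
# model.save_pretrained('gemma-2b-merged')
# tokenizer.save_pretrained('gemma-2b-merged')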

# Move the merged model to the GPU.
model = model.cuda()
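
# Optional sketch: fall back to CPU when no GPU is available (the inputs
# below would then need .to(device) instead of .cuda()).
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# model = model.to(device)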

# Tokenize the prompt, generate a continuation, and decode it.
inputs = tokenizer.encode(PROMPT, return_tensors="pt").cuda()
outputs = model.generate(inputs, max_new_tokens=seq_len)

print(tokenizer.decode(outputs[0]))
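
# Optional sketch: generate() above uses greedy decoding by default;
# sampling often yields more varied text. These are standard generate()
# keyword arguments, with illustrative values.
# outputs = model.generate(
#     inputs,
#     max_new_tokens=seq_len,
#     do_sample=True,
#     temperature=0.7,
#     top_p=0.9,
# )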