How to use:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# Load Base Model
base_model_id = "mistralai/Mistral-7B-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config)
eval_tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    add_bos_token=True,
    trust_remote_code=True,
)
eval_tokenizer.pad_token = eval_tokenizer.eos_token
# Load the PEFT adapter weights
from peft import PeftModel
ft_model = PeftModel.from_pretrained(model, "mistral-samsum-finetune/checkpoint-150")
# Format the Sample Input
def formatting_func(example):
    text = f"### Summarize this dialog:\n{example['dialogue']}\n### Summary:\n{example['summary']}"
    return text
max_length = 256
eval_prompt = {'dialogue': "Amanda: I baked cookies. Do you want some? Jerry: Sure! Amanda: I'll bring you tomorrow :-)",
               'summary': ''}
eval_prompt = formatting_func(eval_prompt)
# Generate a summary for the sample input
model_input = eval_tokenizer(
    eval_prompt,
    truncation=True,
    max_length=max_length,
    padding="max_length",
    return_tensors="pt",
).to("cuda")
ft_model.eval()
with torch.no_grad():
    print(eval_tokenizer.decode(ft_model.generate(**model_input,
                                                  max_new_tokens=256,
                                                  repetition_penalty=1.15)[0],
                                skip_special_tokens=True))
# Example output:
"""
### Summarize this dialog:
Amanda: I baked cookies. Do you want some? Jerry: Sure! Amanda: I'll bring you tomorrow :-)
### Summary:
Jerry will get some cookies from Amanda tomorrow.
"""
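If you need a standalone checkpoint (for example, to serve the model without peft at inference time), you can merge the adapter into the base weights. This is a minimal sketch, not part of the original recipe: merging into a 4-bit quantized base is lossy, so the base model is reloaded in bfloat16 first, and the output directory name below is arbitrary.
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Reload the base model in half precision (merging into 4-bit weights is not recommended)
base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    torch_dtype=torch.bfloat16,
)
merged = PeftModel.from_pretrained(base, "mistral-samsum-finetune/checkpoint-150").merge_and_unload()
merged.save_pretrained("mistral-samsum-merged")          # arbitrary output directory
eval_tokenizer.save_pretrained("mistral-samsum-merged")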