"""Merge a PEFT/LoRA adapter into Qwen2-7B-Instruct and save the result.

Loads the fp16 base model, attaches the adapter checkpoint from ``./ckpt``,
folds the adapter weights into the base weights, and writes the standalone
merged model (sharded safetensors) together with its tokenizer.

NOTE: requires network access (or a populated HF cache) for the base model.
"""
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer


def main() -> None:
    """Merge the adapter into the base model and save model + tokenizer."""
    model_id = "Qwen/Qwen2-7B-Instruct"
    adapter_path = "./ckpt"
    output_path = "Qwen2-7B-Instruct-Merged"

    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    # fp16 + device_map="auto" lets accelerate spread the 7B model across
    # whatever devices are available.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16,
        trust_remote_code=True,
    ).eval()

    # Attach the fine-tuned adapter, then merge its weights into the base
    # model so the saved checkpoint no longer depends on the peft runtime.
    model = PeftModel.from_pretrained(model, model_id=adapter_path)
    merged_model = model.merge_and_unload()

    merged_model.save_pretrained(
        output_path, max_shard_size="2048MB", safe_serialization=True
    )
    # Fix: the original never persisted the tokenizer, leaving the merged
    # checkpoint unusable on its own (AutoTokenizer.from_pretrained on the
    # output dir would fail).
    tokenizer.save_pretrained(output_path)


if __name__ == "__main__":
    main()