kurogane
/

Llama3-BioYouri-8B-instruct-chatvector-mergetest

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

Llama3-BioYouri-8B-instruct-chatvector-mergetest / Merger.py

kurogane's picture

Upload Merger.py

1c786f8 verified 6 months ago

history blame contribute delete

3.09 kB

	#coding:utf-8

	import os
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
	from safetensors.torch import save_file, load_file

	# Working directories used by the rest of the script:
	#   DIR_CACHE   - passed as ``cache_dir`` when a model is loaded
	#   DIR_OFFLOAD - parent of the per-model ``offload_folder`` directories
	#   DIR_SAVE    - where every exported / merged .safetensors file is written
	DIR_CACHE = r"E:\llm_baack\cache"
	DIR_OFFLOAD = r"E:\llm_baack\offload"
	DIR_SAVE = r"E:\llm_baack\safetensors"

	# exist_ok=True replaces the exists()-then-makedirs() pair: a single call with
	# no window between the check and the creation. A path that already exists as
	# a regular file now fails here (FileExistsError) instead of being skipped.
	for _dir in [DIR_CACHE, DIR_OFFLOAD, DIR_SAVE]:
	    os.makedirs(_dir, exist_ok=True)

	# Model identifiers handed to AutoModelForCausalLM.from_pretrained().
	# Tensor by tensor, the script computes MODEL_SUBJ + MODEL_VECTOR - MODEL_BASE.
	# Starting point of the merge: the other two models are applied to its weights.
	MODEL_SUBJ = "aaditya/Llama3-OpenBioLLM-8B"
	# Added to MODEL_SUBJ in the first merge step.
	MODEL_VECTOR = "aixsatoshi/Llama-3-youko-8b-instruct-chatvector"
	# Subtracted from that sum in the second merge step.
	MODEL_BASE = "NousResearch/Meta-Llama-3-8B"


	def download_model(model_name):
	    """Load a causal-LM checkpoint on the CPU and export its weights.

	    The model is loaded in bfloat16 through
	    ``AutoModelForCausalLM.from_pretrained`` and its state dict is written to
	    ``<DIR_SAVE>/<name>.safetensors``, where ``<name>`` is ``model_name`` with
	    every "/" replaced by "-". Relies on the module-level ``DIR_CACHE``,
	    ``DIR_OFFLOAD`` and ``DIR_SAVE`` directories.

	    Args:
	        model_name: Model identifier passed to ``from_pretrained``.

	    Returns:
	        A ``(path, name)`` tuple: the path of the written .safetensors file
	        and the "/"-free model name used to build it.
	    """
	    # One sanitized name serves both the offload folder and the output file.
	    s_name = model_name.replace("/", "-")

	    dir_offload = os.path.join(DIR_OFFLOAD, s_name)
	    os.makedirs(dir_offload, exist_ok=True)

	    # NOTE(review): trust_remote_code=True lets the model repository run its
	    # own Python code at load time - confirm this is needed for these models.
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        cache_dir=DIR_CACHE,
	        torch_dtype=torch.bfloat16,
	        device_map="cpu",
	        offload_folder=dir_offload,
	        offload_state_dict=True,
	        trust_remote_code=True,
	    )
	    model.eval()

	    # state_dict() already returns a new mapping, so no extra copy is taken.
	    model_state_dict = model.state_dict()

	    # Sanity output: show the first tensor only (name, dtype, shape, values).
	    first_key = next(iter(model_state_dict), None)
	    if first_key is not None:
	        first_value = model_state_dict[first_key]
	        print(first_key, first_value.dtype, first_value.shape, first_value)

	    dir_save_safe = os.path.join(DIR_SAVE, f"{s_name}.safetensors")
	    save_file(model_state_dict, dir_save_safe)

	    # Release the model before the next one is loaded.
	    del model
	    del model_state_dict

	    return dir_save_safe, s_name


	# Export the three checkpoints to .safetensors files, one after another.
	# Each call returns (path of the exported file, model name with "/" replaced
	# by "-"). Despite the DIR_ prefix, the first element is a file path; the
	# names are reused further down to build the merged files' names.
	DIR_MODEL_SUBJ, s_name_subj = download_model(MODEL_SUBJ)
	DIR_MODEL_VECTOR, s_name_vect = download_model(MODEL_VECTOR)
	DIR_MODEL_BASE, s_name_base = download_model(MODEL_BASE)


	# Merge step 1: add the MODEL_VECTOR weights to the MODEL_SUBJ weights,
	# tensor by tensor, and save the sum as "<subj>+<vect>.safetensors".
	d_state_subj = load_file(DIR_MODEL_SUBJ, device="cpu")
	d_state_vector = load_file(DIR_MODEL_VECTOR, device="cpu")

	# Fail before any tensor is modified if the second checkpoint lacks a tensor;
	# otherwise the KeyError would surface mid-loop on a half-merged dict.
	_missing_keys = [key for key in d_state_subj if key not in d_state_vector]
	if _missing_keys:
	    raise KeyError(
	        f"{len(_missing_keys)} tensor(s) in {DIR_MODEL_SUBJ} are missing from "
	        f"{DIR_MODEL_VECTOR}, e.g. {_missing_keys[0]!r}"
	    )

	# Use the GPU for the arithmetic when there is one; fall back to the CPU so
	# the script also runs on hosts without CUDA.
	_device = "cuda" if torch.cuda.is_available() else "cpu"

	# new_state_dict is the same dict object as d_state_subj: the sum overwrites
	# the loaded tensors in place. Only values of existing keys are replaced, so
	# iterating the dict while assigning is safe.
	new_state_dict = d_state_subj

	with torch.no_grad():
	    for key in d_state_subj:
	        print(key)

	        new_state_dict[key] = (
	            new_state_dict[key].to(_device) + d_state_vector[key].to(_device)
	        ).to("cpu")

	del d_state_subj, d_state_vector
	torch.cuda.empty_cache()
	dir_save_subjpvect = os.path.join(DIR_SAVE, f"{s_name_subj}+{s_name_vect}.safetensors")
	save_file(new_state_dict, dir_save_subjpvect)

	# Merge step 2: subtract the MODEL_BASE weights from the step-1 sum and save
	# the result as "<subj>+<vect>-<base>.safetensors".

	# Load the models.
	d_state_subj_subjpvect = load_file(dir_save_subjpvect, device="cpu")
	d_state_base = load_file(DIR_MODEL_BASE, device="cpu")

	# Confirm that both checkpoints hold exactly the same tensor names. The key
	# sets are compared as a whole: a pairwise zip() stops at the shorter dict and
	# would let extra keys through, and an assert disappears under ``python -O``.
	if d_state_subj_subjpvect.keys() != d_state_base.keys():
	    _mismatched_keys = sorted(d_state_subj_subjpvect.keys() ^ d_state_base.keys())
	    raise ValueError(
	        f"{dir_save_subjpvect} and {DIR_MODEL_BASE} differ in "
	        f"{len(_mismatched_keys)} tensor name(s), e.g. {_mismatched_keys[0]!r}"
	    )

	# Use the GPU for the arithmetic when there is one; fall back to the CPU so
	# the script also runs on hosts without CUDA.
	_device = "cuda" if torch.cuda.is_available() else "cpu"

	# Same dict object as d_state_subj_subjpvect: the difference overwrites the
	# loaded tensors in place.
	new_state_dict = d_state_subj_subjpvect

	with torch.no_grad():
	    for key in new_state_dict:
	        print(key)

	        new_state_dict[key] = (
	            new_state_dict[key].to(_device) - d_state_base[key].to(_device)
	        ).to("cpu")

	save_file(new_state_dict, os.path.join(DIR_SAVE, f"{s_name_subj}+{s_name_vect}-{s_name_base}.safetensors"))