import os
import sys

import torch
from transformers import LlamaTokenizer

# Make the parent directory importable so the local Yuan model code is found.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))

from yuan_moe_hf_model import YuanForCausalLM

print("Creat tokenizer...") |
|
|
|
tokenizer = LlamaTokenizer.from_pretrained('/temp_data/LLM_test/MOE/moe_yuan', add_eos_token=False, add_bos_token=False, eos_token='<eod>') |
|
tokenizer.add_tokens(['<sep>', '<pad>', '<mask>', '<predict>', '<FIM_SUFFIX>', '<FIM_PREFIX>', '<FIM_MIDDLE>','<commit_before>','<commit_msg>','<commit_after>','<jupyter_start>','<jupyter_text>','<jupyter_code>','<jupyter_output>','<empty_output>'], special_tokens=True) |
|
|
|
|
|
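
# Sanity check (my addition, not in the original script): each registered
# special token should now map to a single vocabulary id rather than being
# split into sub-word pieces.
for tok in ('<sep>', '<pad>', '<eod>'):
    assert len(tokenizer.encode(tok)) == 1, f"{tok} was split into pieces"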
print("Creat model...") |
|
model = YuanForCausalLM.from_pretrained('/temp_data/LLM_test/MOE/moe_yuan', torch_dtype=torch.bfloat16, trust_remote_code=True).to("cuda:0") |
|
|
|
|
|
|
|
|
|
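
# Note (an assumption on my part): the Yuan checkpoint is expected to already
# reserve embedding rows for the tokens registered above. If it did not, the
# embedding matrix would need to grow to match the tokenizer:
#   model.resize_token_embeddings(len(tokenizer))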

# Greedy decoding: with do_sample=False below, generate() picks the argmax
# token at each step, so top_k/top_p/temperature have no effect here; they
# are kept from the original configuration.
generation_params = {
    "max_length": 1024,
    "top_k": 1,
    "top_p": 0.0,
    "temperature": 1.0
}
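
# A sampling alternative (illustrative, not in the original script): for
# non-deterministic output, pass do_sample=True to generate() along with,
# e.g., top_p=0.9 and temperature=0.7 instead of the greedy settings above.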

# Prompt (in Chinese): "If you were an algorithm engineer asked to write a
# plan for large-model work, how would you write it?"
inputs = tokenizer("如果你是一个算法工程师,让你写一个大模型相关的规划,你应该怎么写?",
                   return_tensors="pt")["input_ids"].to("cuda:0")
outputs = model.generate(inputs, do_sample=False, **generation_params)
print(tokenizer.decode(outputs[0]))
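
# Optional variant (my addition, not in the original script): decode only the
# newly generated tokens and drop special tokens such as the '<eod>'
# terminator, so the prompt is not echoed back.
response = tokenizer.decode(outputs[0][inputs.shape[1]:],
                            skip_special_tokens=True)
print(response)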