# -*- coding: utf-8 -*- from __future__ import annotations import fla # noqa from lm_eval.__main__ import cli_evaluate from lm_eval.api.registry import register_model from lm_eval.models.huggingface import HFLM from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig from hf_gpt.hf_model import HF_GPTForCausalLM from hf_gpt.hf_config import HFConfig import requests import wandb import lm_eval # from lm_eval.loggers import WandbLogger import argparse AutoConfig.register("hf_gpt",HFConfig) AutoModelForCausalLM.register(HFConfig,HF_GPTForCausalLM) import logging logging.basicConfig(level=logging.INFO) import torch import os import pdb import os os.environ['HF_HOME'] = '/lustre/fs8/portfolios/nvr/users/ahatamizadeh/hf_cache/' def is_directory_non_empty(directory): if not os.path.isdir(directory): return "The provided path is not a directory." return len(os.listdir(directory)) > 0 def main(args): ### First convert to Huggingface models when neccessary import datasets datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True hf_save_dir = args.hf_save_dir or os.path.dirname(args.ckpt_path) # for example: /lustre/fsw/portfolios/nvr/users/soyang/code/next_gen_llm-1/checkpoint/outputs/tsz512x4k_20B_Samba_421M_tsz512x4k_20B_Samba_421M_sy_stream_v11/iter-009198-ckpt.pth ckpt = torch.load(args.ckpt_path) print("Checkpoint loaded") hf_config = HFConfig(name=args.model_name) hf_model = HF_GPTForCausalLM(hf_config) model_weight = ckpt['model'] new_weight = {} for k, v in model_weight.items(): if 'wte' in k: new_weight[k.replace("wte", "embeddings")] = v elif 'beta_proj' in k: new_weight[k.replace("beta_proj", "b_proj")] = v elif 'bias_proj' in k: new_weight[k.replace("bias_proj", "b_proj")] = v else: new_weight[k] = v hf_model.load_state_dict(new_weight) tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, trust_remote_code=True) hf_model.save_pretrained(hf_save_dir) tokenizer.save_pretrained(hf_save_dir) print("Huggingface model saved") ### Then call lm_eval tasks = args.tasks.split(',') assert hf_save_dir is not None assert args.dtype in ['bfloat16', 'float32'] # wandb_logger = wandb.init(project="llm_next_gen", name=args.exp_name, id=args.exp_name, group=args.wandb_group_name) # or empty if wandb.init(...) already called before print("Start lm eval....") results = lm_eval.simple_evaluate( model="hf", model_args=f"pretrained={hf_save_dir},trust_remote_code=True,dtype={args.dtype}", tasks=tasks, device="cuda", log_samples=False, batch_size=1, num_fewshot=args.num_fewshot, )['results'] print('swde: {}'.format(results['swde']['contains,none'])) print('squad_completion: {}'.format(results['squad_completion']['contains,none'])) # print('mmlu: {}'.format(results['mmlu']['acc,none'])) print('piqa: {}'.format(results['piqa']['acc,none'])) print('hellaswag: {}'.format(results['hellaswag']['acc_norm,none'])) print('winogrande: {}'.format(results['winogrande']['acc,none'])) print('arc_easy: {}'.format(results['arc_easy']['acc,none'])) print('arc_challenge: {}'.format(results['arc_challenge']['acc_norm,none'])) print('wikitext, ppl: {}'.format(results['wikitext']['word_perplexity,none'])) print('lambada_openai, acc: {}'.format(results['lambada_openai']['acc,none'])) print('lambada_openai, ppl: {}'.format(results['lambada_openai']['perplexity,none'])) if __name__ == "__main__": parser = argparse.ArgumentParser(description='LLM Training') parser.add_argument('--ckpt_path', type=str, default=None, help='Path to the ckpt directory') parser.add_argument('--hf_save_dir', type=str, default=None, help='(Selective) Path to the saved HF model directory') parser.add_argument('--dtype', type=str, default='bfloat16', help='Data type to use for inference') parser.add_argument('--model_name', type=str, default='Samba_421M', help='Model name') parser.add_argument('--exp_name', type=str, default='hf_eval', help='Experiment name') parser.add_argument('--wandb_dir', type=str, default='/lustre/fsw/portfolios/nvr/users/soyang/code/next_gen_llm-1/checkpoint/outputs', help='Wandb directory') parser.add_argument('--wandb_group_name', type=str, default='lm-eval-harness', help='Wandb group name') parser.add_argument('--tasks', type=str, default='wikitext,lambada_openai,piqa,hellaswag,winogrande,arc_easy,arc_challenge,mmlu', help='Tasks to evaluate') parser.add_argument('--tokenizer_name', type=str, default="TinyLlama/TinyLlama_v1.1", help="tokenizer name or path") parser.add_argument('--batch_size', type=int, default=64) parser.add_argument('--num_fewshot', type=int, default=0) # do convert or not parser.add_argument('--skip_convert', action='store_true', help='Whether to convert to Huggingface model') args = parser.parse_args() main(args)