Trying to replicate this!
Hello!
I have been trying to replicate this for a while now, but the script just isn't running on my end. The error output looks like this:
File "/u/choprahetarth/all_files/model_merging/merger.py", line 22, in
run_merge(
File "/u/choprahetarth/all_files/model_merging/mergekit/mergekit/merge.py", line 95, in run_merge
for _task, value in exec.run(quiet=options.quiet):
File "/u/choprahetarth/all_files/model_merging/mergekit/mergekit/graph.py", line 197, in run
res = task.execute(**arguments)
File "/u/choprahetarth/all_files/model_merging/mergekit/mergekit/tokenizer.py", line 280, in execute
tokenizer, permutations = build_tokenizer(
File "/u/choprahetarth/all_files/model_merging/mergekit/mergekit/tokenizer.py", line 215, in build_tokenizer
tokenizer_out = build_union_tokenizer(
File "/u/choprahetarth/all_files/model_merging/mergekit/mergekit/tokenizer.py", line 155, in build_union_tokenizer
res = transformers.AutoTokenizer.from_pretrained(
File "/u/choprahetarth/.conda/envs/hello_py10/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 837, in from_pretrained
return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
File "/u/choprahetarth/.conda/envs/hello_py10/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 2086, in from_pretrained
return cls._from_pretrained(
File "/u/choprahetarth/.conda/envs/hello_py10/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 2325, in _from_pretrained
tokenizer = cls(*init_inputs, **init_kwargs)
File "/u/choprahetarth/.conda/envs/hello_py10/lib/python3.10/site-packages/transformers/models/llama/tokenization_llama_fast.py", line 133, in init
super().init(
File "/u/choprahetarth/.conda/envs/hello_py10/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py", line 117, in init
slow_tokenizer = self.slow_tokenizer_class(*args, **kwargs)
File "/u/choprahetarth/.conda/envs/hello_py10/lib/python3.10/site-packages/transformers/models/llama/tokenization_llama.py", line 182, in init
self.sp_model = self.get_spm_processor(kwargs.pop("from_slow", False))
File "/u/choprahetarth/.conda/envs/hello_py10/lib/python3.10/site-packages/transformers/models/llama/tokenization_llama.py", line 212, in get_spm_processor
with open(self.vocab_file, "rb") as f:
TypeError: expected str, bytes or os.PathLike object, not NoneType
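
If I'm reading the traceback right, the fast LLaMA tokenizer falls back to building itself from the slow sentencepiece tokenizer, and the slow tokenizer's `vocab_file` comes up as `None`. I think the failing call reduces to a plain tokenizer load; here's a minimal sketch of what I mean (the model path is a placeholder for one of the source models in my merge config):

```python
# Minimal sketch of what I believe the failing call inside
# build_union_tokenizer reduces to. MODEL_PATH is a placeholder
# for one of the source models in my merge config.
import transformers

MODEL_PATH = "path/or/hub-id/of-a-source-model"  # placeholder

# AutoTokenizer picks LlamaTokenizerFast; with no fast tokenizer file
# available it tries to construct the slow sentencepiece tokenizer,
# which is where vocab_file shows up as None in my traceback.
tok = transformers.AutoTokenizer.from_pretrained(MODEL_PATH)
print(tok)
```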
Any comments on what I should be doing?
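
For what it's worth, this is the sanity check I was going to run to see whether each source repo actually ships the tokenizer files the loader needs (the repo ids below are placeholders for my merge inputs):

```python
# Sketch: list which tokenizer files each source repo ships.
# Repo ids are placeholders for the models in my merge config.
from huggingface_hub import list_repo_files

for repo_id in ["org/source-model-a", "org/source-model-b"]:  # placeholders
    files = set(list_repo_files(repo_id))
    print(
        repo_id,
        "tokenizer.json:", "tokenizer.json" in files,
        "tokenizer.model:", "tokenizer.model" in files,
    )
```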