Error while loading processor: TypeError: expected str, bytes or os.PathLike object, not NoneType

#2 opened by armamut

I'm getting an error while loading the processor.

```python
from transformers import SiglipProcessor, SiglipModel

device = "cuda"  # the device to load the model onto

ckpt = "google/siglip2-base-patch16-224"
processor = SiglipProcessor.from_pretrained(ckpt, trust_remote_code=True)
```

```
The tokenizer class you load from this checkpoint is 'GemmaTokenizer'.
The class this function is called from is 'SiglipTokenizer'.
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[1], line 5
      2 device = "cuda" # the device to load the model onto
      4 ckpt = "google/siglip2-base-patch16-224"
----> 5 processor = SiglipProcessor.from_pretrained(ckpt, trust_remote_code=True)

File /opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py:1070, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
   1067 if token is not None:
   1068     kwargs["token"] = token
-> 1070 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
   1071 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
   1073 return cls.from_args_and_dict(args, processor_dict, **kwargs)

File /opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py:1116, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
   1113     else:
   1114         attribute_class = getattr(transformers_module, class_name)
-> 1116     args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
   1117 return args

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2052, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)
   2049     else:
   2050         logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2052 return cls._from_pretrained(
   2053     resolved_vocab_files,
   2054     pretrained_model_name_or_path,
   2055     init_configuration,
   2056     *init_inputs,
   2057     token=token,
   2058     cache_dir=cache_dir,
   2059     local_files_only=local_files_only,
   2060     _commit_hash=commit_hash,
   2061     _is_local=is_local,
   2062     trust_remote_code=trust_remote_code,
   2063     **kwargs,
   2064 )

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2292, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, trust_remote_code, *init_inputs, **kwargs)
   2290 # Instantiate the tokenizer.
   2291 try:
-> 2292     tokenizer = cls(*init_inputs, **init_kwargs)
   2293 except import_protobuf_decode_error():
   2294     logger.info(
   2295         "Unable to load tokenizer model from SPM, loading from TikToken will be attempted instead."
   2296         "(Google protobuf error: Tried to load SPM model with non-SPM vocab file).",
   2297     )

File /opt/conda/lib/python3.10/site-packages/transformers/models/siglip/tokenization_siglip.py:123, in SiglipTokenizer.__init__(self, vocab_file, eos_token, unk_token, pad_token, additional_special_tokens, sp_model_kwargs, model_max_length, do_lower_case, **kwargs)
    120 self.do_lower_case = do_lower_case
    121 self.vocab_file = vocab_file
--> 123 self.sp_model = self.get_spm_processor()
    124 self.vocab_file = vocab_file
    126 super().__init__(
    127     eos_token=eos_token,
    128     unk_token=unk_token,
   (...)
    134     **kwargs,
    135 )

File /opt/conda/lib/python3.10/site-packages/transformers/models/siglip/tokenization_siglip.py:139, in SiglipTokenizer.get_spm_processor(self)
    137 def get_spm_processor(self):
    138     tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
--> 139     with open(self.vocab_file, "rb") as f:
    140         sp_model = f.read()
    141         model_pb2 = import_protobuf()

TypeError: expected str, bytes or os.PathLike object, not NoneType
```
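
The warning at the top of that traceback is the tell: the checkpoint declares `GemmaTokenizer` in its `tokenizer_config.json`, but this version of `SiglipProcessor` hard-codes `SiglipTokenizer`, which looks for a SentencePiece vocab file the repo doesn't ship, so `vocab_file` ends up as `None`. A quick sketch to confirm what the checkpoint declares (using `huggingface_hub`):

```python
# Sketch: inspect which tokenizer class the checkpoint declares.
import json
from huggingface_hub import hf_hub_download

path = hf_hub_download("google/siglip2-base-patch16-224", "tokenizer_config.json")
with open(path) as f:
    config = json.load(f)
print(config["tokenizer_class"])  # expected: 'GemmaTokenizer', per the warning above
```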
armamut changed discussion title from "Error while loading model TypeError: expected str, bytes or os.PathLike object, not NoneType" to "Error while loading processor: TypeError: expected str, bytes or os.PathLike object, not NoneType"

I have the same issue using the AutoModel and AutoProcessor classes (both from the transformers 4.49.0 package).

You need to install the latest version of transformers. For now: `pip install git+https://github.com/huggingface/transformers`

`pip install transformers==4.49.0` does not work.

@menglan There is no PyPI package compatible with siglip2 checkpoints yet. Please use the latest version installed directly from GitHub, as mentioned above.
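
For reference, a minimal sketch of the load once transformers is installed from source (same AutoModel/AutoProcessor classes mentioned above; untested here):

```python
# pip install git+https://github.com/huggingface/transformers
from transformers import AutoModel, AutoProcessor

ckpt = "google/siglip2-base-patch16-224"
model = AutoModel.from_pretrained(ckpt)
processor = AutoProcessor.from_pretrained(ckpt)  # should now resolve to the Gemma-based tokenizer
```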

I had this same issue and was just wondering when siglip2 checkpoints will be supported in a PyPI release?

Edit: I was using this and following the section "Using the model yourself". I installed the version from GitHub, but that gives another error. I've pasted it below:

```
Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no padding.
Traceback (most recent call last):
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 777, in convert_to_tensors
    tensor = as_tensor(value)
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 739, in as_tensor
    return torch.tensor(value)
ValueError: expected sequence of length 10 at dim 1 (got 9)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/rsp/data/ML/sarthakExperiments/test/test.py", line 16, in <module>
    inputs = processor(text = texts,
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/models/siglip/processing_siglip.py", line 108, in __call__
    encoding = self.tokenizer(
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 2877, in __call__
    encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs)
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 2965, in _call_one
    return self.batch_encode_plus(
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 3167, in batch_encode_plus
    return self._batch_encode_plus(
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_fast.py", line 587, in _batch_encode_plus
    return BatchEncoding(sanitized_tokens, sanitized_encodings, tensor_type=return_tensors)
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 241, in __init__
    self.convert_to_tensors(tensor_type=tensor_type, prepend_batch_axis=pre
```

Just change AutoProcessor to AutoImageProcessor.
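
That is, something like this sketch, which only covers the image side:

```python
from PIL import Image
from transformers import AutoImageProcessor

image = Image.open("example.jpg")  # hypothetical input image
image_processor = AutoImageProcessor.from_pretrained("google/siglip2-base-patch16-224")
inputs = image_processor(images=image, return_tensors="pt")  # pixel_values only, no text
```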

@kurisu0306 Tried that and it worked, but the inputs only embed the image and not the texts, which throws another error.
Code:

```python
inputs = processor(text=texts,
                   images=image,
                   padding="max_length",
                   return_tensors="pt").to('cuda')
```

Then I pass these inputs to the model as:

```python
with torch.no_grad():
    with torch.autocast('cuda'):
        outputs = model(**inputs)
```

But it throws the error:

```
.......
958, in forward
    raise ValueError("You have to specify input_ids")
ValueError: You have to specify input_ids
```

Looking at the output after passing it through the processor, this is what inputs contains:

```
(Pdb) inputs.keys()
dict_keys(['pixel_values'])
(Pdb) inputs['pixel_values'].shape
torch.Size([1, 3, 224, 224])
```

Now it makes sense that my input image got encoded, but shouldn't the texts be encoded as well? Maybe that's what is throwing the error?
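
For anyone landing here: AutoImageProcessor only handles images, so the text never shows up in its outputs, which is why `inputs` ends up with only `pixel_values` and the model complains about `input_ids`. With a transformers build that actually supports siglip2, the full AutoProcessor should return both. A rough sketch (the `max_length=64` value is my assumption, prompted by the "no maximum length is provided" warning above, not something confirmed in this thread):

```python
import torch
from PIL import Image
from transformers import AutoModel, AutoProcessor

ckpt = "google/siglip2-base-patch16-224"
model = AutoModel.from_pretrained(ckpt).to("cuda")
processor = AutoProcessor.from_pretrained(ckpt)

image = Image.open("example.jpg")  # hypothetical input image
texts = ["a photo of a cat", "a photo of a dog"]

# The tokenizer reports no predefined maximum length, so pass one
# explicitly; 64 is an assumed value, adjust as needed.
inputs = processor(text=texts,
                   images=image,
                   padding="max_length",
                   max_length=64,
                   return_tensors="pt").to("cuda")

with torch.no_grad():
    outputs = model(**inputs)  # now gets both input_ids and pixel_values
```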
