Upload model
- config.json +1 -1
- generation_config.json +1 -1
- modelling_multi.py +5 -4
config.json CHANGED
@@ -2248,5 +2248,5 @@
   "model_type": "vision-encoder-decoder",
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.41.2"
 }
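The change above only pins the recorded transformers_version to 4.41.2; the surrounding fields identify the checkpoint as a float32 "vision-encoder-decoder" model. A minimal loading sketch under those assumptions (the repo id below is a placeholder, not taken from this commit; the repo also ships custom code in modelling_multi.py, so the stock class may not be the intended entry point):

    # Minimal loading sketch. Assumptions: "user/model" is a placeholder repo id and the
    # checkpoint loads with the stock class implied by "model_type": "vision-encoder-decoder".
    import torch
    from transformers import VisionEncoderDecoderModel

    model = VisionEncoderDecoderModel.from_pretrained(
        "user/model",
        torch_dtype=torch.float32,  # matches "torch_dtype": "float32" in config.json
    )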
generation_config.json CHANGED
@@ -1,5 +1,5 @@
 {
   "_from_model_config": true,
   "pad_token_id": 0,
-  "transformers_version": "4.
+  "transformers_version": "4.41.2"
 }
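generation_config.json gets the same version pin; the other fields set pad_token_id to 0 and mark the config as derived from the model config. A small sketch of reading it back (again with a placeholder repo id):

    # Sketch only. Assumption: "user/model" is a placeholder repo id.
    from transformers import GenerationConfig

    gen_cfg = GenerationConfig.from_pretrained("user/model")
    print(gen_cfg.pad_token_id)          # 0, as set in generation_config.json
    print(gen_cfg.transformers_version)  # "4.41.2" after this commit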
modelling_multi.py CHANGED
@@ -6,11 +6,11 @@ import transformers
 from torch.nn import CrossEntropyLoss
 from transformers import PreTrainedTokenizerFast, VisionEncoderDecoderModel
 from transformers.configuration_utils import PretrainedConfig
-from transformers.modeling_outputs import
-Seq2SeqLMOutput)
+from transformers.modeling_outputs import BaseModelOutput, ModelOutput, Seq2SeqLMOutput
 from transformers.modeling_utils import PreTrainedModel
-from transformers.models.vision_encoder_decoder.configuration_vision_encoder_decoder import
-VisionEncoderDecoderConfig
+from transformers.models.vision_encoder_decoder.configuration_vision_encoder_decoder import (
+    VisionEncoderDecoderConfig,
+)
 from transformers.utils import logging

 logger = logging.get_logger(__name__)
@@ -55,6 +55,7 @@ class MultiCvtWithProjectionHead(transformers.CvtPreTrainedModel):
         pixel_values: Optional[torch.Tensor] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
     ) -> Union[Tuple, ModelOutput]:

         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
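The Python change completes the previously truncated import lines and adds an output_attentions keyword to the forward signature of MultiCvtWithProjectionHead, likely so that callers such as VisionEncoderDecoderModel, which forwards output_attentions to its encoder, can pass the keyword without a TypeError. A self-contained sketch of that signature pattern with a hypothetical stand-in class (not the repo's actual implementation):

    # Illustrative sketch of the signature pattern adopted in the diff. The class below is a
    # hypothetical stand-in, not the repo's MultiCvtWithProjectionHead.
    from typing import Optional, Tuple, Union

    import torch
    from transformers.modeling_outputs import BaseModelOutput, ModelOutput


    class ToyEncoderWithProjectionHead(torch.nn.Module):
        def forward(
            self,
            pixel_values: Optional[torch.Tensor] = None,
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
            output_attentions: Optional[bool] = None,  # accepted so callers may pass it; unused here
        ) -> Union[Tuple, ModelOutput]:
            # Toy body: flatten the image batch and wrap it in a BaseModelOutput,
            # mirroring the return annotation used in the diff.
            hidden = pixel_values.flatten(1) if pixel_values is not None else None
            return BaseModelOutput(last_hidden_state=hidden)


    # Before the change, a call like this would raise a TypeError for the unexpected
    # keyword 'output_attentions'; with the new signature it is accepted.
    enc = ToyEncoderWithProjectionHead()
    out = enc(pixel_values=torch.randn(1, 3, 8, 8), output_attentions=False, return_dict=True)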