Spaces:
Runtime error
Runtime error
File size: 8,840 Bytes
f549064 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
# Copyright (c) OpenMMLab. All rights reserved.
import re
from collections import OrderedDict
from typing import List, Optional
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcls.registry import MODELS
from mmcls.structures import ClsDataSample
from .base import BaseClassifier
@MODELS.register_module()
class HuggingFaceClassifier(BaseClassifier):
    """Image classifiers for HuggingFace model.

    This class accepts all positional and keyword arguments of the API
    ``from_pretrained`` (when ``pretrained=True``) and ``from_config`` (when
    ``pretrained=False``) of `transformers.AutoModelForImageClassification`_
    and uses it to create a model from hugging-face.

    It can load checkpoints of hugging-face directly, and the saved
    checkpoints can also be loaded directly by hugging-face.

    Please confirm that you have installed ``transformers`` if you want to
    use it.

    .. _transformers.AutoModelForImageClassification:
        https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForImageClassification

    Args:
        model_name (str): The name of the model to use in hugging-face.
        pretrained (bool): Whether to load pretrained checkpoint from
            hugging-face. Defaults to False.
        *model_args: Other positional arguments of the method
            `from_pretrained` or `from_config`.
        loss (dict): Config of classification loss. Defaults to
            ``dict(type='CrossEntropyLoss', loss_weight=1.0)``.
        train_cfg (dict, optional): The training setting. The acceptable
            fields are:

            - augments (List[dict]): The batch augmentation methods to use.
              More details can be found in :mod:`mmcls.model.utils.augment`.

            Defaults to None.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Defaults to False.
        data_preprocessor (dict, optional): The config for preprocessing input
            data. If None or no specified type, it will use
            "ClsDataPreprocessor" as type. See :class:`ClsDataPreprocessor` for
            more details. Defaults to None.
        init_cfg (dict, optional): the config to control the initialization.
            Defaults to None.
        **kwargs: Other keyword arguments of the method
            `from_pretrained` or `from_config`.

    Examples:
        >>> import torch
        >>> from mmcls.models import build_classifier
        >>> cfg = dict(type='HuggingFaceClassifier', model_name='microsoft/resnet-50', pretrained=True)
        >>> model = build_classifier(cfg)
        >>> inputs = torch.rand(1, 3, 224, 224)
        >>> out = model(inputs)
        >>> print(out.shape)
        torch.Size([1, 1000])
    """  # noqa: E501

    def __init__(self,
                 model_name,
                 pretrained=False,
                 *model_args,
                 loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
                 train_cfg: Optional[dict] = None,
                 with_cp: bool = False,
                 data_preprocessor: Optional[dict] = None,
                 init_cfg: Optional[dict] = None,
                 **kwargs):
        # Work on a shallow copy so that the defaults filled in below do not
        # leak back into the config dict owned by the caller.
        if data_preprocessor is None:
            data_preprocessor = {}
        else:
            data_preprocessor = data_preprocessor.copy()
        # The build process is in MMEngine, so we need to add scope here.
        data_preprocessor.setdefault('type', 'mmcls.ClsDataPreprocessor')

        if train_cfg is not None and 'augments' in train_cfg:
            # Set batch augmentations by `train_cfg`
            data_preprocessor['batch_augments'] = train_cfg

        super().__init__(
            init_cfg=init_cfg, data_preprocessor=data_preprocessor)

        # Imported lazily so that `transformers` is only required when this
        # classifier is actually instantiated.
        from transformers import AutoConfig, AutoModelForImageClassification
        if pretrained:
            self.model = AutoModelForImageClassification.from_pretrained(
                model_name, *model_args, **kwargs)
        else:
            config = AutoConfig.from_pretrained(model_name, *model_args,
                                                **kwargs)
            self.model = AutoModelForImageClassification.from_config(config)

        if not isinstance(loss, nn.Module):
            loss = MODELS.build(loss)
        self.loss_module = loss

        self.with_cp = with_cp
        if self.with_cp:
            self.model.gradient_checkpointing_enable()

        # Strip the ``model.`` level from saved keys and add it back on load.
        self._register_state_dict_hook(self._remove_state_dict_prefix)
        self._register_load_state_dict_pre_hook(self._add_state_dict_prefix)

    def forward(self, inputs, data_samples=None, mode='tensor'):
        """Dispatch to the computation selected by ``mode``.

        Args:
            inputs (torch.Tensor): The input tensor passed to the wrapped
                hugging-face model.
            data_samples (List[ClsDataSample], optional): The data samples
                forwarded to :meth:`loss` or :meth:`predict`.
                Defaults to None.
            mode (str): One of ``'tensor'`` (return the logits of the wrapped
                model), ``'loss'`` (return the result of :meth:`loss`) or
                ``'predict'`` (return the result of :meth:`predict`).
                Defaults to ``'tensor'``.

        Raises:
            RuntimeError: If ``mode`` is none of the three values above.
        """
        if mode == 'tensor':
            return self.model(inputs).logits
        elif mode == 'loss':
            return self.loss(inputs, data_samples)
        elif mode == 'predict':
            return self.predict(inputs, data_samples)
        else:
            raise RuntimeError(f'Invalid mode "{mode}".')

    def extract_feat(self, inputs: torch.Tensor):
        """Feature extraction is not supported; always raises.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "The HuggingFaceClassifier doesn't support extract feature yet.")

    def loss(self, inputs: torch.Tensor, data_samples: List[ClsDataSample],
             **kwargs):
        """Calculate losses from a batch of inputs and data samples.

        Args:
            inputs (torch.Tensor): The input tensor with shape
                (N, C, ...) in general.
            data_samples (List[ClsDataSample]): The annotation data of
                every samples.
            **kwargs: Other keyword arguments of the loss module.

        Returns:
            dict[str, Tensor]: a dictionary of loss components
        """
        # The part can be traced by torch.fx
        cls_score = self.model(inputs).logits

        # The part can not be traced by torch.fx
        losses = self._get_loss(cls_score, data_samples, **kwargs)
        return losses

    def _get_loss(self, cls_score: torch.Tensor,
                  data_samples: List[ClsDataSample], **kwargs):
        """Unpack data samples and compute loss.

        Args:
            cls_score (torch.Tensor): The logits produced by the model.
            data_samples (List[ClsDataSample]): The annotation data of
                every samples.
            **kwargs: Other keyword arguments of the loss module.

        Returns:
            dict[str, Tensor]: a dictionary holding the value returned by the
            loss module under the key ``'loss'``.
        """
        # Unpack data samples and pack targets
        if 'score' in data_samples[0].gt_label:
            # Batch augmentation may convert labels to one-hot format scores.
            target = torch.stack([i.gt_label.score for i in data_samples])
        else:
            target = torch.cat([i.gt_label.label for i in data_samples])

        # compute loss
        losses = dict()
        loss = self.loss_module(
            cls_score, target, avg_factor=cls_score.size(0), **kwargs)
        losses['loss'] = loss

        return losses

    def predict(self,
                inputs: torch.Tensor,
                data_samples: Optional[List[ClsDataSample]] = None):
        """Predict results from a batch of inputs.

        Args:
            inputs (torch.Tensor): The input tensor with shape
                (N, C, ...) in general.
            data_samples (List[ClsDataSample], optional): The annotation
                data of every samples. Defaults to None.

        Returns:
            List[ClsDataSample]: The prediction results.
        """
        # The part can be traced by torch.fx
        cls_score = self.model(inputs).logits

        # The part can not be traced by torch.fx
        predictions = self._get_predictions(cls_score, data_samples)
        return predictions

    def _get_predictions(self, cls_score, data_samples):
        """Post-process the output of head.

        Including softmax and set ``pred_label`` of data samples. When
        ``data_samples`` is None, a new list of :class:`ClsDataSample` is
        created to hold the predictions.
        """
        pred_scores = F.softmax(cls_score, dim=1)
        pred_labels = pred_scores.argmax(dim=1, keepdim=True).detach()

        if data_samples is not None:
            for data_sample, score, label in zip(data_samples, pred_scores,
                                                 pred_labels):
                data_sample.set_pred_score(score).set_pred_label(label)
        else:
            data_samples = []
            for score, label in zip(pred_scores, pred_labels):
                data_samples.append(ClsDataSample().set_pred_score(
                    score).set_pred_label(label))

        return data_samples

    @staticmethod
    def _remove_state_dict_prefix(self, state_dict, prefix, local_metadata):
        """State-dict hook that drops the ``model.`` level from saved keys.

        Registered through ``_register_state_dict_hook``; PyTorch passes the
        module itself as the first positional argument, which is why this
        static method still declares ``self``.

        Args:
            self (nn.Module): The module whose state dict is being collected.
            state_dict (dict): The collected state dict.
            prefix (str): The key prefix of this module in ``state_dict``.
            local_metadata (dict): Metadata of this module (unused).

        Returns:
            OrderedDict: A new mapping in which every key starting with
            ``prefix + 'model.'`` has that part replaced by ``prefix``; all
            other keys are copied unchanged.
        """
        # Literal prefix matching: the previous regex treated the dots in
        # ``prefix`` and in ``model.`` as wildcards.
        wrapped_prefix = prefix + 'model.'
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            if k.startswith(wrapped_prefix):
                new_key = prefix + k[len(wrapped_prefix):]
            else:
                new_key = k
            new_state_dict[new_key] = v
        return new_state_dict

    @staticmethod
    def _add_state_dict_prefix(state_dict, prefix, local_metadata, strict,
                               missing_keys, unexpected_keys, error_msgs):
        """Load pre-hook that re-inserts the ``model.`` level into the keys.

        Registered through ``_register_load_state_dict_pre_hook`` and
        modifies ``state_dict`` in place: every key that starts with
        ``prefix`` is renamed so that it starts with ``prefix + 'model.'``.
        Keys that do not start with ``prefix`` are left untouched.

        Args:
            state_dict (dict): The state dict being loaded, edited in place.
            prefix (str): The key prefix of this module in ``state_dict``.
            local_metadata, strict, missing_keys, unexpected_keys, error_msgs:
                Remaining arguments of the pre-hook signature (unused).
        """
        # NOTE(review): every key under ``prefix`` is renamed, including keys
        # of children other than ``model`` (e.g. ``loss_module``) - confirm
        # this is intended.
        new_prefix = prefix + 'model.'
        for k in list(state_dict.keys()):
            if not k.startswith(prefix):
                # Not owned by this module. The old regex-based code left the
                # key name unchanged here and then deleted the entry.
                continue
            state_dict[new_prefix + k[len(prefix):]] = state_dict.pop(k)
|