import logging
import math
from bisect import bisect_right
from typing import List
import torch
from fvcore.common.param_scheduler import (
    CompositeParamScheduler,
    ConstantParamScheduler,
    LinearParamScheduler,
    ParamScheduler,
)

try:
    from torch.optim.lr_scheduler import LRScheduler
except ImportError:
    from torch.optim.lr_scheduler import _LRScheduler as LRScheduler

logger = logging.getLogger(__name__)


class WarmupParamScheduler(CompositeParamScheduler):
    """
    Add an initial warmup stage to another scheduler.
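
    Examples:
    ::
        # An illustrative sketch: wrap an fvcore MultiStepParamScheduler (from
        # ``fvcore.common.param_scheduler``) with a linear warmup over the first
        # 1000 of 90000 updates; the numbers here are arbitrary.
        sched = WarmupParamScheduler(
            MultiStepParamScheduler(
                [1, 0.1, 0.01],
                milestones=[60000, 80000],
                num_updates=90000,
            ),
            warmup_factor=0.001,
            warmup_length=1000 / 90000,
        )
        sched(0.0)  # ~0.001: start of warmup
        sched(0.5)  # 1.0: halfway through training, before the first milestone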
    """

    def __init__(
        self,
        scheduler: ParamScheduler,
        warmup_factor: float,
        warmup_length: float,
        warmup_method: str = "linear",
        rescale_interval: bool = False,
    ):
        """
        Args:
            scheduler: warmup will be added at the beginning of this scheduler
            warmup_factor: the factor w.r.t. the initial value of ``scheduler``, e.g. 0.001
            warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t. the entire
                training, e.g. 0.01
            warmup_method: one of "linear" or "constant"
            rescale_interval: whether we will rescale the interval of the scheduler after
                warmup
        """
        end_value = scheduler(warmup_length)  # the value to reach when warmup ends
        start_value = warmup_factor * scheduler(0.0)
        if warmup_method == "constant":
            warmup = ConstantParamScheduler(start_value)
        elif warmup_method == "linear":
            warmup = LinearParamScheduler(start_value, end_value)
        else:
            raise ValueError("Unknown warmup method: {}".format(warmup_method))
        super().__init__(
            [warmup, scheduler],
            interval_scaling=["rescaled", "rescaled" if rescale_interval else "fixed"],
            lengths=[warmup_length, 1 - warmup_length],
        )


class LRMultiplier(LRScheduler):
    """
    An ``LRScheduler`` which uses fvcore :class:`ParamScheduler` to multiply the
    learning rate of each param in the optimizer.
    Every step, the learning rate of each parameter becomes its initial value
    multiplied by the output of the given :class:`ParamScheduler`.

    The absolute learning rate value of each parameter can be different.
    This scheduler can be used as long as the relative scale among them does
    not change during training.

Examples: |
|
:: |
|
LRMultiplier( |
|
opt, |
|
WarmupParamScheduler( |
|
MultiStepParamScheduler( |
|
[1, 0.1, 0.01], |
|
milestones=[60000, 80000], |
|
num_updates=90000, |
|
), 0.001, 100 / 90000 |
|
), |
|
max_iter=90000 |
|
) |
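
    Assuming the object constructed above is bound to ``scheduler``, a minimal
    sketch of the corresponding training loop (the loop body is a placeholder,
    not part of this module):
    ::
        for _ in range(90000):
            ...               # forward, backward, and opt.step() on one batch
            scheduler.step()  # applies the multiplier for the next iteration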
    """
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        multiplier: ParamScheduler,
        max_iter: int,
        last_iter: int = -1,
    ):
        """
        Args:
            optimizer, last_iter: See ``torch.optim.lr_scheduler.LRScheduler``.
                ``last_iter`` is the same as ``last_epoch``.
            multiplier: a fvcore ParamScheduler that defines the multiplier on
                every LR of the optimizer
            max_iter: the total number of training iterations
        """
        if not isinstance(multiplier, ParamScheduler):
            raise ValueError(
                "LRMultiplier(multiplier=) must be an instance of fvcore "
                f"ParamScheduler. Got {multiplier} instead."
            )
        self._multiplier = multiplier
        self._max_iter = max_iter
        super().__init__(optimizer, last_epoch=last_iter)

    def state_dict(self):
        # fvcore schedulers are stateless; only keep the pytorch scheduler states.
        return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch}

    def get_lr(self) -> List[float]:
        multiplier = self._multiplier(self.last_epoch / self._max_iter)
        return [base_lr * multiplier for base_lr in self.base_lrs]


"""
Content below is no longer needed!
"""


class WarmupMultiStepLR(LRScheduler):
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        milestones: List[int],
        gamma: float = 0.1,
        warmup_factor: float = 0.001,
        warmup_iters: int = 1000,
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        logger.warning(
            "WarmupMultiStepLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
        )
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones)
            )
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        # Decay by `gamma` once for each milestone that has been reached, on top of the warmup factor.
        return [
            base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]

    def _compute_values(self) -> List[float]:
        # The new interface
        return self.get_lr()


class WarmupCosineLR(LRScheduler):
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        max_iters: int,
        warmup_factor: float = 0.001,
        warmup_iters: int = 1000,
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        logger.warning(
            "WarmupCosineLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
        )
        self.max_iters = max_iters
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        # Different definitions of half-cosine with warmup are possible. For
        # simplicity we multiply the standard half-cosine schedule,
        # 0.5 * (1 + cos(pi * t / max_iters)), by the warmup factor. An alternative
        # is to start the cosine period at warmup_iters instead of at 0; when
        # warmup_iters << max_iters the two are very close to each other.
        return [
            base_lr
            * warmup_factor
            * 0.5
            * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters))
            for base_lr in self.base_lrs
        ]

    def _compute_values(self) -> List[float]:
        # The new interface
        return self.get_lr()


def _get_warmup_factor_at_iter(
    method: str, iter: int, warmup_iters: int, warmup_factor: float
) -> float:
    """
    Return the learning rate warmup factor at a specific iteration.
    See :paper:`ImageNet in 1h` for more details.

    Args:
        method (str): warmup method; either "constant" or "linear".
        iter (int): iteration at which to calculate the warmup factor.
        warmup_iters (int): the number of warmup iterations.
        warmup_factor (float): the base warmup factor (the meaning changes according
            to the method used).

    Returns:
        float: the effective warmup factor at the given iteration.
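
    Example (with illustrative numbers):
    ::
        # linear warmup with warmup_factor=0.001 over warmup_iters=1000
        _get_warmup_factor_at_iter("linear", 0, 1000, 0.001)     # 0.001
        _get_warmup_factor_at_iter("linear", 500, 1000, 0.001)   # 0.5005 == 0.001 * 0.5 + 0.5
        _get_warmup_factor_at_iter("linear", 1000, 1000, 0.001)  # 1.0 (warmup is over)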
    """
    if iter >= warmup_iters:
        return 1.0

    if method == "constant":
        return warmup_factor
    elif method == "linear":
        alpha = iter / warmup_iters
        return warmup_factor * (1 - alpha) + alpha
    else:
        raise ValueError("Unknown warmup method: {}".format(method))