Spaces:
Running
Running
File size: 2,927 Bytes
94f372a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import math
class WarmupLR:
    """
    Linear warmup learning rate scheduler. After warmup, learning rate is
    constant.

    Each parameter group's learning rate is scaled linearly from 0 up to its
    base value over ``warmup_steps`` steps and is then held at the base value.
    The base values are captured from the optimizer on the first call to
    :meth:`step`.

    Args:
        optimizer (torch.optim.Optimizer): optimizer. Only its
            ``param_groups`` attribute (a list of dicts with an ``"lr"``
            entry) is accessed.
        warmup_steps (int): number of warmup steps. A value of 0 (or less)
            disables warmup: the base learning rate is used from step 0.
    """

    def __init__(self, optimizer, warmup_steps):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        # Filled lazily by step() so the rates in effect at the first
        # scheduling call are used as the base values.
        self.base_lr = None

    def get_lr(self, lr, step):
        """Return the learning rate for ``step`` given the base rate ``lr``.

        Args:
            lr (float): base learning rate of a parameter group.
            step (int): current training step.

        Returns:
            float: ``lr`` scaled by ``step / warmup_steps``, capped at ``lr``.
        """
        if self.warmup_steps <= 0:
            # No warmup requested: without this guard step 0 would yield a
            # learning rate of 0 even though there is nothing to ramp.
            return lr * 1.0
        return lr * min(step / max(self.warmup_steps, 1), 1.0)

    def step(self, step):
        """Write the learning rate for ``step`` into every parameter group.

        Args:
            step (int): current training step.
        """
        if self.base_lr is None:
            self.base_lr = [
                param_group["lr"] for param_group in self.optimizer.param_groups
            ]
        for param_group, base_lr_group in zip(
            self.optimizer.param_groups, self.base_lr
        ):
            param_group["lr"] = self.get_lr(base_lr_group, step)

    def state_dict(self):
        """Return the scheduler attributes as a dict, excluding the optimizer."""
        return {
            key: value for key, value in self.__dict__.items() if key != "optimizer"
        }

    def load_state_dict(self, state_dict):
        """Restore scheduler attributes from ``state_dict``.

        Args:
            state_dict (dict): mapping of attribute names to values, as
                produced by :meth:`state_dict`.
        """
        self.__dict__.update(state_dict)
class WarmupCosineDecayLR:
    """
    Linear warmup learning rate scheduler followed by a cosine decay.

    During the first ``warmup_steps`` steps each parameter group's learning
    rate is scaled linearly from 0 up to its base value. Afterwards it follows
    ``0.5 * lr * (1 + cos(rate * pi * progress))`` where ``progress`` is
    ``(step - warmup_steps) / (total_steps - warmup_steps)``. The progress is
    not clamped, so steps beyond ``total_steps`` keep following the cosine.
    Base values are captured from the optimizer on the first call to
    :meth:`step`.

    Args:
        optimizer (torch.optim.Optimizer): optimizer. Only its
            ``param_groups`` attribute (a list of dicts with an ``"lr"``
            entry) is accessed.
        warmup_steps (int): number of warmup steps
        total_steps (int): total number of steps
        rate (float): cosine decay rate; multiplies the cosine argument, so
            1.0 reaches zero at ``total_steps`` and 0.5 reaches half the
            base rate there.
    """

    def __init__(self, optimizer, warmup_steps, total_steps, rate=1.0):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        # Filled lazily by step() so the rates in effect at the first
        # scheduling call are used as the base values.
        self.base_lr = None
        self.total_steps = total_steps
        self.rate = rate

    def get_lr(self, lr, step):
        """Return the learning rate for ``step`` given the base rate ``lr``.

        Args:
            lr (float): base learning rate of a parameter group.
            step (int): current training step.

        Returns:
            float: the linearly warmed-up rate while ``step < warmup_steps``,
            otherwise the cosine-decayed rate.
        """
        if step < self.warmup_steps:
            return lr * min(step / max(self.warmup_steps, 1), 1.0)

        decay_steps = self.total_steps - self.warmup_steps
        if decay_steps == 0:
            # total_steps == warmup_steps leaves no decay span; use a divisor
            # of 1 (like the warmup guard above) instead of raising
            # ZeroDivisionError.
            decay_steps = 1
        angle = self.rate * math.pi * (step - self.warmup_steps) / decay_steps
        return 0.5 * lr * (1 + math.cos(angle))

    def step(self, step):
        """Write the learning rate for ``step`` into every parameter group.

        Args:
            step (int): current training step.
        """
        if self.base_lr is None:
            self.base_lr = [
                param_group["lr"] for param_group in self.optimizer.param_groups
            ]
        for param_group, base_lr_group in zip(
            self.optimizer.param_groups, self.base_lr
        ):
            param_group["lr"] = self.get_lr(base_lr_group, step)

    def state_dict(self):
        """Return the scheduler attributes as a dict, excluding the optimizer."""
        return {
            key: value for key, value in self.__dict__.items() if key != "optimizer"
        }

    def load_state_dict(self, state_dict):
        """Restore scheduler attributes from ``state_dict``.

        Args:
            state_dict (dict): mapping of attribute names to values, as
                produced by :meth:`state_dict`.
        """
        self.__dict__.update(state_dict)
|