# m2m100_418M_br_fr / train_config.toml
# lgrobol's picture
# doc updates
# d845ecf
# raw
# history blame
# 347 Bytes
# Training configuration: one root-level key, then the [task] and [tuning]
# tables.
# NOTE(review): the key names suggest a "br" -> "fr" translation fine-tuning
# run; confirm against the tool that consumes this file.
type = "mbart"

[task]
# NOTE(review): change_ratio and poisson_lambda look like text-noising
# parameters; their exact meaning is defined by the consumer, not by this
# file. Confirm before changing.
change_ratio = 0.3
# Empty list: no languages are listed here.
denoise_langs = []
poisson_lambda = 3.0
source_langs = ["br"]
target_langs = ["fr"]

[tuning]
batch_size = 16
betas = [0.9, 0.999]
epsilon = 1e-8
learning_rate = 5e-5
gradient_clipping = 1.0
# Both scheduling keys below are active (not commented out).
# NOTE(review): -1 is presumably a sentinel value for lr_decay_steps; verify
# how the consumer interprets it.
lr_decay_steps = -1
warmup_steps = 1024
# Uncomment this for a more complex training setup
# weight_decay = 1e-5