{
"best_metric": 2.873922109603882,
"best_model_checkpoint": "dq158/coqui/checkpoint-40162",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 120486,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 8e-05,
"loss": 2.9713,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 7.99999659782192e-05,
"loss": 2.9968,
"step": 1000
},
{
"epoch": 0.04,
"learning_rate": 7.999986391293467e-05,
"loss": 3.1051,
"step": 1500
},
{
"epoch": 0.05,
"learning_rate": 7.999969380432003e-05,
"loss": 3.0623,
"step": 2000
},
{
"epoch": 0.06,
"learning_rate": 7.999945565266465e-05,
"loss": 3.0735,
"step": 2500
},
{
"epoch": 0.07,
"learning_rate": 7.999914945837365e-05,
"loss": 3.0149,
"step": 3000
},
{
"epoch": 0.09,
"learning_rate": 7.999877522196789e-05,
"loss": 2.9847,
"step": 3500
},
{
"epoch": 0.1,
"learning_rate": 7.999833294408398e-05,
"loss": 3.1534,
"step": 4000
},
{
"epoch": 0.11,
"learning_rate": 7.999782262547429e-05,
"loss": 2.9796,
"step": 4500
},
{
"epoch": 0.12,
"learning_rate": 7.999724426700689e-05,
"loss": 2.984,
"step": 5000
},
{
"epoch": 0.14,
"learning_rate": 7.999659786966562e-05,
"loss": 2.9924,
"step": 5500
},
{
"epoch": 0.15,
"learning_rate": 7.99958834345501e-05,
"loss": 2.9193,
"step": 6000
},
{
"epoch": 0.16,
"learning_rate": 7.99951009628756e-05,
"loss": 3.0217,
"step": 6500
},
{
"epoch": 0.17,
"learning_rate": 7.999425045597321e-05,
"loss": 3.1339,
"step": 7000
},
{
"epoch": 0.19,
"learning_rate": 7.999333191528969e-05,
"loss": 3.0685,
"step": 7500
},
{
"epoch": 0.2,
"learning_rate": 7.999234534238758e-05,
"loss": 3.0571,
"step": 8000
},
{
"epoch": 0.21,
"learning_rate": 7.999129073894513e-05,
"loss": 3.1052,
"step": 8500
},
{
"epoch": 0.22,
"learning_rate": 7.999016810675628e-05,
"loss": 3.052,
"step": 9000
},
{
"epoch": 0.24,
"learning_rate": 7.998897744773075e-05,
"loss": 3.0529,
"step": 9500
},
{
"epoch": 0.25,
"learning_rate": 7.998771876389398e-05,
"loss": 3.1207,
"step": 10000
},
{
"epoch": 0.26,
"learning_rate": 7.998639205738706e-05,
"loss": 3.0854,
"step": 10500
},
{
"epoch": 0.27,
"learning_rate": 7.998499733046688e-05,
"loss": 3.0566,
"step": 11000
},
{
"epoch": 0.29,
"learning_rate": 7.998353458550596e-05,
"loss": 3.0659,
"step": 11500
},
{
"epoch": 0.3,
"learning_rate": 7.998200382499256e-05,
"loss": 2.9975,
"step": 12000
},
{
"epoch": 0.31,
"learning_rate": 7.998040505153066e-05,
"loss": 3.0442,
"step": 12500
},
{
"epoch": 0.32,
"learning_rate": 7.997873826783991e-05,
"loss": 3.0385,
"step": 13000
},
{
"epoch": 0.34,
"learning_rate": 7.997700347675568e-05,
"loss": 3.0444,
"step": 13500
},
{
"epoch": 0.35,
"learning_rate": 7.997520068122894e-05,
"loss": 3.042,
"step": 14000
},
{
"epoch": 0.36,
"learning_rate": 7.997332988432647e-05,
"loss": 3.0603,
"step": 14500
},
{
"epoch": 0.37,
"learning_rate": 7.997139108923062e-05,
"loss": 3.0842,
"step": 15000
},
{
"epoch": 0.39,
"learning_rate": 7.996938429923948e-05,
"loss": 3.0216,
"step": 15500
},
{
"epoch": 0.4,
"learning_rate": 7.996730951776675e-05,
"loss": 2.977,
"step": 16000
},
{
"epoch": 0.41,
"learning_rate": 7.996516674834186e-05,
"loss": 3.1044,
"step": 16500
},
{
"epoch": 0.42,
"learning_rate": 7.996295599460983e-05,
"loss": 3.0355,
"step": 17000
},
{
"epoch": 0.44,
"learning_rate": 7.996067726033133e-05,
"loss": 2.9189,
"step": 17500
},
{
"epoch": 0.45,
"learning_rate": 7.99583305493827e-05,
"loss": 3.0116,
"step": 18000
},
{
"epoch": 0.46,
"learning_rate": 7.995591586575593e-05,
"loss": 2.924,
"step": 18500
},
{
"epoch": 0.47,
"learning_rate": 7.99534332135586e-05,
"loss": 2.9726,
"step": 19000
},
{
"epoch": 0.49,
"learning_rate": 7.99508825970139e-05,
"loss": 3.0505,
"step": 19500
},
{
"epoch": 0.5,
"learning_rate": 7.994826402046067e-05,
"loss": 3.1109,
"step": 20000
},
{
"epoch": 0.51,
"learning_rate": 7.994557748835336e-05,
"loss": 3.0032,
"step": 20500
},
{
"epoch": 0.52,
"learning_rate": 7.994282300526196e-05,
"loss": 2.952,
"step": 21000
},
{
"epoch": 0.54,
"learning_rate": 7.994000057587214e-05,
"loss": 3.0557,
"step": 21500
},
{
"epoch": 0.55,
"learning_rate": 7.993711020498506e-05,
"loss": 3.0012,
"step": 22000
},
{
"epoch": 0.56,
"learning_rate": 7.993415189751751e-05,
"loss": 3.0834,
"step": 22500
},
{
"epoch": 0.57,
"learning_rate": 7.993112565850186e-05,
"loss": 3.1128,
"step": 23000
},
{
"epoch": 0.59,
"learning_rate": 7.992803149308598e-05,
"loss": 2.9702,
"step": 23500
},
{
"epoch": 0.6,
"learning_rate": 7.992486940653335e-05,
"loss": 2.9286,
"step": 24000
},
{
"epoch": 0.61,
"learning_rate": 7.992163940422294e-05,
"loss": 3.0021,
"step": 24500
},
{
"epoch": 0.62,
"learning_rate": 7.991834149164927e-05,
"loss": 3.0641,
"step": 25000
},
{
"epoch": 0.63,
"learning_rate": 7.991497567442239e-05,
"loss": 3.0452,
"step": 25500
},
{
"epoch": 0.65,
"learning_rate": 7.991154195826784e-05,
"loss": 3.1197,
"step": 26000
},
{
"epoch": 0.66,
"learning_rate": 7.99080403490267e-05,
"loss": 2.9332,
"step": 26500
},
{
"epoch": 0.67,
"learning_rate": 7.990447085265552e-05,
"loss": 2.9659,
"step": 27000
},
{
"epoch": 0.68,
"learning_rate": 7.990083347522633e-05,
"loss": 3.0604,
"step": 27500
},
{
"epoch": 0.7,
"learning_rate": 7.98971282229266e-05,
"loss": 3.0265,
"step": 28000
},
{
"epoch": 0.71,
"learning_rate": 7.989335510205932e-05,
"loss": 3.0863,
"step": 28500
},
{
"epoch": 0.72,
"learning_rate": 7.98895141190429e-05,
"loss": 3.0246,
"step": 29000
},
{
"epoch": 0.73,
"learning_rate": 7.988560528041123e-05,
"loss": 3.068,
"step": 29500
},
{
"epoch": 0.75,
"learning_rate": 7.988162859281352e-05,
"loss": 3.0201,
"step": 30000
},
{
"epoch": 0.76,
"learning_rate": 7.987758406301453e-05,
"loss": 2.9901,
"step": 30500
},
{
"epoch": 0.77,
"learning_rate": 7.987347169789434e-05,
"loss": 3.0216,
"step": 31000
},
{
"epoch": 0.78,
"learning_rate": 7.986929150444845e-05,
"loss": 2.9896,
"step": 31500
},
{
"epoch": 0.8,
"learning_rate": 7.986504348978775e-05,
"loss": 2.9584,
"step": 32000
},
{
"epoch": 0.81,
"learning_rate": 7.986072766113848e-05,
"loss": 3.1194,
"step": 32500
},
{
"epoch": 0.82,
"learning_rate": 7.985634402584225e-05,
"loss": 3.0539,
"step": 33000
},
{
"epoch": 0.83,
"learning_rate": 7.985189259135603e-05,
"loss": 3.025,
"step": 33500
},
{
"epoch": 0.85,
"learning_rate": 7.98473733652521e-05,
"loss": 3.0168,
"step": 34000
},
{
"epoch": 0.86,
"learning_rate": 7.984278635521804e-05,
"loss": 3.0642,
"step": 34500
},
{
"epoch": 0.87,
"learning_rate": 7.98381315690568e-05,
"loss": 3.1025,
"step": 35000
},
{
"epoch": 0.88,
"learning_rate": 7.983340901468657e-05,
"loss": 2.9948,
"step": 35500
},
{
"epoch": 0.9,
"learning_rate": 7.982861870014082e-05,
"loss": 3.1125,
"step": 36000
},
{
"epoch": 0.91,
"learning_rate": 7.982376063356834e-05,
"loss": 2.9866,
"step": 36500
},
{
"epoch": 0.92,
"learning_rate": 7.981883482323309e-05,
"loss": 3.0354,
"step": 37000
},
{
"epoch": 0.93,
"learning_rate": 7.981384127751434e-05,
"loss": 3.0398,
"step": 37500
},
{
"epoch": 0.95,
"learning_rate": 7.980878000490655e-05,
"loss": 2.9966,
"step": 38000
},
{
"epoch": 0.96,
"learning_rate": 7.98036510140194e-05,
"loss": 3.003,
"step": 38500
},
{
"epoch": 0.97,
"learning_rate": 7.979845431357774e-05,
"loss": 3.0215,
"step": 39000
},
{
"epoch": 0.98,
"learning_rate": 7.979318991242163e-05,
"loss": 3.0306,
"step": 39500
},
{
"epoch": 1.0,
"learning_rate": 7.978785781950629e-05,
"loss": 3.1022,
"step": 40000
},
{
"epoch": 1.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.873922109603882,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 4569600,
"eval_runtime": 7627.4866,
"eval_samples_per_second": 1.17,
"eval_steps_per_second": 0.585,
"eval_translation_length": 4569600,
"step": 40162
},
{
"epoch": 1.01,
"learning_rate": 7.97824580439021e-05,
"loss": 3.068,
"step": 40500
},
{
"epoch": 1.02,
"learning_rate": 7.977699059479452e-05,
"loss": 3.0099,
"step": 41000
},
{
"epoch": 1.03,
"learning_rate": 7.977145548148421e-05,
"loss": 3.0815,
"step": 41500
},
{
"epoch": 1.05,
"learning_rate": 7.976585271338687e-05,
"loss": 3.0333,
"step": 42000
},
{
"epoch": 1.06,
"learning_rate": 7.97601823000333e-05,
"loss": 3.0443,
"step": 42500
},
{
"epoch": 1.07,
"learning_rate": 7.975444425106939e-05,
"loss": 3.0719,
"step": 43000
},
{
"epoch": 1.08,
"learning_rate": 7.974863857625606e-05,
"loss": 2.9082,
"step": 43500
},
{
"epoch": 1.1,
"learning_rate": 7.974276528546931e-05,
"loss": 3.0596,
"step": 44000
},
{
"epoch": 1.11,
"learning_rate": 7.97368243887001e-05,
"loss": 3.0265,
"step": 44500
},
{
"epoch": 1.12,
"learning_rate": 7.973081589605442e-05,
"loss": 2.9658,
"step": 45000
},
{
"epoch": 1.13,
"learning_rate": 7.972473981775327e-05,
"loss": 3.0211,
"step": 45500
},
{
"epoch": 1.15,
"learning_rate": 7.971859616413261e-05,
"loss": 2.994,
"step": 46000
},
{
"epoch": 1.16,
"learning_rate": 7.971238494564331e-05,
"loss": 3.117,
"step": 46500
},
{
"epoch": 1.17,
"learning_rate": 7.970610617285123e-05,
"loss": 3.0394,
"step": 47000
},
{
"epoch": 1.18,
"learning_rate": 7.969975985643711e-05,
"loss": 3.0046,
"step": 47500
},
{
"epoch": 1.2,
"learning_rate": 7.96933460071966e-05,
"loss": 3.0941,
"step": 48000
},
{
"epoch": 1.21,
"learning_rate": 7.968686463604024e-05,
"loss": 3.0726,
"step": 48500
},
{
"epoch": 1.22,
"learning_rate": 7.968031575399341e-05,
"loss": 3.0133,
"step": 49000
},
{
"epoch": 1.23,
"learning_rate": 7.967369937219634e-05,
"loss": 3.0032,
"step": 49500
},
{
"epoch": 1.24,
"learning_rate": 7.966701550190408e-05,
"loss": 2.957,
"step": 50000
},
{
"epoch": 1.26,
"learning_rate": 7.966026415448651e-05,
"loss": 2.9868,
"step": 50500
},
{
"epoch": 1.27,
"learning_rate": 7.965344534142825e-05,
"loss": 3.0476,
"step": 51000
},
{
"epoch": 1.28,
"learning_rate": 7.964655907432872e-05,
"loss": 3.0908,
"step": 51500
},
{
"epoch": 1.29,
"learning_rate": 7.963960536490207e-05,
"loss": 3.0189,
"step": 52000
},
{
"epoch": 1.31,
"learning_rate": 7.963258422497717e-05,
"loss": 3.0105,
"step": 52500
},
{
"epoch": 1.32,
"learning_rate": 7.962549566649763e-05,
"loss": 3.0428,
"step": 53000
},
{
"epoch": 1.33,
"learning_rate": 7.96183397015217e-05,
"loss": 3.071,
"step": 53500
},
{
"epoch": 1.34,
"learning_rate": 7.961111634222231e-05,
"loss": 3.0913,
"step": 54000
},
{
"epoch": 1.36,
"learning_rate": 7.960382560088704e-05,
"loss": 3.019,
"step": 54500
},
{
"epoch": 1.37,
"learning_rate": 7.95964674899181e-05,
"loss": 3.0614,
"step": 55000
},
{
"epoch": 1.38,
"learning_rate": 7.958904202183229e-05,
"loss": 2.971,
"step": 55500
},
{
"epoch": 1.39,
"learning_rate": 7.958154920926098e-05,
"loss": 3.0,
"step": 56000
},
{
"epoch": 1.41,
"learning_rate": 7.957398906495013e-05,
"loss": 2.9365,
"step": 56500
},
{
"epoch": 1.42,
"learning_rate": 7.956636160176019e-05,
"loss": 3.0992,
"step": 57000
},
{
"epoch": 1.43,
"learning_rate": 7.955866683266618e-05,
"loss": 3.0592,
"step": 57500
},
{
"epoch": 1.44,
"learning_rate": 7.955090477075758e-05,
"loss": 3.0338,
"step": 58000
},
{
"epoch": 1.46,
"learning_rate": 7.954307542923836e-05,
"loss": 2.9993,
"step": 58500
},
{
"epoch": 1.47,
"learning_rate": 7.95351788214269e-05,
"loss": 2.9913,
"step": 59000
},
{
"epoch": 1.48,
"learning_rate": 7.952721496075605e-05,
"loss": 3.0729,
"step": 59500
},
{
"epoch": 1.49,
"learning_rate": 7.951918386077305e-05,
"loss": 3.0712,
"step": 60000
},
{
"epoch": 1.51,
"learning_rate": 7.951108553513951e-05,
"loss": 3.0917,
"step": 60500
},
{
"epoch": 1.52,
"learning_rate": 7.950291999763139e-05,
"loss": 2.9452,
"step": 61000
},
{
"epoch": 1.53,
"learning_rate": 7.949468726213903e-05,
"loss": 3.0763,
"step": 61500
},
{
"epoch": 1.54,
"learning_rate": 7.948638734266702e-05,
"loss": 3.0274,
"step": 62000
},
{
"epoch": 1.56,
"learning_rate": 7.947802025333426e-05,
"loss": 3.0257,
"step": 62500
},
{
"epoch": 1.57,
"learning_rate": 7.946958600837391e-05,
"loss": 3.067,
"step": 63000
},
{
"epoch": 1.58,
"learning_rate": 7.946108462213341e-05,
"loss": 3.0223,
"step": 63500
},
{
"epoch": 1.59,
"learning_rate": 7.945251610907434e-05,
"loss": 3.0854,
"step": 64000
},
{
"epoch": 1.61,
"learning_rate": 7.94438804837725e-05,
"loss": 3.042,
"step": 64500
},
{
"epoch": 1.62,
"learning_rate": 7.943517776091788e-05,
"loss": 3.2018,
"step": 65000
},
{
"epoch": 1.63,
"learning_rate": 7.942640795531457e-05,
"loss": 3.0192,
"step": 65500
},
{
"epoch": 1.64,
"learning_rate": 7.94175710818808e-05,
"loss": 3.0514,
"step": 66000
},
{
"epoch": 1.66,
"learning_rate": 7.940866715564888e-05,
"loss": 3.02,
"step": 66500
},
{
"epoch": 1.67,
"learning_rate": 7.939969619176517e-05,
"loss": 3.0115,
"step": 67000
},
{
"epoch": 1.68,
"learning_rate": 7.939065820549008e-05,
"loss": 3.0097,
"step": 67500
},
{
"epoch": 1.69,
"learning_rate": 7.938155321219805e-05,
"loss": 3.063,
"step": 68000
},
{
"epoch": 1.71,
"learning_rate": 7.937238122737746e-05,
"loss": 2.9444,
"step": 68500
},
{
"epoch": 1.72,
"learning_rate": 7.936314226663066e-05,
"loss": 3.0293,
"step": 69000
},
{
"epoch": 1.73,
"learning_rate": 7.9353836345674e-05,
"loss": 3.0708,
"step": 69500
},
{
"epoch": 1.74,
"learning_rate": 7.934446348033764e-05,
"loss": 2.9991,
"step": 70000
},
{
"epoch": 1.76,
"learning_rate": 7.933502368656565e-05,
"loss": 3.0074,
"step": 70500
},
{
"epoch": 1.77,
"learning_rate": 7.932551698041597e-05,
"loss": 3.0553,
"step": 71000
},
{
"epoch": 1.78,
"learning_rate": 7.931594337806037e-05,
"loss": 3.0395,
"step": 71500
},
{
"epoch": 1.79,
"learning_rate": 7.930630289578438e-05,
"loss": 3.0792,
"step": 72000
},
{
"epoch": 1.81,
"learning_rate": 7.929659554998734e-05,
"loss": 3.0536,
"step": 72500
},
{
"epoch": 1.82,
"learning_rate": 7.928682135718229e-05,
"loss": 3.0239,
"step": 73000
},
{
"epoch": 1.83,
"learning_rate": 7.927698033399599e-05,
"loss": 3.0623,
"step": 73500
},
{
"epoch": 1.84,
"learning_rate": 7.926707249716894e-05,
"loss": 3.0992,
"step": 74000
},
{
"epoch": 1.85,
"learning_rate": 7.925709786355524e-05,
"loss": 3.0043,
"step": 74500
},
{
"epoch": 1.87,
"learning_rate": 7.924705645012258e-05,
"loss": 3.0002,
"step": 75000
},
{
"epoch": 1.88,
"learning_rate": 7.923694827395235e-05,
"loss": 2.9573,
"step": 75500
},
{
"epoch": 1.89,
"learning_rate": 7.922677335223946e-05,
"loss": 3.0187,
"step": 76000
},
{
"epoch": 1.9,
"learning_rate": 7.921653170229231e-05,
"loss": 3.0719,
"step": 76500
},
{
"epoch": 1.92,
"learning_rate": 7.920622334153292e-05,
"loss": 2.9811,
"step": 77000
},
{
"epoch": 1.93,
"learning_rate": 7.919584828749669e-05,
"loss": 3.0582,
"step": 77500
},
{
"epoch": 1.94,
"learning_rate": 7.91854065578325e-05,
"loss": 2.9818,
"step": 78000
},
{
"epoch": 1.95,
"learning_rate": 7.917489817030271e-05,
"loss": 2.9759,
"step": 78500
},
{
"epoch": 1.97,
"learning_rate": 7.916432314278299e-05,
"loss": 3.0651,
"step": 79000
},
{
"epoch": 1.98,
"learning_rate": 7.91536814932624e-05,
"loss": 2.9599,
"step": 79500
},
{
"epoch": 1.99,
"learning_rate": 7.914297323984334e-05,
"loss": 3.0147,
"step": 80000
},
{
"epoch": 2.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.873922109603882,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 4569600,
"eval_runtime": 7859.9692,
"eval_samples_per_second": 1.136,
"eval_steps_per_second": 0.568,
"eval_translation_length": 4569600,
"step": 80324
},
{
"epoch": 2.0,
"learning_rate": 7.913219840074152e-05,
"loss": 3.0052,
"step": 80500
},
{
"epoch": 2.02,
"learning_rate": 7.912135699428588e-05,
"loss": 2.9191,
"step": 81000
},
{
"epoch": 2.03,
"learning_rate": 7.91104490389186e-05,
"loss": 3.0454,
"step": 81500
},
{
"epoch": 2.04,
"learning_rate": 7.909947455319513e-05,
"loss": 3.0362,
"step": 82000
},
{
"epoch": 2.05,
"learning_rate": 7.908843355578402e-05,
"loss": 3.016,
"step": 82500
},
{
"epoch": 2.07,
"learning_rate": 7.907732606546699e-05,
"loss": 3.0735,
"step": 83000
},
{
"epoch": 2.08,
"learning_rate": 7.906615210113886e-05,
"loss": 3.0626,
"step": 83500
},
{
"epoch": 2.09,
"learning_rate": 7.905491168180757e-05,
"loss": 2.9955,
"step": 84000
},
{
"epoch": 2.1,
"learning_rate": 7.904360482659405e-05,
"loss": 3.0104,
"step": 84500
},
{
"epoch": 2.12,
"learning_rate": 7.903223155473227e-05,
"loss": 3.0618,
"step": 85000
},
{
"epoch": 2.13,
"learning_rate": 7.902079188556919e-05,
"loss": 3.0146,
"step": 85500
},
{
"epoch": 2.14,
"learning_rate": 7.900928583856469e-05,
"loss": 3.0075,
"step": 86000
},
{
"epoch": 2.15,
"learning_rate": 7.899771343329159e-05,
"loss": 2.9807,
"step": 86500
},
{
"epoch": 2.17,
"learning_rate": 7.898607468943558e-05,
"loss": 3.0609,
"step": 87000
},
{
"epoch": 2.18,
"learning_rate": 7.89743696267952e-05,
"loss": 2.9379,
"step": 87500
},
{
"epoch": 2.19,
"learning_rate": 7.896259826528179e-05,
"loss": 3.0411,
"step": 88000
},
{
"epoch": 2.2,
"learning_rate": 7.89507606249195e-05,
"loss": 3.0788,
"step": 88500
},
{
"epoch": 2.22,
"learning_rate": 7.893885672584521e-05,
"loss": 3.115,
"step": 89000
},
{
"epoch": 2.23,
"learning_rate": 7.892688658830852e-05,
"loss": 3.0927,
"step": 89500
},
{
"epoch": 2.24,
"learning_rate": 7.891485023267167e-05,
"loss": 2.9474,
"step": 90000
},
{
"epoch": 2.25,
"learning_rate": 7.890274767940961e-05,
"loss": 3.0862,
"step": 90500
},
{
"epoch": 2.27,
"learning_rate": 7.889057894910983e-05,
"loss": 3.0357,
"step": 91000
},
{
"epoch": 2.28,
"learning_rate": 7.887834406247244e-05,
"loss": 3.0165,
"step": 91500
},
{
"epoch": 2.29,
"learning_rate": 7.886604304031007e-05,
"loss": 3.0627,
"step": 92000
},
{
"epoch": 2.3,
"learning_rate": 7.885367590354785e-05,
"loss": 3.0763,
"step": 92500
},
{
"epoch": 2.32,
"learning_rate": 7.88412426732234e-05,
"loss": 3.0735,
"step": 93000
},
{
"epoch": 2.33,
"learning_rate": 7.882874337048669e-05,
"loss": 2.9523,
"step": 93500
},
{
"epoch": 2.34,
"learning_rate": 7.881617801660023e-05,
"loss": 2.9885,
"step": 94000
},
{
"epoch": 2.35,
"learning_rate": 7.880354663293875e-05,
"loss": 2.9825,
"step": 94500
},
{
"epoch": 2.37,
"learning_rate": 7.879084924098938e-05,
"loss": 2.9537,
"step": 95000
},
{
"epoch": 2.38,
"learning_rate": 7.877808586235151e-05,
"loss": 3.0941,
"step": 95500
},
{
"epoch": 2.39,
"learning_rate": 7.876525651873679e-05,
"loss": 3.0817,
"step": 96000
},
{
"epoch": 2.4,
"learning_rate": 7.875236123196905e-05,
"loss": 3.0927,
"step": 96500
},
{
"epoch": 2.42,
"learning_rate": 7.873940002398435e-05,
"loss": 2.9777,
"step": 97000
},
{
"epoch": 2.43,
"learning_rate": 7.872637291683084e-05,
"loss": 3.0594,
"step": 97500
},
{
"epoch": 2.44,
"learning_rate": 7.871327993266882e-05,
"loss": 3.0813,
"step": 98000
},
{
"epoch": 2.45,
"learning_rate": 7.870012109377058e-05,
"loss": 3.0248,
"step": 98500
},
{
"epoch": 2.47,
"learning_rate": 7.868689642252051e-05,
"loss": 2.9885,
"step": 99000
},
{
"epoch": 2.48,
"learning_rate": 7.867360594141493e-05,
"loss": 3.037,
"step": 99500
},
{
"epoch": 2.49,
"learning_rate": 7.866024967306214e-05,
"loss": 3.1189,
"step": 100000
},
{
"epoch": 2.5,
"learning_rate": 7.864682764018236e-05,
"loss": 2.9825,
"step": 100500
},
{
"epoch": 2.51,
"learning_rate": 7.863333986560763e-05,
"loss": 3.0277,
"step": 101000
},
{
"epoch": 2.53,
"learning_rate": 7.861978637228187e-05,
"loss": 3.1551,
"step": 101500
},
{
"epoch": 2.54,
"learning_rate": 7.860616718326079e-05,
"loss": 3.0137,
"step": 102000
},
{
"epoch": 2.55,
"learning_rate": 7.859248232171184e-05,
"loss": 3.0373,
"step": 102500
},
{
"epoch": 2.56,
"learning_rate": 7.857873181091416e-05,
"loss": 3.0931,
"step": 103000
},
{
"epoch": 2.58,
"learning_rate": 7.856491567425863e-05,
"loss": 3.0635,
"step": 103500
},
{
"epoch": 2.59,
"learning_rate": 7.85510339352477e-05,
"loss": 3.0677,
"step": 104000
},
{
"epoch": 2.6,
"learning_rate": 7.853708661749548e-05,
"loss": 3.0483,
"step": 104500
},
{
"epoch": 2.61,
"learning_rate": 7.852307374472755e-05,
"loss": 2.9974,
"step": 105000
},
{
"epoch": 2.63,
"learning_rate": 7.850899534078111e-05,
"loss": 3.0567,
"step": 105500
},
{
"epoch": 2.64,
"learning_rate": 7.849485142960473e-05,
"loss": 3.0159,
"step": 106000
},
{
"epoch": 2.65,
"learning_rate": 7.848064203525849e-05,
"loss": 3.0221,
"step": 106500
},
{
"epoch": 2.66,
"learning_rate": 7.846636718191382e-05,
"loss": 2.9791,
"step": 107000
},
{
"epoch": 2.68,
"learning_rate": 7.845202689385353e-05,
"loss": 2.9952,
"step": 107500
},
{
"epoch": 2.69,
"learning_rate": 7.843762119547172e-05,
"loss": 2.997,
"step": 108000
},
{
"epoch": 2.7,
"learning_rate": 7.842315011127377e-05,
"loss": 2.9545,
"step": 108500
},
{
"epoch": 2.71,
"learning_rate": 7.840861366587628e-05,
"loss": 3.0655,
"step": 109000
},
{
"epoch": 2.73,
"learning_rate": 7.839401188400704e-05,
"loss": 3.0671,
"step": 109500
},
{
"epoch": 2.74,
"learning_rate": 7.837934479050496e-05,
"loss": 3.0327,
"step": 110000
},
{
"epoch": 2.75,
"learning_rate": 7.836461241032011e-05,
"loss": 3.0038,
"step": 110500
},
{
"epoch": 2.76,
"learning_rate": 7.834981476851354e-05,
"loss": 3.0443,
"step": 111000
},
{
"epoch": 2.78,
"learning_rate": 7.83349518902574e-05,
"loss": 2.9654,
"step": 111500
},
{
"epoch": 2.79,
"learning_rate": 7.832002380083474e-05,
"loss": 3.0335,
"step": 112000
},
{
"epoch": 2.8,
"learning_rate": 7.830503052563956e-05,
"loss": 3.0491,
"step": 112500
},
{
"epoch": 2.81,
"learning_rate": 7.828997209017678e-05,
"loss": 3.0643,
"step": 113000
},
{
"epoch": 2.83,
"learning_rate": 7.827484852006215e-05,
"loss": 3.1021,
"step": 113500
},
{
"epoch": 2.84,
"learning_rate": 7.825965984102216e-05,
"loss": 3.0631,
"step": 114000
},
{
"epoch": 2.85,
"learning_rate": 7.824440607889415e-05,
"loss": 2.9952,
"step": 114500
},
{
"epoch": 2.86,
"learning_rate": 7.822908725962612e-05,
"loss": 3.0093,
"step": 115000
},
{
"epoch": 2.88,
"learning_rate": 7.821370340927674e-05,
"loss": 2.9657,
"step": 115500
},
{
"epoch": 2.89,
"learning_rate": 7.819825455401529e-05,
"loss": 3.0453,
"step": 116000
},
{
"epoch": 2.9,
"learning_rate": 7.818274072012168e-05,
"loss": 3.0879,
"step": 116500
},
{
"epoch": 2.91,
"learning_rate": 7.81671619339863e-05,
"loss": 2.9641,
"step": 117000
},
{
"epoch": 2.93,
"learning_rate": 7.815151822211007e-05,
"loss": 3.0363,
"step": 117500
},
{
"epoch": 2.94,
"learning_rate": 7.813580961110435e-05,
"loss": 3.051,
"step": 118000
},
{
"epoch": 2.95,
"learning_rate": 7.812003612769084e-05,
"loss": 3.0135,
"step": 118500
},
{
"epoch": 2.96,
"learning_rate": 7.810419779870166e-05,
"loss": 2.9968,
"step": 119000
},
{
"epoch": 2.98,
"learning_rate": 7.808829465107923e-05,
"loss": 3.0569,
"step": 119500
},
{
"epoch": 2.99,
"learning_rate": 7.80723267118762e-05,
"loss": 3.0546,
"step": 120000
},
{
"epoch": 3.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.873922109603882,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 4569600,
"eval_runtime": 7544.7431,
"eval_samples_per_second": 1.183,
"eval_steps_per_second": 0.592,
"eval_translation_length": 4569600,
"step": 120486
}
],
"logging_steps": 500,
"max_steps": 1204860,
"num_train_epochs": 30,
"save_steps": 1000,
"total_flos": 2.0678546883965092e+18,
"trial_name": null,
"trial_params": null
}