AlienKevin's picture
Upload 67 files
99fd8af
raw
history blame
10.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.827521206409048,
"global_step": 12000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 5e-06,
"loss": 0.8357,
"step": 200
},
{
"epoch": 0.09,
"learning_rate": 1e-05,
"loss": 0.6021,
"step": 400
},
{
"epoch": 0.14,
"learning_rate": 1.5e-05,
"loss": 0.216,
"step": 600
},
{
"epoch": 0.19,
"learning_rate": 2e-05,
"loss": 0.1557,
"step": 800
},
{
"epoch": 0.24,
"learning_rate": 2.5e-05,
"loss": 0.1325,
"step": 1000
},
{
"epoch": 0.24,
"eval_bleu": 8.163188643432274,
"eval_chrf": 9.012540691377044,
"eval_loss": 0.21054820716381073,
"eval_runtime": 641.8265,
"eval_samples_per_second": 2.493,
"eval_steps_per_second": 0.078,
"step": 1000
},
{
"epoch": 0.28,
"learning_rate": 3e-05,
"loss": 0.1126,
"step": 1200
},
{
"epoch": 0.33,
"learning_rate": 3.5e-05,
"loss": 0.1059,
"step": 1400
},
{
"epoch": 0.38,
"learning_rate": 4e-05,
"loss": 0.096,
"step": 1600
},
{
"epoch": 0.42,
"learning_rate": 4.5e-05,
"loss": 0.0972,
"step": 1800
},
{
"epoch": 0.47,
"learning_rate": 5e-05,
"loss": 0.0895,
"step": 2000
},
{
"epoch": 0.47,
"eval_bleu": 12.747283674419554,
"eval_chrf": 14.975219232972814,
"eval_loss": 0.2011401355266571,
"eval_runtime": 899.369,
"eval_samples_per_second": 1.779,
"eval_steps_per_second": 0.056,
"step": 2000
},
{
"epoch": 0.52,
"learning_rate": 4.906820723071189e-05,
"loss": 0.0782,
"step": 2200
},
{
"epoch": 0.57,
"learning_rate": 4.813641446142379e-05,
"loss": 0.0775,
"step": 2400
},
{
"epoch": 0.61,
"learning_rate": 4.720462169213567e-05,
"loss": 0.0754,
"step": 2600
},
{
"epoch": 0.66,
"learning_rate": 4.627282892284756e-05,
"loss": 0.0698,
"step": 2800
},
{
"epoch": 0.71,
"learning_rate": 4.5341036153559454e-05,
"loss": 0.0682,
"step": 3000
},
{
"epoch": 0.71,
"eval_bleu": 6.876683844974673,
"eval_chrf": 14.013775496967607,
"eval_loss": 0.2018859088420868,
"eval_runtime": 978.3085,
"eval_samples_per_second": 1.635,
"eval_steps_per_second": 0.051,
"step": 3000
},
{
"epoch": 0.75,
"learning_rate": 4.440924338427134e-05,
"loss": 0.0633,
"step": 3200
},
{
"epoch": 0.8,
"learning_rate": 4.347745061498323e-05,
"loss": 0.0625,
"step": 3400
},
{
"epoch": 0.85,
"learning_rate": 4.254565784569512e-05,
"loss": 0.0627,
"step": 3600
},
{
"epoch": 0.9,
"learning_rate": 4.161386507640701e-05,
"loss": 0.0595,
"step": 3800
},
{
"epoch": 0.94,
"learning_rate": 4.06820723071189e-05,
"loss": 0.0591,
"step": 4000
},
{
"epoch": 0.94,
"eval_bleu": 28.04277575880573,
"eval_chrf": 24.223488772329105,
"eval_loss": 0.19976669549942017,
"eval_runtime": 250.605,
"eval_samples_per_second": 6.385,
"eval_steps_per_second": 0.2,
"step": 4000
},
{
"epoch": 0.99,
"learning_rate": 3.975027953783079e-05,
"loss": 0.0583,
"step": 4200
},
{
"epoch": 1.04,
"learning_rate": 3.8818486768542676e-05,
"loss": 0.0528,
"step": 4400
},
{
"epoch": 1.08,
"learning_rate": 3.788669399925457e-05,
"loss": 0.0511,
"step": 4600
},
{
"epoch": 1.13,
"learning_rate": 3.695490122996646e-05,
"loss": 0.0512,
"step": 4800
},
{
"epoch": 1.18,
"learning_rate": 3.602310846067834e-05,
"loss": 0.0482,
"step": 5000
},
{
"epoch": 1.18,
"eval_bleu": 28.9415072135438,
"eval_chrf": 24.984505458577555,
"eval_loss": 0.19697046279907227,
"eval_runtime": 264.9755,
"eval_samples_per_second": 6.038,
"eval_steps_per_second": 0.189,
"step": 5000
},
{
"epoch": 1.23,
"learning_rate": 3.509131569139024e-05,
"loss": 0.0477,
"step": 5200
},
{
"epoch": 1.27,
"learning_rate": 3.415952292210213e-05,
"loss": 0.0475,
"step": 5400
},
{
"epoch": 1.32,
"learning_rate": 3.3227730152814016e-05,
"loss": 0.0477,
"step": 5600
},
{
"epoch": 1.37,
"learning_rate": 3.2295937383525905e-05,
"loss": 0.0471,
"step": 5800
},
{
"epoch": 1.41,
"learning_rate": 3.1364144614237794e-05,
"loss": 0.0461,
"step": 6000
},
{
"epoch": 1.41,
"eval_bleu": 29.631223435519598,
"eval_chrf": 25.61588898393464,
"eval_loss": 0.19584685564041138,
"eval_runtime": 306.0261,
"eval_samples_per_second": 5.228,
"eval_steps_per_second": 0.163,
"step": 6000
},
{
"epoch": 1.46,
"learning_rate": 3.0432351844949686e-05,
"loss": 0.0465,
"step": 6200
},
{
"epoch": 1.51,
"learning_rate": 2.950055907566158e-05,
"loss": 0.0458,
"step": 6400
},
{
"epoch": 1.56,
"learning_rate": 2.8568766306373464e-05,
"loss": 0.0433,
"step": 6600
},
{
"epoch": 1.6,
"learning_rate": 2.7636973537085353e-05,
"loss": 0.045,
"step": 6800
},
{
"epoch": 1.65,
"learning_rate": 2.6705180767797245e-05,
"loss": 0.044,
"step": 7000
},
{
"epoch": 1.65,
"eval_bleu": 29.440869914401336,
"eval_chrf": 25.503480748268714,
"eval_loss": 0.201187402009964,
"eval_runtime": 327.7594,
"eval_samples_per_second": 4.882,
"eval_steps_per_second": 0.153,
"step": 7000
},
{
"epoch": 1.7,
"learning_rate": 2.5773387998509134e-05,
"loss": 0.0434,
"step": 7200
},
{
"epoch": 1.74,
"learning_rate": 2.4841595229221023e-05,
"loss": 0.0431,
"step": 7400
},
{
"epoch": 1.79,
"learning_rate": 2.3909802459932912e-05,
"loss": 0.0429,
"step": 7600
},
{
"epoch": 1.84,
"learning_rate": 2.2978009690644804e-05,
"loss": 0.0411,
"step": 7800
},
{
"epoch": 1.89,
"learning_rate": 2.204621692135669e-05,
"loss": 0.0397,
"step": 8000
},
{
"epoch": 1.89,
"eval_bleu": 29.715590270005077,
"eval_chrf": 25.651817457402736,
"eval_loss": 0.19735974073410034,
"eval_runtime": 382.7369,
"eval_samples_per_second": 4.18,
"eval_steps_per_second": 0.131,
"step": 8000
},
{
"epoch": 1.93,
"learning_rate": 2.1114424152068582e-05,
"loss": 0.0416,
"step": 8200
},
{
"epoch": 1.98,
"learning_rate": 2.018263138278047e-05,
"loss": 0.0404,
"step": 8400
},
{
"epoch": 2.03,
"learning_rate": 1.925083861349236e-05,
"loss": 0.0398,
"step": 8600
},
{
"epoch": 2.07,
"learning_rate": 1.831904584420425e-05,
"loss": 0.0371,
"step": 8800
},
{
"epoch": 2.12,
"learning_rate": 1.738725307491614e-05,
"loss": 0.0367,
"step": 9000
},
{
"epoch": 2.12,
"eval_bleu": 29.539694856534723,
"eval_chrf": 25.49563217007088,
"eval_loss": 0.20109611749649048,
"eval_runtime": 343.2907,
"eval_samples_per_second": 4.661,
"eval_steps_per_second": 0.146,
"step": 9000
},
{
"epoch": 2.17,
"learning_rate": 1.645546030562803e-05,
"loss": 0.0357,
"step": 9200
},
{
"epoch": 2.21,
"learning_rate": 1.552366753633992e-05,
"loss": 0.0348,
"step": 9400
},
{
"epoch": 2.26,
"learning_rate": 1.4591874767051808e-05,
"loss": 0.0365,
"step": 9600
},
{
"epoch": 2.31,
"learning_rate": 1.36600819977637e-05,
"loss": 0.0356,
"step": 9800
},
{
"epoch": 2.36,
"learning_rate": 1.2728289228475587e-05,
"loss": 0.0361,
"step": 10000
},
{
"epoch": 2.36,
"eval_bleu": 29.780602519540743,
"eval_chrf": 25.721793630268674,
"eval_loss": 0.20224666595458984,
"eval_runtime": 349.4231,
"eval_samples_per_second": 4.579,
"eval_steps_per_second": 0.143,
"step": 10000
},
{
"epoch": 2.4,
"learning_rate": 1.1796496459187478e-05,
"loss": 0.0354,
"step": 10200
},
{
"epoch": 2.45,
"learning_rate": 1.0864703689899367e-05,
"loss": 0.0349,
"step": 10400
},
{
"epoch": 2.5,
"learning_rate": 9.932910920611255e-06,
"loss": 0.0352,
"step": 10600
},
{
"epoch": 2.54,
"learning_rate": 9.001118151323146e-06,
"loss": 0.0353,
"step": 10800
},
{
"epoch": 2.59,
"learning_rate": 8.069325382035035e-06,
"loss": 0.0357,
"step": 11000
},
{
"epoch": 2.59,
"eval_bleu": 30.453580686642933,
"eval_chrf": 26.259452504202525,
"eval_loss": 0.19840951263904572,
"eval_runtime": 395.5462,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 0.126,
"step": 11000
},
{
"epoch": 2.64,
"learning_rate": 7.137532612746925e-06,
"loss": 0.0352,
"step": 11200
},
{
"epoch": 2.69,
"learning_rate": 6.205739843458815e-06,
"loss": 0.0344,
"step": 11400
},
{
"epoch": 2.73,
"learning_rate": 5.273947074170705e-06,
"loss": 0.0335,
"step": 11600
},
{
"epoch": 2.78,
"learning_rate": 4.342154304882595e-06,
"loss": 0.0346,
"step": 11800
},
{
"epoch": 2.83,
"learning_rate": 3.410361535594484e-06,
"loss": 0.0352,
"step": 12000
},
{
"epoch": 2.83,
"eval_bleu": 30.412569602104455,
"eval_chrf": 26.262864851975078,
"eval_loss": 0.1988079845905304,
"eval_runtime": 424.6307,
"eval_samples_per_second": 3.768,
"eval_steps_per_second": 0.118,
"step": 12000
}
],
"max_steps": 12732,
"num_train_epochs": 3,
"total_flos": 2.926491104968704e+16,
"trial_name": null,
"trial_params": null
}