Hanzalwi's picture
Training in progress, step 2100, checkpoint
1b91b09
{
"best_metric": 0.9542251825332642,
"best_model_checkpoint": "./outputs/checkpoint-2100",
"epoch": 2.9829545454545454,
"eval_steps": 100,
"global_step": 2100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"learning_rate": 0.0002,
"loss": 1.5836,
"step": 100
},
{
"epoch": 0.14,
"eval_loss": 1.0926024913787842,
"eval_runtime": 92.4288,
"eval_samples_per_second": 16.142,
"eval_steps_per_second": 2.023,
"step": 100
},
{
"epoch": 0.28,
"learning_rate": 0.0002,
"loss": 1.2272,
"step": 200
},
{
"epoch": 0.28,
"eval_loss": 1.071556806564331,
"eval_runtime": 89.865,
"eval_samples_per_second": 16.603,
"eval_steps_per_second": 2.081,
"step": 200
},
{
"epoch": 0.43,
"learning_rate": 0.0002,
"loss": 1.2134,
"step": 300
},
{
"epoch": 0.43,
"eval_loss": 1.0566277503967285,
"eval_runtime": 89.873,
"eval_samples_per_second": 16.601,
"eval_steps_per_second": 2.081,
"step": 300
},
{
"epoch": 0.57,
"learning_rate": 0.0002,
"loss": 1.1959,
"step": 400
},
{
"epoch": 0.57,
"eval_loss": 1.0465874671936035,
"eval_runtime": 89.8186,
"eval_samples_per_second": 16.611,
"eval_steps_per_second": 2.082,
"step": 400
},
{
"epoch": 0.71,
"learning_rate": 0.0002,
"loss": 1.1861,
"step": 500
},
{
"epoch": 0.71,
"eval_loss": 1.0348858833312988,
"eval_runtime": 89.7272,
"eval_samples_per_second": 16.628,
"eval_steps_per_second": 2.084,
"step": 500
},
{
"epoch": 0.85,
"learning_rate": 0.0002,
"loss": 1.1735,
"step": 600
},
{
"epoch": 0.85,
"eval_loss": 1.0253515243530273,
"eval_runtime": 89.7076,
"eval_samples_per_second": 16.632,
"eval_steps_per_second": 2.085,
"step": 600
},
{
"epoch": 0.99,
"learning_rate": 0.0002,
"loss": 1.1615,
"step": 700
},
{
"epoch": 0.99,
"eval_loss": 1.0192995071411133,
"eval_runtime": 89.8909,
"eval_samples_per_second": 16.598,
"eval_steps_per_second": 2.08,
"step": 700
},
{
"epoch": 1.14,
"learning_rate": 0.0002,
"loss": 1.1509,
"step": 800
},
{
"epoch": 1.14,
"eval_loss": 1.0122530460357666,
"eval_runtime": 89.7843,
"eval_samples_per_second": 16.618,
"eval_steps_per_second": 2.083,
"step": 800
},
{
"epoch": 1.28,
"learning_rate": 0.0002,
"loss": 1.1265,
"step": 900
},
{
"epoch": 1.28,
"eval_loss": 1.0055540800094604,
"eval_runtime": 89.7773,
"eval_samples_per_second": 16.619,
"eval_steps_per_second": 2.083,
"step": 900
},
{
"epoch": 1.42,
"learning_rate": 0.0002,
"loss": 1.1298,
"step": 1000
},
{
"epoch": 1.42,
"eval_loss": 1.0025967359542847,
"eval_runtime": 89.9303,
"eval_samples_per_second": 16.591,
"eval_steps_per_second": 2.079,
"step": 1000
},
{
"epoch": 1.56,
"learning_rate": 0.0002,
"loss": 1.1323,
"step": 1100
},
{
"epoch": 1.56,
"eval_loss": 0.9946721792221069,
"eval_runtime": 90.1252,
"eval_samples_per_second": 16.555,
"eval_steps_per_second": 2.075,
"step": 1100
},
{
"epoch": 1.7,
"learning_rate": 0.0002,
"loss": 1.1193,
"step": 1200
},
{
"epoch": 1.7,
"eval_loss": 0.9902428984642029,
"eval_runtime": 89.9449,
"eval_samples_per_second": 16.588,
"eval_steps_per_second": 2.079,
"step": 1200
},
{
"epoch": 1.85,
"learning_rate": 0.0002,
"loss": 1.1111,
"step": 1300
},
{
"epoch": 1.85,
"eval_loss": 0.9833947420120239,
"eval_runtime": 90.3396,
"eval_samples_per_second": 16.515,
"eval_steps_per_second": 2.07,
"step": 1300
},
{
"epoch": 1.99,
"learning_rate": 0.0002,
"loss": 1.1141,
"step": 1400
},
{
"epoch": 1.99,
"eval_loss": 0.9805576205253601,
"eval_runtime": 89.7687,
"eval_samples_per_second": 16.62,
"eval_steps_per_second": 2.083,
"step": 1400
},
{
"epoch": 2.13,
"learning_rate": 0.0002,
"loss": 1.0911,
"step": 1500
},
{
"epoch": 2.13,
"eval_loss": 0.9768579602241516,
"eval_runtime": 89.855,
"eval_samples_per_second": 16.605,
"eval_steps_per_second": 2.081,
"step": 1500
},
{
"epoch": 2.27,
"learning_rate": 0.0002,
"loss": 1.0919,
"step": 1600
},
{
"epoch": 2.27,
"eval_loss": 0.9718981981277466,
"eval_runtime": 89.8866,
"eval_samples_per_second": 16.599,
"eval_steps_per_second": 2.08,
"step": 1600
},
{
"epoch": 2.41,
"learning_rate": 0.0002,
"loss": 1.073,
"step": 1700
},
{
"epoch": 2.41,
"eval_loss": 0.9693555235862732,
"eval_runtime": 89.9595,
"eval_samples_per_second": 16.585,
"eval_steps_per_second": 2.079,
"step": 1700
},
{
"epoch": 2.56,
"learning_rate": 0.0002,
"loss": 1.0759,
"step": 1800
},
{
"epoch": 2.56,
"eval_loss": 0.9647061228752136,
"eval_runtime": 90.1576,
"eval_samples_per_second": 16.549,
"eval_steps_per_second": 2.074,
"step": 1800
},
{
"epoch": 2.7,
"learning_rate": 0.0002,
"loss": 1.0784,
"step": 1900
},
{
"epoch": 2.7,
"eval_loss": 0.961346447467804,
"eval_runtime": 89.8749,
"eval_samples_per_second": 16.601,
"eval_steps_per_second": 2.081,
"step": 1900
},
{
"epoch": 2.84,
"learning_rate": 0.0002,
"loss": 1.0755,
"step": 2000
},
{
"epoch": 2.84,
"eval_loss": 0.9575291275978088,
"eval_runtime": 89.8309,
"eval_samples_per_second": 16.609,
"eval_steps_per_second": 2.082,
"step": 2000
},
{
"epoch": 2.98,
"learning_rate": 0.0002,
"loss": 1.0721,
"step": 2100
},
{
"epoch": 2.98,
"eval_loss": 0.9542251825332642,
"eval_runtime": 89.9742,
"eval_samples_per_second": 16.583,
"eval_steps_per_second": 2.078,
"step": 2100
}
],
"logging_steps": 100,
"max_steps": 2112,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 1.1673223042295808e+17,
"trial_name": null,
"trial_params": null
}