ramdhanfirdaus's picture
Training in progress, step 4100, checkpoint
5d63732
{
"best_metric": 1.2147547006607056,
"best_model_checkpoint": "./outputs/checkpoint-4100",
"epoch": 2.987249544626594,
"eval_steps": 100,
"global_step": 4100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 0.0002,
"loss": 2.1908,
"step": 100
},
{
"epoch": 0.07,
"eval_loss": 2.0238757133483887,
"eval_runtime": 146.4096,
"eval_samples_per_second": 42.852,
"eval_steps_per_second": 5.362,
"step": 100
},
{
"epoch": 0.15,
"learning_rate": 0.0002,
"loss": 1.9736,
"step": 200
},
{
"epoch": 0.15,
"eval_loss": 1.9310673475265503,
"eval_runtime": 159.7901,
"eval_samples_per_second": 39.264,
"eval_steps_per_second": 4.913,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 0.0002,
"loss": 1.9004,
"step": 300
},
{
"epoch": 0.22,
"eval_loss": 1.8707531690597534,
"eval_runtime": 143.355,
"eval_samples_per_second": 43.765,
"eval_steps_per_second": 5.476,
"step": 300
},
{
"epoch": 0.29,
"learning_rate": 0.0002,
"loss": 1.8488,
"step": 400
},
{
"epoch": 0.29,
"eval_loss": 1.825339436531067,
"eval_runtime": 143.4115,
"eval_samples_per_second": 43.748,
"eval_steps_per_second": 5.474,
"step": 400
},
{
"epoch": 0.36,
"learning_rate": 0.0002,
"loss": 1.7978,
"step": 500
},
{
"epoch": 0.36,
"eval_loss": 1.7843894958496094,
"eval_runtime": 143.3888,
"eval_samples_per_second": 43.755,
"eval_steps_per_second": 5.475,
"step": 500
},
{
"epoch": 0.44,
"learning_rate": 0.0002,
"loss": 1.7696,
"step": 600
},
{
"epoch": 0.44,
"eval_loss": 1.7558746337890625,
"eval_runtime": 143.3583,
"eval_samples_per_second": 43.764,
"eval_steps_per_second": 5.476,
"step": 600
},
{
"epoch": 0.51,
"learning_rate": 0.0002,
"loss": 1.7307,
"step": 700
},
{
"epoch": 0.51,
"eval_loss": 1.7239936590194702,
"eval_runtime": 146.9189,
"eval_samples_per_second": 42.704,
"eval_steps_per_second": 5.343,
"step": 700
},
{
"epoch": 0.58,
"learning_rate": 0.0002,
"loss": 1.7092,
"step": 800
},
{
"epoch": 0.58,
"eval_loss": 1.695021390914917,
"eval_runtime": 143.2343,
"eval_samples_per_second": 43.802,
"eval_steps_per_second": 5.481,
"step": 800
},
{
"epoch": 0.66,
"learning_rate": 0.0002,
"loss": 1.6739,
"step": 900
},
{
"epoch": 0.66,
"eval_loss": 1.6701653003692627,
"eval_runtime": 143.2538,
"eval_samples_per_second": 43.796,
"eval_steps_per_second": 5.48,
"step": 900
},
{
"epoch": 0.73,
"learning_rate": 0.0002,
"loss": 1.6592,
"step": 1000
},
{
"epoch": 0.73,
"eval_loss": 1.6471772193908691,
"eval_runtime": 143.2508,
"eval_samples_per_second": 43.797,
"eval_steps_per_second": 5.48,
"step": 1000
},
{
"epoch": 0.8,
"learning_rate": 0.0002,
"loss": 1.6386,
"step": 1100
},
{
"epoch": 0.8,
"eval_loss": 1.624908447265625,
"eval_runtime": 143.2378,
"eval_samples_per_second": 43.801,
"eval_steps_per_second": 5.48,
"step": 1100
},
{
"epoch": 0.87,
"learning_rate": 0.0002,
"loss": 1.604,
"step": 1200
},
{
"epoch": 0.87,
"eval_loss": 1.607008934020996,
"eval_runtime": 143.2934,
"eval_samples_per_second": 43.784,
"eval_steps_per_second": 5.478,
"step": 1200
},
{
"epoch": 0.95,
"learning_rate": 0.0002,
"loss": 1.6109,
"step": 1300
},
{
"epoch": 0.95,
"eval_loss": 1.584315299987793,
"eval_runtime": 143.3844,
"eval_samples_per_second": 43.757,
"eval_steps_per_second": 5.475,
"step": 1300
},
{
"epoch": 1.02,
"learning_rate": 0.0002,
"loss": 1.5645,
"step": 1400
},
{
"epoch": 1.02,
"eval_loss": 1.5640443563461304,
"eval_runtime": 143.4327,
"eval_samples_per_second": 43.742,
"eval_steps_per_second": 5.473,
"step": 1400
},
{
"epoch": 1.09,
"learning_rate": 0.0002,
"loss": 1.5361,
"step": 1500
},
{
"epoch": 1.09,
"eval_loss": 1.5473461151123047,
"eval_runtime": 143.4327,
"eval_samples_per_second": 43.742,
"eval_steps_per_second": 5.473,
"step": 1500
},
{
"epoch": 1.17,
"learning_rate": 0.0002,
"loss": 1.5177,
"step": 1600
},
{
"epoch": 1.17,
"eval_loss": 1.529100775718689,
"eval_runtime": 143.2933,
"eval_samples_per_second": 43.784,
"eval_steps_per_second": 5.478,
"step": 1600
},
{
"epoch": 1.24,
"learning_rate": 0.0002,
"loss": 1.5157,
"step": 1700
},
{
"epoch": 1.24,
"eval_loss": 1.512880802154541,
"eval_runtime": 143.3422,
"eval_samples_per_second": 43.769,
"eval_steps_per_second": 5.476,
"step": 1700
},
{
"epoch": 1.31,
"learning_rate": 0.0002,
"loss": 1.4888,
"step": 1800
},
{
"epoch": 1.31,
"eval_loss": 1.4976742267608643,
"eval_runtime": 143.3396,
"eval_samples_per_second": 43.77,
"eval_steps_per_second": 5.477,
"step": 1800
},
{
"epoch": 1.38,
"learning_rate": 0.0002,
"loss": 1.4783,
"step": 1900
},
{
"epoch": 1.38,
"eval_loss": 1.4826266765594482,
"eval_runtime": 143.3758,
"eval_samples_per_second": 43.759,
"eval_steps_per_second": 5.475,
"step": 1900
},
{
"epoch": 1.46,
"learning_rate": 0.0002,
"loss": 1.4618,
"step": 2000
},
{
"epoch": 1.46,
"eval_loss": 1.4686999320983887,
"eval_runtime": 143.318,
"eval_samples_per_second": 43.777,
"eval_steps_per_second": 5.477,
"step": 2000
},
{
"epoch": 1.53,
"learning_rate": 0.0002,
"loss": 1.4472,
"step": 2100
},
{
"epoch": 1.53,
"eval_loss": 1.4544299840927124,
"eval_runtime": 143.2701,
"eval_samples_per_second": 43.791,
"eval_steps_per_second": 5.479,
"step": 2100
},
{
"epoch": 1.6,
"learning_rate": 0.0002,
"loss": 1.422,
"step": 2200
},
{
"epoch": 1.6,
"eval_loss": 1.4396847486495972,
"eval_runtime": 143.3,
"eval_samples_per_second": 43.782,
"eval_steps_per_second": 5.478,
"step": 2200
},
{
"epoch": 1.68,
"learning_rate": 0.0002,
"loss": 1.4211,
"step": 2300
},
{
"epoch": 1.68,
"eval_loss": 1.4261609315872192,
"eval_runtime": 143.4641,
"eval_samples_per_second": 43.732,
"eval_steps_per_second": 5.472,
"step": 2300
},
{
"epoch": 1.75,
"learning_rate": 0.0002,
"loss": 1.4204,
"step": 2400
},
{
"epoch": 1.75,
"eval_loss": 1.4128549098968506,
"eval_runtime": 143.3377,
"eval_samples_per_second": 43.771,
"eval_steps_per_second": 5.477,
"step": 2400
},
{
"epoch": 1.82,
"learning_rate": 0.0002,
"loss": 1.3915,
"step": 2500
},
{
"epoch": 1.82,
"eval_loss": 1.4013662338256836,
"eval_runtime": 143.3012,
"eval_samples_per_second": 43.782,
"eval_steps_per_second": 5.478,
"step": 2500
},
{
"epoch": 1.89,
"learning_rate": 0.0002,
"loss": 1.3818,
"step": 2600
},
{
"epoch": 1.89,
"eval_loss": 1.3869951963424683,
"eval_runtime": 161.3719,
"eval_samples_per_second": 38.879,
"eval_steps_per_second": 4.865,
"step": 2600
},
{
"epoch": 1.97,
"learning_rate": 0.0002,
"loss": 1.3698,
"step": 2700
},
{
"epoch": 1.97,
"eval_loss": 1.3734662532806396,
"eval_runtime": 143.5647,
"eval_samples_per_second": 43.702,
"eval_steps_per_second": 5.468,
"step": 2700
},
{
"epoch": 2.04,
"learning_rate": 0.0002,
"loss": 1.341,
"step": 2800
},
{
"epoch": 2.04,
"eval_loss": 1.3625913858413696,
"eval_runtime": 150.8911,
"eval_samples_per_second": 41.58,
"eval_steps_per_second": 5.202,
"step": 2800
},
{
"epoch": 2.11,
"learning_rate": 0.0002,
"loss": 1.3155,
"step": 2900
},
{
"epoch": 2.11,
"eval_loss": 1.3497363328933716,
"eval_runtime": 150.1999,
"eval_samples_per_second": 41.771,
"eval_steps_per_second": 5.226,
"step": 2900
},
{
"epoch": 2.19,
"learning_rate": 0.0002,
"loss": 1.3161,
"step": 3000
},
{
"epoch": 2.19,
"eval_loss": 1.3376851081848145,
"eval_runtime": 143.5158,
"eval_samples_per_second": 43.716,
"eval_steps_per_second": 5.47,
"step": 3000
},
{
"epoch": 2.26,
"learning_rate": 0.0002,
"loss": 1.2955,
"step": 3100
},
{
"epoch": 2.26,
"eval_loss": 1.3271404504776,
"eval_runtime": 143.547,
"eval_samples_per_second": 43.707,
"eval_steps_per_second": 5.469,
"step": 3100
},
{
"epoch": 2.33,
"learning_rate": 0.0002,
"loss": 1.2998,
"step": 3200
},
{
"epoch": 2.33,
"eval_loss": 1.3150701522827148,
"eval_runtime": 143.5206,
"eval_samples_per_second": 43.715,
"eval_steps_per_second": 5.47,
"step": 3200
},
{
"epoch": 2.4,
"learning_rate": 0.0002,
"loss": 1.2812,
"step": 3300
},
{
"epoch": 2.4,
"eval_loss": 1.3021332025527954,
"eval_runtime": 143.6618,
"eval_samples_per_second": 43.672,
"eval_steps_per_second": 5.464,
"step": 3300
},
{
"epoch": 2.48,
"learning_rate": 0.0002,
"loss": 1.2634,
"step": 3400
},
{
"epoch": 2.48,
"eval_loss": 1.2930917739868164,
"eval_runtime": 143.6376,
"eval_samples_per_second": 43.679,
"eval_steps_per_second": 5.465,
"step": 3400
},
{
"epoch": 2.55,
"learning_rate": 0.0002,
"loss": 1.2634,
"step": 3500
},
{
"epoch": 2.55,
"eval_loss": 1.2815780639648438,
"eval_runtime": 143.69,
"eval_samples_per_second": 43.663,
"eval_steps_per_second": 5.463,
"step": 3500
},
{
"epoch": 2.62,
"learning_rate": 0.0002,
"loss": 1.2516,
"step": 3600
},
{
"epoch": 2.62,
"eval_loss": 1.270579218864441,
"eval_runtime": 143.5786,
"eval_samples_per_second": 43.697,
"eval_steps_per_second": 5.467,
"step": 3600
},
{
"epoch": 2.7,
"learning_rate": 0.0002,
"loss": 1.2358,
"step": 3700
},
{
"epoch": 2.7,
"eval_loss": 1.2581326961517334,
"eval_runtime": 143.5689,
"eval_samples_per_second": 43.7,
"eval_steps_per_second": 5.468,
"step": 3700
},
{
"epoch": 2.77,
"learning_rate": 0.0002,
"loss": 1.2124,
"step": 3800
},
{
"epoch": 2.77,
"eval_loss": 1.2480058670043945,
"eval_runtime": 143.4136,
"eval_samples_per_second": 43.748,
"eval_steps_per_second": 5.474,
"step": 3800
},
{
"epoch": 2.84,
"learning_rate": 0.0002,
"loss": 1.2226,
"step": 3900
},
{
"epoch": 2.84,
"eval_loss": 1.235005497932434,
"eval_runtime": 143.4509,
"eval_samples_per_second": 43.736,
"eval_steps_per_second": 5.472,
"step": 3900
},
{
"epoch": 2.91,
"learning_rate": 0.0002,
"loss": 1.2043,
"step": 4000
},
{
"epoch": 2.91,
"eval_loss": 1.227269172668457,
"eval_runtime": 173.3331,
"eval_samples_per_second": 36.196,
"eval_steps_per_second": 4.529,
"step": 4000
},
{
"epoch": 2.99,
"learning_rate": 0.0002,
"loss": 1.2023,
"step": 4100
},
{
"epoch": 2.99,
"eval_loss": 1.2147547006607056,
"eval_runtime": 143.5554,
"eval_samples_per_second": 43.704,
"eval_steps_per_second": 5.468,
"step": 4100
}
],
"logging_steps": 100,
"max_steps": 4116,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 2.4438234279579648e+17,
"trial_name": null,
"trial_params": null
}