{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9957446808510637,
"eval_steps": 500,
"global_step": 264,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11347517730496454,
"grad_norm": 2.358112635127941,
"learning_rate": 5e-06,
"loss": 1.045,
"step": 10
},
{
"epoch": 0.22695035460992907,
"grad_norm": 2.2025871190711044,
"learning_rate": 5e-06,
"loss": 0.939,
"step": 20
},
{
"epoch": 0.3404255319148936,
"grad_norm": 1.183824809408691,
"learning_rate": 5e-06,
"loss": 0.8991,
"step": 30
},
{
"epoch": 0.45390070921985815,
"grad_norm": 1.2551021476147783,
"learning_rate": 5e-06,
"loss": 0.8706,
"step": 40
},
{
"epoch": 0.5673758865248227,
"grad_norm": 1.6978646589888085,
"learning_rate": 5e-06,
"loss": 0.8565,
"step": 50
},
{
"epoch": 0.6808510638297872,
"grad_norm": 2.13778699873673,
"learning_rate": 5e-06,
"loss": 0.8386,
"step": 60
},
{
"epoch": 0.7943262411347518,
"grad_norm": 0.993295806091264,
"learning_rate": 5e-06,
"loss": 0.8309,
"step": 70
},
{
"epoch": 0.9078014184397163,
"grad_norm": 1.1398762974593635,
"learning_rate": 5e-06,
"loss": 0.823,
"step": 80
},
{
"epoch": 0.9985815602836879,
"eval_loss": 0.8090236783027649,
"eval_runtime": 63.3465,
"eval_samples_per_second": 37.445,
"eval_steps_per_second": 0.6,
"step": 88
},
{
"epoch": 1.0212765957446808,
"grad_norm": 1.013500095467207,
"learning_rate": 5e-06,
"loss": 0.8742,
"step": 90
},
{
"epoch": 1.1347517730496455,
"grad_norm": 1.056225323349834,
"learning_rate": 5e-06,
"loss": 0.7667,
"step": 100
},
{
"epoch": 1.24822695035461,
"grad_norm": 0.7290196034792423,
"learning_rate": 5e-06,
"loss": 0.755,
"step": 110
},
{
"epoch": 1.3617021276595744,
"grad_norm": 0.8838498260846974,
"learning_rate": 5e-06,
"loss": 0.7554,
"step": 120
},
{
"epoch": 1.475177304964539,
"grad_norm": 0.821991213787815,
"learning_rate": 5e-06,
"loss": 0.7556,
"step": 130
},
{
"epoch": 1.5886524822695036,
"grad_norm": 0.9855152726966359,
"learning_rate": 5e-06,
"loss": 0.7493,
"step": 140
},
{
"epoch": 1.702127659574468,
"grad_norm": 0.6490086567527167,
"learning_rate": 5e-06,
"loss": 0.7477,
"step": 150
},
{
"epoch": 1.8156028368794326,
"grad_norm": 1.0694149262660388,
"learning_rate": 5e-06,
"loss": 0.7414,
"step": 160
},
{
"epoch": 1.9290780141843973,
"grad_norm": 0.9645011140855406,
"learning_rate": 5e-06,
"loss": 0.7481,
"step": 170
},
{
"epoch": 1.9971631205673759,
"eval_loss": 0.7898643016815186,
"eval_runtime": 62.2492,
"eval_samples_per_second": 38.105,
"eval_steps_per_second": 0.61,
"step": 176
},
{
"epoch": 2.0425531914893615,
"grad_norm": 1.581859190270789,
"learning_rate": 5e-06,
"loss": 0.7818,
"step": 180
},
{
"epoch": 2.1560283687943262,
"grad_norm": 1.0466470957786433,
"learning_rate": 5e-06,
"loss": 0.6863,
"step": 190
},
{
"epoch": 2.269503546099291,
"grad_norm": 0.9663026123669691,
"learning_rate": 5e-06,
"loss": 0.6798,
"step": 200
},
{
"epoch": 2.382978723404255,
"grad_norm": 0.8243226264574698,
"learning_rate": 5e-06,
"loss": 0.6826,
"step": 210
},
{
"epoch": 2.49645390070922,
"grad_norm": 1.0907354136557872,
"learning_rate": 5e-06,
"loss": 0.6839,
"step": 220
},
{
"epoch": 2.6099290780141846,
"grad_norm": 0.7996806357479502,
"learning_rate": 5e-06,
"loss": 0.687,
"step": 230
},
{
"epoch": 2.723404255319149,
"grad_norm": 0.9108831837931511,
"learning_rate": 5e-06,
"loss": 0.6902,
"step": 240
},
{
"epoch": 2.8368794326241136,
"grad_norm": 0.8473372600949097,
"learning_rate": 5e-06,
"loss": 0.6873,
"step": 250
},
{
"epoch": 2.950354609929078,
"grad_norm": 0.8244777156304377,
"learning_rate": 5e-06,
"loss": 0.6866,
"step": 260
},
{
"epoch": 2.9957446808510637,
"eval_loss": 0.7845782041549683,
"eval_runtime": 58.8988,
"eval_samples_per_second": 40.272,
"eval_steps_per_second": 0.645,
"step": 264
},
{
"epoch": 2.9957446808510637,
"step": 264,
"total_flos": 442000453140480.0,
"train_loss": 0.7778021304896383,
"train_runtime": 8882.1716,
"train_samples_per_second": 15.22,
"train_steps_per_second": 0.03
}
],
"logging_steps": 10,
"max_steps": 264,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 442000453140480.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
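The "log_history" array above interleaves per-step training records (keyed by "loss") with end-of-epoch evaluation records (keyed by "eval_loss"). Below is a minimal sketch, assuming a standard Python environment with matplotlib available, of how a trainer_state.json like this one can be parsed to plot both curves; the input path and output filename are assumptions for illustration, not part of the original file.

import json
import matplotlib.pyplot as plt

# Assumed path; point this at the trainer_state.json shown above.
with open("trainer_state.json") as f:
    state = json.load(f)

# Split log_history into training records (have "loss") and
# evaluation records (have "eval_loss"); the final summary entry
# uses "train_loss" and is skipped by both filters.
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

plt.plot(*zip(*train), label="train loss")
plt.plot(*zip(*evals), marker="o", label="eval loss")
plt.xlabel("global step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curve.png")  # hypothetical output name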