File size: 2,387 Bytes
9c5668e b933dc2 9c5668e b933dc2 9c5668e b933dc2 7b8ddf5 9c5668e b933dc2 7b8ddf5 9c5668e b933dc2 7b8ddf5 9c5668e b933dc2 7b8ddf5 9c5668e b933dc2 7b8ddf5 9c5668e b933dc2 7b8ddf5 9c5668e b933dc2 7b8ddf5 9c5668e b933dc2 7b8ddf5 9c5668e b933dc2 7b8ddf5 9c5668e b933dc2 9c5668e b933dc2 9c5668e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 2060,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4854368932038835,
"grad_norm": 1.1003823280334473,
"learning_rate": 8.794946550048592e-05,
"loss": 0.8551,
"step": 250
},
{
"epoch": 0.970873786407767,
"grad_norm": 0.7180963158607483,
"learning_rate": 7.580174927113704e-05,
"loss": 0.4137,
"step": 500
},
{
"epoch": 1.4563106796116505,
"grad_norm": 0.6523966789245605,
"learning_rate": 6.365403304178815e-05,
"loss": 0.3735,
"step": 750
},
{
"epoch": 1.941747572815534,
"grad_norm": 0.6745087504386902,
"learning_rate": 5.150631681243926e-05,
"loss": 0.3583,
"step": 1000
},
{
"epoch": 2.4271844660194173,
"grad_norm": 0.6706854104995728,
"learning_rate": 3.9358600583090386e-05,
"loss": 0.345,
"step": 1250
},
{
"epoch": 2.912621359223301,
"grad_norm": 0.7672198414802551,
"learning_rate": 2.72108843537415e-05,
"loss": 0.333,
"step": 1500
},
{
"epoch": 3.3980582524271843,
"grad_norm": 0.7942991256713867,
"learning_rate": 1.5063168124392615e-05,
"loss": 0.3142,
"step": 1750
},
{
"epoch": 3.883495145631068,
"grad_norm": 0.7696407437324524,
"learning_rate": 2.915451895043732e-06,
"loss": 0.302,
"step": 2000
},
{
"epoch": 4.0,
"step": 2060,
"total_flos": 1.4445804612483994e+18,
"train_loss": 0.40855656966422366,
"train_runtime": 23649.5822,
"train_samples_per_second": 22.298,
"train_steps_per_second": 0.087
}
],
"logging_steps": 250,
"max_steps": 2060,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.4445804612483994e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
|