|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 604, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.033112582781456956, |
|
"grad_norm": 2.8630754947662354, |
|
"learning_rate": 8.264462809917356e-07, |
|
"loss": 2.5, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06622516556291391, |
|
"grad_norm": 2.044926643371582, |
|
"learning_rate": 1.6528925619834712e-06, |
|
"loss": 2.5625, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09933774834437085, |
|
"grad_norm": 2.0647263526916504, |
|
"learning_rate": 2.479338842975207e-06, |
|
"loss": 2.475, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13245033112582782, |
|
"grad_norm": 1.9722806215286255, |
|
"learning_rate": 3.3057851239669424e-06, |
|
"loss": 2.375, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16556291390728478, |
|
"grad_norm": 1.7632310390472412, |
|
"learning_rate": 4.132231404958678e-06, |
|
"loss": 2.225, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1986754966887417, |
|
"grad_norm": 2.126565456390381, |
|
"learning_rate": 4.958677685950414e-06, |
|
"loss": 1.9, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.23178807947019867, |
|
"grad_norm": 1.3362902402877808, |
|
"learning_rate": 4.980933547537104e-06, |
|
"loss": 1.4625, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26490066225165565, |
|
"grad_norm": 1.6563653945922852, |
|
"learning_rate": 4.919995460276783e-06, |
|
"loss": 1.25, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2980132450331126, |
|
"grad_norm": 1.2334599494934082, |
|
"learning_rate": 4.81816262909214e-06, |
|
"loss": 1.0375, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.33112582781456956, |
|
"grad_norm": 1.2277352809906006, |
|
"learning_rate": 4.677155895043723e-06, |
|
"loss": 1.05, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.36423841059602646, |
|
"grad_norm": 1.204990267753601, |
|
"learning_rate": 4.499358086684381e-06, |
|
"loss": 0.975, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3973509933774834, |
|
"grad_norm": 1.0835106372833252, |
|
"learning_rate": 4.287773753387249e-06, |
|
"loss": 0.8875, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4304635761589404, |
|
"grad_norm": 1.209863305091858, |
|
"learning_rate": 4.045978392408671e-06, |
|
"loss": 0.8938, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.46357615894039733, |
|
"grad_norm": 1.1028498411178589, |
|
"learning_rate": 3.778058027682004e-06, |
|
"loss": 0.9125, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4966887417218543, |
|
"grad_norm": 1.186710238456726, |
|
"learning_rate": 3.488540161381304e-06, |
|
"loss": 0.8625, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5298013245033113, |
|
"grad_norm": 1.2400007247924805, |
|
"learning_rate": 3.18231726508275e-06, |
|
"loss": 0.8812, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5629139072847682, |
|
"grad_norm": 1.3776289224624634, |
|
"learning_rate": 2.8645641034226584e-06, |
|
"loss": 0.7625, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5960264900662252, |
|
"grad_norm": 1.7026677131652832, |
|
"learning_rate": 2.5406502873736693e-06, |
|
"loss": 0.8313, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6291390728476821, |
|
"grad_norm": 1.2438005208969116, |
|
"learning_rate": 2.2160495348738127e-06, |
|
"loss": 0.8187, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6622516556291391, |
|
"grad_norm": 1.2019314765930176, |
|
"learning_rate": 1.8962471721846555e-06, |
|
"loss": 0.825, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.695364238410596, |
|
"grad_norm": 1.3416401147842407, |
|
"learning_rate": 1.5866474390840126e-06, |
|
"loss": 0.7875, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7284768211920529, |
|
"grad_norm": 1.214216709136963, |
|
"learning_rate": 1.2924821643137226e-06, |
|
"loss": 0.7625, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7615894039735099, |
|
"grad_norm": 2.329277753829956, |
|
"learning_rate": 1.018722354547402e-06, |
|
"loss": 0.8125, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7947019867549668, |
|
"grad_norm": 1.8247755765914917, |
|
"learning_rate": 7.69994190908499e-07, |
|
"loss": 0.7844, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8278145695364238, |
|
"grad_norm": 1.354269027709961, |
|
"learning_rate": 5.505008525871183e-07, |
|
"loss": 0.8313, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8609271523178808, |
|
"grad_norm": 1.941784143447876, |
|
"learning_rate": 3.639514886337786e-07, |
|
"loss": 0.7844, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8940397350993378, |
|
"grad_norm": 1.4848183393478394, |
|
"learning_rate": 2.1349853821348797e-07, |
|
"loss": 0.8125, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9271523178807947, |
|
"grad_norm": 1.269209384918213, |
|
"learning_rate": 1.0168445852548142e-07, |
|
"loss": 0.8187, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9602649006622517, |
|
"grad_norm": 1.4907937049865723, |
|
"learning_rate": 3.0398760616796306e-08, |
|
"loss": 0.8313, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9933774834437086, |
|
"grad_norm": 1.2604339122772217, |
|
"learning_rate": 8.460791279910064e-10, |
|
"loss": 0.7156, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 604, |
|
"total_flos": 3210595501381632.0, |
|
"train_loss": 1.177695571192053, |
|
"train_runtime": 189.533, |
|
"train_samples_per_second": 12.742, |
|
"train_steps_per_second": 3.187 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 604, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3210595501381632.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|