{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 130,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.007692307692307693,
      "grad_norm": 1824.0493249890733,
      "learning_rate": 1.5384615384615387e-06,
      "loss": 28.7448,
      "step": 1
    },
    {
      "epoch": 0.038461538461538464,
      "grad_norm": 338.1051064004804,
      "learning_rate": 7.692307692307694e-06,
      "loss": 21.1097,
      "step": 5
    },
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 60.27893910460336,
      "learning_rate": 1.5384615384615387e-05,
      "loss": 13.8645,
      "step": 10
    },
    {
      "epoch": 0.11538461538461539,
      "grad_norm": 19.31150319920863,
      "learning_rate": 1.9985583705641418e-05,
      "loss": 5.697,
      "step": 15
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 69.03270795451746,
      "learning_rate": 1.9823877374156647e-05,
      "loss": 2.3869,
      "step": 20
    },
    {
      "epoch": 0.19230769230769232,
      "grad_norm": 76.45052582249555,
      "learning_rate": 1.9485364419471454e-05,
      "loss": 1.7302,
      "step": 25
    },
    {
      "epoch": 0.23076923076923078,
      "grad_norm": 18.876039351066055,
      "learning_rate": 1.8976137276390145e-05,
      "loss": 1.5732,
      "step": 30
    },
    {
      "epoch": 0.2692307692307692,
      "grad_norm": 23.947629924642698,
      "learning_rate": 1.8305360832480118e-05,
      "loss": 1.3462,
      "step": 35
    },
    {
      "epoch": 0.3076923076923077,
      "grad_norm": 24.85999234906976,
      "learning_rate": 1.7485107481711014e-05,
      "loss": 1.2076,
      "step": 40
    },
    {
      "epoch": 0.34615384615384615,
      "grad_norm": 16.833283516923675,
      "learning_rate": 1.653013984983585e-05,
      "loss": 1.1067,
      "step": 45
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 5.260723361500702,
      "learning_rate": 1.5457645101945046e-05,
      "loss": 1.0689,
      "step": 50
    },
    {
      "epoch": 0.4230769230769231,
      "grad_norm": 8.067805674932265,
      "learning_rate": 1.4286925614030542e-05,
      "loss": 1.0097,
      "step": 55
    },
    {
      "epoch": 0.46153846153846156,
      "grad_norm": 14.86263828160759,
      "learning_rate": 1.303905157574247e-05,
      "loss": 0.978,
      "step": 60
    },
    {
      "epoch": 0.5,
      "grad_norm": 6.459278631918247,
      "learning_rate": 1.1736481776669307e-05,
      "loss": 0.9093,
      "step": 65
    },
    {
      "epoch": 0.5384615384615384,
      "grad_norm": 2.9078399705453846,
      "learning_rate": 1.0402659401094154e-05,
      "loss": 0.8944,
      "step": 70
    },
    {
      "epoch": 0.5769230769230769,
      "grad_norm": 7.660903004613179,
      "learning_rate": 9.061590105968208e-06,
      "loss": 0.8973,
      "step": 75
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": 5.302988730231839,
      "learning_rate": 7.73740997570278e-06,
      "loss": 0.8763,
      "step": 80
    },
    {
      "epoch": 0.6538461538461539,
      "grad_norm": 4.5745469835172825,
      "learning_rate": 6.453951129574644e-06,
      "loss": 0.85,
      "step": 85
    },
    {
      "epoch": 0.6923076923076923,
      "grad_norm": 2.9833461950622047,
      "learning_rate": 5.234312799786921e-06,
      "loss": 0.8283,
      "step": 90
    },
    {
      "epoch": 0.7307692307692307,
      "grad_norm": 1.3718638590118337,
      "learning_rate": 4.100445599768774e-06,
      "loss": 0.8142,
      "step": 95
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 1.0682148259587978,
      "learning_rate": 3.0727564649040066e-06,
      "loss": 0.8049,
      "step": 100
    },
    {
      "epoch": 0.8076923076923077,
      "grad_norm": 0.9650280690913742,
      "learning_rate": 2.1697413758237785e-06,
      "loss": 0.8029,
      "step": 105
    },
    {
      "epoch": 0.8461538461538461,
      "grad_norm": 1.0735752809829815,
      "learning_rate": 1.407652474377832e-06,
      "loss": 0.793,
      "step": 110
    },
    {
      "epoch": 0.8846153846153846,
      "grad_norm": 0.7668233578378257,
      "learning_rate": 8.002055634117578e-07,
      "loss": 0.7941,
      "step": 115
    },
    {
      "epoch": 0.9230769230769231,
      "grad_norm": 0.9841012840352416,
      "learning_rate": 3.5833325466437697e-07,
      "loss": 0.7823,
      "step": 120
    },
    {
      "epoch": 0.9615384615384616,
      "grad_norm": 0.7158242209277041,
      "learning_rate": 8.99882075409153e-08,
      "loss": 0.776,
      "step": 125
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.8276248743240251,
      "learning_rate": 0.0,
      "loss": 0.7805,
      "step": 130
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.284010648727417,
      "eval_runtime": 0.4599,
      "eval_samples_per_second": 26.092,
      "eval_steps_per_second": 2.174,
      "step": 130
    },
    {
      "epoch": 1.0,
      "step": 130,
      "total_flos": 17867533713408.0,
      "train_loss": 2.5464949974646935,
      "train_runtime": 907.3672,
      "train_samples_per_second": 36.642,
      "train_steps_per_second": 0.143
    }
  ],
  "logging_steps": 5,
  "max_steps": 130,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 17867533713408.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}