{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 83,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012048192771084338,
      "grad_norm": 5.144875488861497,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 5.2383,
      "step": 1
    },
    {
      "epoch": 0.060240963855421686,
      "grad_norm": 4.400640807642775,
      "learning_rate": 0.00011111111111111112,
      "loss": 5.1426,
      "step": 5
    },
    {
      "epoch": 0.12048192771084337,
      "grad_norm": 2.8424185774279995,
      "learning_rate": 0.00019990989662046818,
      "loss": 4.9195,
      "step": 10
    },
    {
      "epoch": 0.18072289156626506,
      "grad_norm": 2.1742733252180533,
      "learning_rate": 0.0001967732946933499,
      "loss": 4.5773,
      "step": 15
    },
    {
      "epoch": 0.24096385542168675,
      "grad_norm": 2.0811257888315406,
      "learning_rate": 0.00018929258581495685,
      "loss": 4.3453,
      "step": 20
    },
    {
      "epoch": 0.30120481927710846,
      "grad_norm": 1.3196766642254727,
      "learning_rate": 0.00017780357543184397,
      "loss": 4.1766,
      "step": 25
    },
    {
      "epoch": 0.3614457831325301,
      "grad_norm": 1.7074769394742433,
      "learning_rate": 0.00016282199972956425,
      "loss": 4.0355,
      "step": 30
    },
    {
      "epoch": 0.42168674698795183,
      "grad_norm": 1.786794986516103,
      "learning_rate": 0.00014502037448176734,
      "loss": 3.9418,
      "step": 35
    },
    {
      "epoch": 0.4819277108433735,
      "grad_norm": 1.712555578478064,
      "learning_rate": 0.00012519780613851254,
      "loss": 3.8637,
      "step": 40
    },
    {
      "epoch": 0.5421686746987951,
      "grad_norm": 1.1317257705596584,
      "learning_rate": 0.00010424412031961484,
      "loss": 3.8164,
      "step": 45
    },
    {
      "epoch": 0.6024096385542169,
      "grad_norm": 0.8808395413039651,
      "learning_rate": 8.309991796781511e-05,
      "loss": 3.7855,
      "step": 50
    },
    {
      "epoch": 0.6626506024096386,
      "grad_norm": 0.9070705182297121,
      "learning_rate": 6.271435222196916e-05,
      "loss": 3.7594,
      "step": 55
    },
    {
      "epoch": 0.7228915662650602,
      "grad_norm": 0.800430133148163,
      "learning_rate": 4.4002521386240466e-05,
      "loss": 3.723,
      "step": 60
    },
    {
      "epoch": 0.7831325301204819,
      "grad_norm": 0.9084524552496485,
      "learning_rate": 2.7804390604547557e-05,
      "loss": 3.7258,
      "step": 65
    },
    {
      "epoch": 0.8433734939759037,
      "grad_norm": 0.7470706061804011,
      "learning_rate": 1.4847086226668872e-05,
      "loss": 3.6902,
      "step": 70
    },
    {
      "epoch": 0.9036144578313253,
      "grad_norm": 0.697773024233771,
      "learning_rate": 5.71225545389158e-06,
      "loss": 3.691,
      "step": 75
    },
    {
      "epoch": 0.963855421686747,
      "grad_norm": 0.70777044124217,
      "learning_rate": 8.099564741123166e-07,
      "loss": 3.6707,
      "step": 80
    },
    {
      "epoch": 1.0,
      "step": 83,
      "total_flos": 7920590782464.0,
      "train_loss": 4.042498117469879,
      "train_runtime": 34.3031,
      "train_samples_per_second": 309.71,
      "train_steps_per_second": 2.42
    }
  ],
  "logging_steps": 5,
  "max_steps": 83,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7920590782464.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}