|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9961802902979373,
  "eval_steps": 500,
  "global_step": 163,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.061115355233002294,
      "grad_norm": 9.6875,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 1.2797,
      "step": 10
    },
    {
      "epoch": 0.12223071046600459,
      "grad_norm": 8.8125,
      "learning_rate": 4.994792902163481e-05,
      "loss": 1.1373,
      "step": 20
    },
    {
      "epoch": 0.18334606569900688,
      "grad_norm": 5.375,
      "learning_rate": 4.902824459680752e-05,
      "loss": 1.1783,
      "step": 30
    },
    {
      "epoch": 0.24446142093200918,
      "grad_norm": 3.1875,
      "learning_rate": 4.7000305099338396e-05,
      "loss": 1.1725,
      "step": 40
    },
    {
      "epoch": 0.30557677616501144,
      "grad_norm": 2.5625,
      "learning_rate": 4.395764521196406e-05,
      "loss": 1.1683,
      "step": 50
    },
    {
      "epoch": 0.36669213139801377,
      "grad_norm": 2.6875,
      "learning_rate": 4.004060158062306e-05,
      "loss": 1.1519,
      "step": 60
    },
    {
      "epoch": 0.42780748663101603,
      "grad_norm": 2.53125,
      "learning_rate": 3.542984006530792e-05,
      "loss": 1.1442,
      "step": 70
    },
    {
      "epoch": 0.48892284186401835,
      "grad_norm": 2.171875,
      "learning_rate": 3.0338022885994904e-05,
      "loss": 1.1354,
      "step": 80
    },
    {
      "epoch": 0.5500381970970206,
      "grad_norm": 1.90625,
      "learning_rate": 2.5e-05,
      "loss": 1.1259,
      "step": 90
    },
    {
      "epoch": 0.6111535523300229,
      "grad_norm": 1.8828125,
      "learning_rate": 1.9661977114005098e-05,
      "loss": 1.0983,
      "step": 100
    },
    {
      "epoch": 0.6722689075630253,
      "grad_norm": 1.6953125,
      "learning_rate": 1.4570159934692085e-05,
      "loss": 1.0884,
      "step": 110
    },
    {
      "epoch": 0.7333842627960275,
      "grad_norm": 1.7265625,
      "learning_rate": 9.959398419376932e-06,
      "loss": 1.0599,
      "step": 120
    },
    {
      "epoch": 0.7944996180290298,
      "grad_norm": 1.625,
      "learning_rate": 6.042354788035942e-06,
      "loss": 1.0558,
      "step": 130
    },
    {
      "epoch": 0.8556149732620321,
      "grad_norm": 1.5859375,
      "learning_rate": 2.9996949006616094e-06,
      "loss": 1.0508,
      "step": 140
    },
    {
      "epoch": 0.9167303284950343,
      "grad_norm": 1.6640625,
      "learning_rate": 9.71755403192484e-07,
      "loss": 1.0264,
      "step": 150
    },
    {
      "epoch": 0.9778456837280367,
      "grad_norm": 1.6015625,
      "learning_rate": 5.20709783651957e-08,
      "loss": 1.0426,
      "step": 160
    },
    {
      "epoch": 0.9961802902979373,
      "step": 163,
      "total_flos": 9.115042887803863e+17,
      "train_loss": 1.117896075629018,
      "train_runtime": 6585.7608,
      "train_samples_per_second": 3.18,
      "train_steps_per_second": 0.025
    }
  ],
  "logging_steps": 10,
  "max_steps": 163,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.115042887803863e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|