sam2ai's picture
Upload folder using huggingface_hub
c7e469e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 2,
"global_step": 19,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05263157894736842,
"grad_norm": 11935214993408.0,
"learning_rate": 1e-05,
"loss": 0.7163,
"step": 1
},
{
"epoch": 0.10526315789473684,
"grad_norm": 2096675946496.0,
"learning_rate": 9.924038765061042e-06,
"loss": 0.6972,
"step": 2
},
{
"epoch": 0.10526315789473684,
"eval_loss": 0.5687638521194458,
"eval_runtime": 0.2981,
"eval_samples_per_second": 6.709,
"eval_steps_per_second": 3.354,
"step": 2
},
{
"epoch": 0.15789473684210525,
"grad_norm": 7390838128640.0,
"learning_rate": 9.698463103929542e-06,
"loss": 0.7795,
"step": 3
},
{
"epoch": 0.21052631578947367,
"grad_norm": 3194177454080.0,
"learning_rate": 9.330127018922195e-06,
"loss": 0.6915,
"step": 4
},
{
"epoch": 0.21052631578947367,
"eval_loss": 0.5684303045272827,
"eval_runtime": 0.2966,
"eval_samples_per_second": 6.743,
"eval_steps_per_second": 3.371,
"step": 4
},
{
"epoch": 0.2631578947368421,
"grad_norm": 131253493760.0,
"learning_rate": 8.83022221559489e-06,
"loss": 0.6744,
"step": 5
},
{
"epoch": 0.3157894736842105,
"grad_norm": 491539005440.0,
"learning_rate": 8.213938048432697e-06,
"loss": 0.7911,
"step": 6
},
{
"epoch": 0.3157894736842105,
"eval_loss": 0.5686608552932739,
"eval_runtime": 0.2967,
"eval_samples_per_second": 6.742,
"eval_steps_per_second": 3.371,
"step": 6
},
{
"epoch": 0.3684210526315789,
"grad_norm": 235763302400.0,
"learning_rate": 7.500000000000001e-06,
"loss": 0.5594,
"step": 7
},
{
"epoch": 0.42105263157894735,
"grad_norm": 279532568576.0,
"learning_rate": 6.710100716628345e-06,
"loss": 0.7261,
"step": 8
},
{
"epoch": 0.42105263157894735,
"eval_loss": 0.5699889063835144,
"eval_runtime": 0.2966,
"eval_samples_per_second": 6.744,
"eval_steps_per_second": 3.372,
"step": 8
},
{
"epoch": 0.47368421052631576,
"grad_norm": 175990996992.0,
"learning_rate": 5.8682408883346535e-06,
"loss": 0.6122,
"step": 9
},
{
"epoch": 0.5263157894736842,
"grad_norm": 10130321047552.0,
"learning_rate": 5e-06,
"loss": 0.86,
"step": 10
},
{
"epoch": 0.5263157894736842,
"eval_loss": 0.568690836429596,
"eval_runtime": 0.2978,
"eval_samples_per_second": 6.715,
"eval_steps_per_second": 3.358,
"step": 10
},
{
"epoch": 0.5789473684210527,
"grad_norm": 3898177486848.0,
"learning_rate": 4.131759111665349e-06,
"loss": 0.7364,
"step": 11
},
{
"epoch": 0.631578947368421,
"grad_norm": 1318930219008.0,
"learning_rate": 3.289899283371657e-06,
"loss": 0.6903,
"step": 12
},
{
"epoch": 0.631578947368421,
"eval_loss": 0.5691469311714172,
"eval_runtime": 0.2973,
"eval_samples_per_second": 6.728,
"eval_steps_per_second": 3.364,
"step": 12
},
{
"epoch": 0.6842105263157895,
"grad_norm": 137136914432.0,
"learning_rate": 2.5000000000000015e-06,
"loss": 0.663,
"step": 13
},
{
"epoch": 0.7368421052631579,
"grad_norm": 1604217339904.0,
"learning_rate": 1.7860619515673034e-06,
"loss": 0.5994,
"step": 14
},
{
"epoch": 0.7368421052631579,
"eval_loss": 0.5684089064598083,
"eval_runtime": 0.2979,
"eval_samples_per_second": 6.714,
"eval_steps_per_second": 3.357,
"step": 14
},
{
"epoch": 0.7894736842105263,
"grad_norm": 73108439040.0,
"learning_rate": 1.1697777844051105e-06,
"loss": 0.7457,
"step": 15
},
{
"epoch": 0.8421052631578947,
"grad_norm": 361255075840.0,
"learning_rate": 6.698729810778065e-07,
"loss": 0.7792,
"step": 16
},
{
"epoch": 0.8421052631578947,
"eval_loss": 0.5695986747741699,
"eval_runtime": 0.2976,
"eval_samples_per_second": 6.72,
"eval_steps_per_second": 3.36,
"step": 16
},
{
"epoch": 0.8947368421052632,
"grad_norm": 222161354752.0,
"learning_rate": 3.015368960704584e-07,
"loss": 0.6121,
"step": 17
},
{
"epoch": 0.9473684210526315,
"grad_norm": 4849853267968.0,
"learning_rate": 7.59612349389599e-08,
"loss": 0.7023,
"step": 18
},
{
"epoch": 0.9473684210526315,
"eval_loss": 0.5688631534576416,
"eval_runtime": 0.298,
"eval_samples_per_second": 6.711,
"eval_steps_per_second": 3.355,
"step": 18
},
{
"epoch": 1.0,
"grad_norm": 994095661056.0,
"learning_rate": 0.0,
"loss": 0.7789,
"step": 19
},
{
"epoch": 1.0,
"step": 19,
"total_flos": 6341035089199104.0,
"train_loss": 0.7060548631768477,
"train_runtime": 53.6484,
"train_samples_per_second": 2.759,
"train_steps_per_second": 0.354
}
],
"logging_steps": 1,
"max_steps": 19,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 19,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6341035089199104.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}