|
{
  "best_metric": 0.9463087248322147,
  "best_model_checkpoint": "finetuned-for-YogaPoses/checkpoint-200",
  "epoch": 4.0,
  "eval_steps": 100,
  "global_step": 212,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18867924528301888,
      "grad_norm": 4.57135009765625,
      "learning_rate": 0.00019056603773584906,
      "loss": 0.2108,
      "step": 10
    },
    {
      "epoch": 0.37735849056603776,
      "grad_norm": 2.5890703201293945,
      "learning_rate": 0.00018113207547169812,
      "loss": 0.2414,
      "step": 20
    },
    {
      "epoch": 0.5660377358490566,
      "grad_norm": 8.661531448364258,
      "learning_rate": 0.00017169811320754717,
      "loss": 0.2201,
      "step": 30
    },
    {
      "epoch": 0.7547169811320755,
      "grad_norm": 10.479974746704102,
      "learning_rate": 0.00016226415094339625,
      "loss": 0.3454,
      "step": 40
    },
    {
      "epoch": 0.9433962264150944,
      "grad_norm": 11.653122901916504,
      "learning_rate": 0.0001528301886792453,
      "loss": 0.1532,
      "step": 50
    },
    {
      "epoch": 1.1320754716981132,
      "grad_norm": 14.78357982635498,
      "learning_rate": 0.00014339622641509434,
      "loss": 0.4086,
      "step": 60
    },
    {
      "epoch": 1.320754716981132,
      "grad_norm": 4.90693473815918,
      "learning_rate": 0.0001339622641509434,
      "loss": 0.2329,
      "step": 70
    },
    {
      "epoch": 1.509433962264151,
      "grad_norm": 9.73343563079834,
      "learning_rate": 0.00012452830188679244,
      "loss": 0.2904,
      "step": 80
    },
    {
      "epoch": 1.6981132075471699,
      "grad_norm": 10.988191604614258,
      "learning_rate": 0.00011509433962264151,
      "loss": 0.162,
      "step": 90
    },
    {
      "epoch": 1.8867924528301887,
      "grad_norm": 8.248613357543945,
      "learning_rate": 0.00010566037735849057,
      "loss": 0.2812,
      "step": 100
    },
    {
      "epoch": 1.8867924528301887,
      "eval_accuracy": 0.9328859060402684,
      "eval_loss": 0.18304018676280975,
      "eval_runtime": 19.9096,
      "eval_samples_per_second": 7.484,
      "eval_steps_per_second": 0.954,
      "step": 100
    },
    {
      "epoch": 2.0754716981132075,
      "grad_norm": 0.6969878673553467,
      "learning_rate": 9.622641509433963e-05,
      "loss": 0.1697,
      "step": 110
    },
    {
      "epoch": 2.2641509433962264,
      "grad_norm": 13.04124927520752,
      "learning_rate": 8.679245283018869e-05,
      "loss": 0.2429,
      "step": 120
    },
    {
      "epoch": 2.452830188679245,
      "grad_norm": 0.741187334060669,
      "learning_rate": 7.735849056603774e-05,
      "loss": 0.2734,
      "step": 130
    },
    {
      "epoch": 2.641509433962264,
      "grad_norm": 2.852130889892578,
      "learning_rate": 6.79245283018868e-05,
      "loss": 0.1988,
      "step": 140
    },
    {
      "epoch": 2.830188679245283,
      "grad_norm": 3.7858972549438477,
      "learning_rate": 5.849056603773585e-05,
      "loss": 0.2767,
      "step": 150
    },
    {
      "epoch": 3.018867924528302,
      "grad_norm": 15.999140739440918,
      "learning_rate": 4.9056603773584906e-05,
      "loss": 0.233,
      "step": 160
    },
    {
      "epoch": 3.207547169811321,
      "grad_norm": 8.683842658996582,
      "learning_rate": 3.962264150943397e-05,
      "loss": 0.1651,
      "step": 170
    },
    {
      "epoch": 3.3962264150943398,
      "grad_norm": 13.785407066345215,
      "learning_rate": 3.018867924528302e-05,
      "loss": 0.139,
      "step": 180
    },
    {
      "epoch": 3.5849056603773586,
      "grad_norm": 10.78865909576416,
      "learning_rate": 2.0754716981132076e-05,
      "loss": 0.2001,
      "step": 190
    },
    {
      "epoch": 3.7735849056603774,
      "grad_norm": 7.327868461608887,
      "learning_rate": 1.1320754716981132e-05,
      "loss": 0.1828,
      "step": 200
    },
    {
      "epoch": 3.7735849056603774,
      "eval_accuracy": 0.9463087248322147,
      "eval_loss": 0.22337837517261505,
      "eval_runtime": 19.9353,
      "eval_samples_per_second": 7.474,
      "eval_steps_per_second": 0.953,
      "step": 200
    },
    {
      "epoch": 3.9622641509433962,
      "grad_norm": 16.639904022216797,
      "learning_rate": 1.8867924528301887e-06,
      "loss": 0.2423,
      "step": 210
    },
    {
      "epoch": 4.0,
      "step": 212,
      "total_flos": 6760040527650816.0,
      "train_loss": 0.23189341304999478,
      "train_runtime": 970.6721,
      "train_samples_per_second": 3.457,
      "train_steps_per_second": 0.218
    }
  ],
  "logging_steps": 10,
  "max_steps": 212,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6760040527650816.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}