flan-base-sql / trainer_state.json
kyryl-georgian's picture
End of training
6da15f5 verified
raw
history blame
4.74 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 5530,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9,
"grad_norm": 0.1664367914199829,
"learning_rate": 0.0009095840867992767,
"loss": 0.4003,
"step": 500
},
{
"epoch": 0.9,
"eval_loss": 0.162927508354187,
"eval_runtime": 3.0318,
"eval_samples_per_second": 2591.854,
"eval_steps_per_second": 20.45,
"step": 500
},
{
"epoch": 1.81,
"grad_norm": 0.1386815905570984,
"learning_rate": 0.0008191681735985533,
"loss": 0.2314,
"step": 1000
},
{
"epoch": 1.81,
"eval_loss": 0.1386137157678604,
"eval_runtime": 3.0434,
"eval_samples_per_second": 2582.007,
"eval_steps_per_second": 20.372,
"step": 1000
},
{
"epoch": 2.71,
"grad_norm": 0.1781063824892044,
"learning_rate": 0.0007287522603978301,
"loss": 0.2065,
"step": 1500
},
{
"epoch": 2.71,
"eval_loss": 0.1289130598306656,
"eval_runtime": 3.0677,
"eval_samples_per_second": 2561.542,
"eval_steps_per_second": 20.211,
"step": 1500
},
{
"epoch": 3.62,
"grad_norm": 0.15570929646492004,
"learning_rate": 0.0006383363471971068,
"loss": 0.187,
"step": 2000
},
{
"epoch": 3.62,
"eval_loss": 0.12326223403215408,
"eval_runtime": 3.0605,
"eval_samples_per_second": 2567.579,
"eval_steps_per_second": 20.258,
"step": 2000
},
{
"epoch": 4.52,
"grad_norm": 0.16776247322559357,
"learning_rate": 0.0005479204339963833,
"loss": 0.1791,
"step": 2500
},
{
"epoch": 4.52,
"eval_loss": 0.1168670803308487,
"eval_runtime": 3.0473,
"eval_samples_per_second": 2578.705,
"eval_steps_per_second": 20.346,
"step": 2500
},
{
"epoch": 5.42,
"grad_norm": 0.1355486512184143,
"learning_rate": 0.0004575045207956601,
"loss": 0.1713,
"step": 3000
},
{
"epoch": 5.42,
"eval_loss": 0.11528698354959488,
"eval_runtime": 3.0013,
"eval_samples_per_second": 2618.163,
"eval_steps_per_second": 20.657,
"step": 3000
},
{
"epoch": 6.33,
"grad_norm": 0.16372531652450562,
"learning_rate": 0.0003670886075949367,
"loss": 0.1661,
"step": 3500
},
{
"epoch": 6.33,
"eval_loss": 0.11218289285898209,
"eval_runtime": 2.9586,
"eval_samples_per_second": 2655.959,
"eval_steps_per_second": 20.956,
"step": 3500
},
{
"epoch": 7.23,
"grad_norm": 0.1596778929233551,
"learning_rate": 0.0002766726943942134,
"loss": 0.1604,
"step": 4000
},
{
"epoch": 7.23,
"eval_loss": 0.1085081547498703,
"eval_runtime": 2.9539,
"eval_samples_per_second": 2660.243,
"eval_steps_per_second": 20.989,
"step": 4000
},
{
"epoch": 8.14,
"grad_norm": 0.15582768619060516,
"learning_rate": 0.00018625678119349006,
"loss": 0.1574,
"step": 4500
},
{
"epoch": 8.14,
"eval_loss": 0.1098729744553566,
"eval_runtime": 2.9739,
"eval_samples_per_second": 2642.311,
"eval_steps_per_second": 20.848,
"step": 4500
},
{
"epoch": 9.04,
"grad_norm": 0.15063905715942383,
"learning_rate": 9.584086799276672e-05,
"loss": 0.1541,
"step": 5000
},
{
"epoch": 9.04,
"eval_loss": 0.10638037323951721,
"eval_runtime": 3.0665,
"eval_samples_per_second": 2562.534,
"eval_steps_per_second": 20.219,
"step": 5000
},
{
"epoch": 9.95,
"grad_norm": 0.14130930602550507,
"learning_rate": 5.4249547920433995e-06,
"loss": 0.1521,
"step": 5500
},
{
"epoch": 9.95,
"eval_loss": 0.1071261540055275,
"eval_runtime": 3.024,
"eval_samples_per_second": 2598.51,
"eval_steps_per_second": 20.502,
"step": 5500
},
{
"epoch": 10.0,
"step": 5530,
"total_flos": 2.2872619342626816e+16,
"train_loss": 0.19658087959772425,
"train_runtime": 755.9435,
"train_samples_per_second": 935.506,
"train_steps_per_second": 7.315
}
],
"logging_steps": 500,
"max_steps": 5530,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2.2872619342626816e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}