|
{ |
|
"best_metric": 0.017509404569864273, |
|
"best_model_checkpoint": "/kaggle/working/output/checkpoint-77", |
|
"epoch": 26.857142857142858, |
|
"eval_steps": 500, |
|
"global_step": 94, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"eval_LCC": 0.1969856506206926, |
|
"eval_SROCC": 0.10444177671068428, |
|
"eval_loss": 0.14835341274738312, |
|
"eval_runtime": 35.5463, |
|
"eval_samples_per_second": 1.407, |
|
"eval_steps_per_second": 0.056, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_LCC": 0.1712787386505908, |
|
"eval_SROCC": 0.11068427370948379, |
|
"eval_loss": 0.0292142815887928, |
|
"eval_runtime": 34.8973, |
|
"eval_samples_per_second": 1.433, |
|
"eval_steps_per_second": 0.057, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 7.9004807472229, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.1442, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"eval_LCC": 0.11784921848546775, |
|
"eval_SROCC": 0.027899159663865542, |
|
"eval_loss": 0.05484883487224579, |
|
"eval_runtime": 35.127, |
|
"eval_samples_per_second": 1.423, |
|
"eval_steps_per_second": 0.057, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_LCC": 0.06886859888985782, |
|
"eval_SROCC": -0.06276110444177671, |
|
"eval_loss": 0.030970515683293343, |
|
"eval_runtime": 34.9536, |
|
"eval_samples_per_second": 1.43, |
|
"eval_steps_per_second": 0.057, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"eval_LCC": 0.0550252891230114, |
|
"eval_SROCC": -0.06180072028811524, |
|
"eval_loss": 0.04757826402783394, |
|
"eval_runtime": 35.0545, |
|
"eval_samples_per_second": 1.426, |
|
"eval_steps_per_second": 0.057, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 2.093867301940918, |
|
"learning_rate": 1.925925925925926e-05, |
|
"loss": 0.0425, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_LCC": 0.005142275506506862, |
|
"eval_SROCC": -0.09781512605042017, |
|
"eval_loss": 0.029744217172265053, |
|
"eval_runtime": 34.9107, |
|
"eval_samples_per_second": 1.432, |
|
"eval_steps_per_second": 0.057, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 6.857142857142857, |
|
"eval_LCC": -0.0038686428258717766, |
|
"eval_SROCC": -0.09061224489795917, |
|
"eval_loss": 0.025907116010785103, |
|
"eval_runtime": 34.9938, |
|
"eval_samples_per_second": 1.429, |
|
"eval_steps_per_second": 0.057, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_LCC": 0.007887024520170878, |
|
"eval_SROCC": -0.027034813925570226, |
|
"eval_loss": 0.029399558901786804, |
|
"eval_runtime": 34.9296, |
|
"eval_samples_per_second": 1.431, |
|
"eval_steps_per_second": 0.057, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"grad_norm": 1.0898091793060303, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.0242, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 8.857142857142858, |
|
"eval_LCC": 0.032333468238550225, |
|
"eval_SROCC": -0.014645858343337334, |
|
"eval_loss": 0.023686964064836502, |
|
"eval_runtime": 35.0539, |
|
"eval_samples_per_second": 1.426, |
|
"eval_steps_per_second": 0.057, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_LCC": 0.08086510887243141, |
|
"eval_SROCC": 0.015222088835534215, |
|
"eval_loss": 0.02260431833565235, |
|
"eval_runtime": 35.1001, |
|
"eval_samples_per_second": 1.424, |
|
"eval_steps_per_second": 0.057, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 10.857142857142858, |
|
"eval_LCC": 0.11583787359004848, |
|
"eval_SROCC": 0.03385354141656663, |
|
"eval_loss": 0.023615412414073944, |
|
"eval_runtime": 34.9014, |
|
"eval_samples_per_second": 1.433, |
|
"eval_steps_per_second": 0.057, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 0.41205185651779175, |
|
"learning_rate": 1.6296296296296297e-05, |
|
"loss": 0.0146, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_LCC": 0.17529865995122038, |
|
"eval_SROCC": 0.05623049219687875, |
|
"eval_loss": 0.021260255947709084, |
|
"eval_runtime": 34.8745, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.057, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 12.857142857142858, |
|
"eval_LCC": 0.22410038594433127, |
|
"eval_SROCC": 0.06775510204081632, |
|
"eval_loss": 0.019894730299711227, |
|
"eval_runtime": 34.9972, |
|
"eval_samples_per_second": 1.429, |
|
"eval_steps_per_second": 0.057, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_LCC": 0.27017897046913114, |
|
"eval_SROCC": 0.0981032412965186, |
|
"eval_loss": 0.020490916445851326, |
|
"eval_runtime": 34.9294, |
|
"eval_samples_per_second": 1.431, |
|
"eval_steps_per_second": 0.057, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 0.5113194584846497, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.0116, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 14.857142857142858, |
|
"eval_LCC": 0.3000317546087649, |
|
"eval_SROCC": 0.12451380552220888, |
|
"eval_loss": 0.019025439396500587, |
|
"eval_runtime": 35.0048, |
|
"eval_samples_per_second": 1.428, |
|
"eval_steps_per_second": 0.057, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_LCC": 0.35113038666324353, |
|
"eval_SROCC": 0.1594717887154862, |
|
"eval_loss": 0.019477322697639465, |
|
"eval_runtime": 34.8356, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.057, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 16.857142857142858, |
|
"eval_LCC": 0.3803764092219919, |
|
"eval_SROCC": 0.1829051620648259, |
|
"eval_loss": 0.019361073151230812, |
|
"eval_runtime": 34.9637, |
|
"eval_samples_per_second": 1.43, |
|
"eval_steps_per_second": 0.057, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 0.8568110466003418, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0096, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_LCC": 0.4030357760137721, |
|
"eval_SROCC": 0.21440576230492195, |
|
"eval_loss": 0.018257679417729378, |
|
"eval_runtime": 34.9221, |
|
"eval_samples_per_second": 1.432, |
|
"eval_steps_per_second": 0.057, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 18.857142857142858, |
|
"eval_LCC": 0.4085909204754596, |
|
"eval_SROCC": 0.21200480192076832, |
|
"eval_loss": 0.01950741559267044, |
|
"eval_runtime": 34.7591, |
|
"eval_samples_per_second": 1.438, |
|
"eval_steps_per_second": 0.058, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.403394877910614, |
|
"learning_rate": 1.1851851851851852e-05, |
|
"loss": 0.0075, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_LCC": 0.41267287372763267, |
|
"eval_SROCC": 0.216422569027611, |
|
"eval_loss": 0.018830113112926483, |
|
"eval_runtime": 34.862, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.057, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 20.857142857142858, |
|
"eval_LCC": 0.4223673737494469, |
|
"eval_SROCC": 0.2221848739495798, |
|
"eval_loss": 0.020868387073278427, |
|
"eval_runtime": 35.0558, |
|
"eval_samples_per_second": 1.426, |
|
"eval_steps_per_second": 0.057, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_LCC": 0.43043752892141784, |
|
"eval_SROCC": 0.2288115246098439, |
|
"eval_loss": 0.017509404569864273, |
|
"eval_runtime": 34.8413, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.057, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 0.4386737048625946, |
|
"learning_rate": 1.037037037037037e-05, |
|
"loss": 0.0076, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"eval_LCC": 0.4326080523141859, |
|
"eval_SROCC": 0.24321728691476588, |
|
"eval_loss": 0.021145235747098923, |
|
"eval_runtime": 34.9315, |
|
"eval_samples_per_second": 1.431, |
|
"eval_steps_per_second": 0.057, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_LCC": 0.43274315513728545, |
|
"eval_SROCC": 0.23457382953181274, |
|
"eval_loss": 0.018896810710430145, |
|
"eval_runtime": 35.0475, |
|
"eval_samples_per_second": 1.427, |
|
"eval_steps_per_second": 0.057, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 24.857142857142858, |
|
"eval_LCC": 0.4313378675752595, |
|
"eval_SROCC": 0.2293877551020408, |
|
"eval_loss": 0.018819700926542282, |
|
"eval_runtime": 34.8537, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.057, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"grad_norm": 0.4886440336704254, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.006, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_LCC": 0.43430975581338493, |
|
"eval_SROCC": 0.23899159663865546, |
|
"eval_loss": 0.022326407954096794, |
|
"eval_runtime": 35.0745, |
|
"eval_samples_per_second": 1.426, |
|
"eval_steps_per_second": 0.057, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 26.857142857142858, |
|
"eval_LCC": 0.4398680198859831, |
|
"eval_SROCC": 0.2510924369747899, |
|
"eval_loss": 0.0202498622238636, |
|
"eval_runtime": 34.9908, |
|
"eval_samples_per_second": 1.429, |
|
"eval_steps_per_second": 0.057, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 26.857142857142858, |
|
"step": 94, |
|
"total_flos": 7.519124344516116e+17, |
|
"train_loss": 0.028724998592379244, |
|
"train_runtime": 4511.5827, |
|
"train_samples_per_second": 2.372, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.519124344516116e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|