{ "best_metric": 0.017509404569864273, "best_model_checkpoint": "/kaggle/working/output/checkpoint-77", "epoch": 26.857142857142858, "eval_steps": 500, "global_step": 94, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8571428571428571, "eval_LCC": 0.1969856506206926, "eval_SROCC": 0.10444177671068428, "eval_loss": 0.14835341274738312, "eval_runtime": 35.5463, "eval_samples_per_second": 1.407, "eval_steps_per_second": 0.056, "step": 3 }, { "epoch": 2.0, "eval_LCC": 0.1712787386505908, "eval_SROCC": 0.11068427370948379, "eval_loss": 0.0292142815887928, "eval_runtime": 34.8973, "eval_samples_per_second": 1.433, "eval_steps_per_second": 0.057, "step": 7 }, { "epoch": 2.857142857142857, "grad_norm": 7.9004807472229, "learning_rate": 1.3333333333333333e-05, "loss": 0.1442, "step": 10 }, { "epoch": 2.857142857142857, "eval_LCC": 0.11784921848546775, "eval_SROCC": 0.027899159663865542, "eval_loss": 0.05484883487224579, "eval_runtime": 35.127, "eval_samples_per_second": 1.423, "eval_steps_per_second": 0.057, "step": 10 }, { "epoch": 4.0, "eval_LCC": 0.06886859888985782, "eval_SROCC": -0.06276110444177671, "eval_loss": 0.030970515683293343, "eval_runtime": 34.9536, "eval_samples_per_second": 1.43, "eval_steps_per_second": 0.057, "step": 14 }, { "epoch": 4.857142857142857, "eval_LCC": 0.0550252891230114, "eval_SROCC": -0.06180072028811524, "eval_loss": 0.04757826402783394, "eval_runtime": 35.0545, "eval_samples_per_second": 1.426, "eval_steps_per_second": 0.057, "step": 17 }, { "epoch": 5.714285714285714, "grad_norm": 2.093867301940918, "learning_rate": 1.925925925925926e-05, "loss": 0.0425, "step": 20 }, { "epoch": 6.0, "eval_LCC": 0.005142275506506862, "eval_SROCC": -0.09781512605042017, "eval_loss": 0.029744217172265053, "eval_runtime": 34.9107, "eval_samples_per_second": 1.432, "eval_steps_per_second": 0.057, "step": 21 }, { "epoch": 6.857142857142857, "eval_LCC": -0.0038686428258717766, "eval_SROCC": -0.09061224489795917, "eval_loss": 0.025907116010785103, "eval_runtime": 34.9938, "eval_samples_per_second": 1.429, "eval_steps_per_second": 0.057, "step": 24 }, { "epoch": 8.0, "eval_LCC": 0.007887024520170878, "eval_SROCC": -0.027034813925570226, "eval_loss": 0.029399558901786804, "eval_runtime": 34.9296, "eval_samples_per_second": 1.431, "eval_steps_per_second": 0.057, "step": 28 }, { "epoch": 8.571428571428571, "grad_norm": 1.0898091793060303, "learning_rate": 1.7777777777777777e-05, "loss": 0.0242, "step": 30 }, { "epoch": 8.857142857142858, "eval_LCC": 0.032333468238550225, "eval_SROCC": -0.014645858343337334, "eval_loss": 0.023686964064836502, "eval_runtime": 35.0539, "eval_samples_per_second": 1.426, "eval_steps_per_second": 0.057, "step": 31 }, { "epoch": 10.0, "eval_LCC": 0.08086510887243141, "eval_SROCC": 0.015222088835534215, "eval_loss": 0.02260431833565235, "eval_runtime": 35.1001, "eval_samples_per_second": 1.424, "eval_steps_per_second": 0.057, "step": 35 }, { "epoch": 10.857142857142858, "eval_LCC": 0.11583787359004848, "eval_SROCC": 0.03385354141656663, "eval_loss": 0.023615412414073944, "eval_runtime": 34.9014, "eval_samples_per_second": 1.433, "eval_steps_per_second": 0.057, "step": 38 }, { "epoch": 11.428571428571429, "grad_norm": 0.41205185651779175, "learning_rate": 1.6296296296296297e-05, "loss": 0.0146, "step": 40 }, { "epoch": 12.0, "eval_LCC": 0.17529865995122038, "eval_SROCC": 0.05623049219687875, "eval_loss": 0.021260255947709084, "eval_runtime": 34.8745, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.057, "step": 42 }, { "epoch": 12.857142857142858, "eval_LCC": 0.22410038594433127, "eval_SROCC": 0.06775510204081632, "eval_loss": 0.019894730299711227, "eval_runtime": 34.9972, "eval_samples_per_second": 1.429, "eval_steps_per_second": 0.057, "step": 45 }, { "epoch": 14.0, "eval_LCC": 0.27017897046913114, "eval_SROCC": 0.0981032412965186, "eval_loss": 0.020490916445851326, "eval_runtime": 34.9294, "eval_samples_per_second": 1.431, "eval_steps_per_second": 0.057, "step": 49 }, { "epoch": 14.285714285714286, "grad_norm": 0.5113194584846497, "learning_rate": 1.4814814814814815e-05, "loss": 0.0116, "step": 50 }, { "epoch": 14.857142857142858, "eval_LCC": 0.3000317546087649, "eval_SROCC": 0.12451380552220888, "eval_loss": 0.019025439396500587, "eval_runtime": 35.0048, "eval_samples_per_second": 1.428, "eval_steps_per_second": 0.057, "step": 52 }, { "epoch": 16.0, "eval_LCC": 0.35113038666324353, "eval_SROCC": 0.1594717887154862, "eval_loss": 0.019477322697639465, "eval_runtime": 34.8356, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.057, "step": 56 }, { "epoch": 16.857142857142858, "eval_LCC": 0.3803764092219919, "eval_SROCC": 0.1829051620648259, "eval_loss": 0.019361073151230812, "eval_runtime": 34.9637, "eval_samples_per_second": 1.43, "eval_steps_per_second": 0.057, "step": 59 }, { "epoch": 17.142857142857142, "grad_norm": 0.8568110466003418, "learning_rate": 1.3333333333333333e-05, "loss": 0.0096, "step": 60 }, { "epoch": 18.0, "eval_LCC": 0.4030357760137721, "eval_SROCC": 0.21440576230492195, "eval_loss": 0.018257679417729378, "eval_runtime": 34.9221, "eval_samples_per_second": 1.432, "eval_steps_per_second": 0.057, "step": 63 }, { "epoch": 18.857142857142858, "eval_LCC": 0.4085909204754596, "eval_SROCC": 0.21200480192076832, "eval_loss": 0.01950741559267044, "eval_runtime": 34.7591, "eval_samples_per_second": 1.438, "eval_steps_per_second": 0.058, "step": 66 }, { "epoch": 20.0, "grad_norm": 0.403394877910614, "learning_rate": 1.1851851851851852e-05, "loss": 0.0075, "step": 70 }, { "epoch": 20.0, "eval_LCC": 0.41267287372763267, "eval_SROCC": 0.216422569027611, "eval_loss": 0.018830113112926483, "eval_runtime": 34.862, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.057, "step": 70 }, { "epoch": 20.857142857142858, "eval_LCC": 0.4223673737494469, "eval_SROCC": 0.2221848739495798, "eval_loss": 0.020868387073278427, "eval_runtime": 35.0558, "eval_samples_per_second": 1.426, "eval_steps_per_second": 0.057, "step": 73 }, { "epoch": 22.0, "eval_LCC": 0.43043752892141784, "eval_SROCC": 0.2288115246098439, "eval_loss": 0.017509404569864273, "eval_runtime": 34.8413, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.057, "step": 77 }, { "epoch": 22.857142857142858, "grad_norm": 0.4386737048625946, "learning_rate": 1.037037037037037e-05, "loss": 0.0076, "step": 80 }, { "epoch": 22.857142857142858, "eval_LCC": 0.4326080523141859, "eval_SROCC": 0.24321728691476588, "eval_loss": 0.021145235747098923, "eval_runtime": 34.9315, "eval_samples_per_second": 1.431, "eval_steps_per_second": 0.057, "step": 80 }, { "epoch": 24.0, "eval_LCC": 0.43274315513728545, "eval_SROCC": 0.23457382953181274, "eval_loss": 0.018896810710430145, "eval_runtime": 35.0475, "eval_samples_per_second": 1.427, "eval_steps_per_second": 0.057, "step": 84 }, { "epoch": 24.857142857142858, "eval_LCC": 0.4313378675752595, "eval_SROCC": 0.2293877551020408, "eval_loss": 0.018819700926542282, "eval_runtime": 34.8537, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.057, "step": 87 }, { "epoch": 25.714285714285715, "grad_norm": 0.4886440336704254, "learning_rate": 8.888888888888888e-06, "loss": 0.006, "step": 90 }, { "epoch": 26.0, "eval_LCC": 0.43430975581338493, "eval_SROCC": 0.23899159663865546, "eval_loss": 0.022326407954096794, "eval_runtime": 35.0745, "eval_samples_per_second": 1.426, "eval_steps_per_second": 0.057, "step": 91 }, { "epoch": 26.857142857142858, "eval_LCC": 0.4398680198859831, "eval_SROCC": 0.2510924369747899, "eval_loss": 0.0202498622238636, "eval_runtime": 34.9908, "eval_samples_per_second": 1.429, "eval_steps_per_second": 0.057, "step": 94 }, { "epoch": 26.857142857142858, "step": 94, "total_flos": 7.519124344516116e+17, "train_loss": 0.028724998592379244, "train_runtime": 4511.5827, "train_samples_per_second": 2.372, "train_steps_per_second": 0.033 } ], "logging_steps": 10, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.519124344516116e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }