{ "best_metric": 0.021110303699970245, "best_model_checkpoint": "/kaggle/working/output/checkpoint-56", "epoch": 20.857142857142858, "eval_steps": 500, "global_step": 73, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8571428571428571, "eval_LCC": 0.19761129838843355, "eval_SROCC": 0.1086674669867947, "eval_loss": 0.17467159032821655, "eval_runtime": 35.5195, "eval_samples_per_second": 1.408, "eval_steps_per_second": 0.056, "step": 3 }, { "epoch": 2.0, "eval_LCC": 0.1898407031739211, "eval_SROCC": 0.10424969987995197, "eval_loss": 0.056969162076711655, "eval_runtime": 35.417, "eval_samples_per_second": 1.412, "eval_steps_per_second": 0.056, "step": 7 }, { "epoch": 2.857142857142857, "grad_norm": 3.666994094848633, "learning_rate": 6.666666666666667e-06, "loss": 0.1599, "step": 10 }, { "epoch": 2.857142857142857, "eval_LCC": 0.1686459812348507, "eval_SROCC": 0.11260504201680673, "eval_loss": 0.031988270580768585, "eval_runtime": 35.3188, "eval_samples_per_second": 1.416, "eval_steps_per_second": 0.057, "step": 10 }, { "epoch": 4.0, "eval_LCC": 0.12351226835758868, "eval_SROCC": 0.02645858343337335, "eval_loss": 0.0510590560734272, "eval_runtime": 35.2516, "eval_samples_per_second": 1.418, "eval_steps_per_second": 0.057, "step": 14 }, { "epoch": 4.857142857142857, "eval_LCC": 0.1029176675867897, "eval_SROCC": -0.00043217286914765904, "eval_loss": 0.02736870013177395, "eval_runtime": 35.3522, "eval_samples_per_second": 1.414, "eval_steps_per_second": 0.057, "step": 17 }, { "epoch": 5.714285714285714, "grad_norm": 2.3310303688049316, "learning_rate": 9.966191788709716e-06, "loss": 0.0602, "step": 20 }, { "epoch": 6.0, "eval_LCC": 0.08996101890143099, "eval_SROCC": -0.04057623049219687, "eval_loss": 0.03745032474398613, "eval_runtime": 35.2712, "eval_samples_per_second": 1.418, "eval_steps_per_second": 0.057, "step": 21 }, { "epoch": 6.857142857142857, "eval_LCC": 0.08304152415159055, "eval_SROCC": -0.05334933973589436, "eval_loss": 0.03062591142952442, "eval_runtime": 35.089, "eval_samples_per_second": 1.425, "eval_steps_per_second": 0.057, "step": 24 }, { "epoch": 8.0, "eval_LCC": 0.07141231150294015, "eval_SROCC": -0.07255702280912364, "eval_loss": 0.02552003413438797, "eval_runtime": 35.0759, "eval_samples_per_second": 1.425, "eval_steps_per_second": 0.057, "step": 28 }, { "epoch": 8.571428571428571, "grad_norm": 1.1723262071609497, "learning_rate": 9.698463103929542e-06, "loss": 0.029, "step": 30 }, { "epoch": 8.857142857142858, "eval_LCC": 0.07341013768251957, "eval_SROCC": -0.056806722689075634, "eval_loss": 0.024663101881742477, "eval_runtime": 35.4424, "eval_samples_per_second": 1.411, "eval_steps_per_second": 0.056, "step": 31 }, { "epoch": 10.0, "eval_LCC": 0.09004418149637736, "eval_SROCC": -0.042881152460984395, "eval_loss": 0.02926880680024624, "eval_runtime": 35.3398, "eval_samples_per_second": 1.415, "eval_steps_per_second": 0.057, "step": 35 }, { "epoch": 10.857142857142858, "eval_LCC": 0.09815228490732507, "eval_SROCC": -0.03174069627851141, "eval_loss": 0.025934694334864616, "eval_runtime": 35.299, "eval_samples_per_second": 1.416, "eval_steps_per_second": 0.057, "step": 38 }, { "epoch": 11.428571428571429, "grad_norm": 0.726739764213562, "learning_rate": 9.177439057064684e-06, "loss": 0.0199, "step": 40 }, { "epoch": 12.0, "eval_LCC": 0.12879803314702723, "eval_SROCC": -0.007250900360144057, "eval_loss": 0.02379768155515194, "eval_runtime": 35.6028, "eval_samples_per_second": 1.404, "eval_steps_per_second": 0.056, "step": 42 }, { "epoch": 12.857142857142858, "eval_LCC": 0.15936463360358263, "eval_SROCC": 0.02156062424969988, "eval_loss": 0.02426682412624359, "eval_runtime": 35.4009, "eval_samples_per_second": 1.412, "eval_steps_per_second": 0.056, "step": 45 }, { "epoch": 14.0, "eval_LCC": 0.18103273074335755, "eval_SROCC": 0.0453781512605042, "eval_loss": 0.02589680254459381, "eval_runtime": 35.429, "eval_samples_per_second": 1.411, "eval_steps_per_second": 0.056, "step": 49 }, { "epoch": 14.285714285714286, "grad_norm": 0.8898158669471741, "learning_rate": 8.43120818934367e-06, "loss": 0.0161, "step": 50 }, { "epoch": 14.857142857142858, "eval_LCC": 0.1954172655149359, "eval_SROCC": 0.056806722689075634, "eval_loss": 0.02237752452492714, "eval_runtime": 35.3401, "eval_samples_per_second": 1.415, "eval_steps_per_second": 0.057, "step": 52 }, { "epoch": 16.0, "eval_LCC": 0.23162353354414347, "eval_SROCC": 0.08955582232893158, "eval_loss": 0.021110303699970245, "eval_runtime": 35.3825, "eval_samples_per_second": 1.413, "eval_steps_per_second": 0.057, "step": 56 }, { "epoch": 16.857142857142858, "eval_LCC": 0.25437943511040334, "eval_SROCC": 0.10012004801920767, "eval_loss": 0.02233021892607212, "eval_runtime": 35.3868, "eval_samples_per_second": 1.413, "eval_steps_per_second": 0.057, "step": 59 }, { "epoch": 17.142857142857142, "grad_norm": 1.1253899335861206, "learning_rate": 7.500000000000001e-06, "loss": 0.0132, "step": 60 }, { "epoch": 18.0, "eval_LCC": 0.2680706396992355, "eval_SROCC": 0.0981032412965186, "eval_loss": 0.02166852541267872, "eval_runtime": 35.412, "eval_samples_per_second": 1.412, "eval_steps_per_second": 0.056, "step": 63 }, { "epoch": 18.857142857142858, "eval_LCC": 0.27459457148225125, "eval_SROCC": 0.11548619447779111, "eval_loss": 0.022057028487324715, "eval_runtime": 35.3202, "eval_samples_per_second": 1.416, "eval_steps_per_second": 0.057, "step": 66 }, { "epoch": 20.0, "grad_norm": 0.4893428087234497, "learning_rate": 6.434016163555452e-06, "loss": 0.0103, "step": 70 }, { "epoch": 20.0, "eval_LCC": 0.28310987276640803, "eval_SROCC": 0.12297719087635053, "eval_loss": 0.022805728018283844, "eval_runtime": 35.2653, "eval_samples_per_second": 1.418, "eval_steps_per_second": 0.057, "step": 70 }, { "epoch": 20.857142857142858, "eval_LCC": 0.2943832703125549, "eval_SROCC": 0.13267707082833133, "eval_loss": 0.024481065571308136, "eval_runtime": 35.4504, "eval_samples_per_second": 1.41, "eval_steps_per_second": 0.056, "step": 73 }, { "epoch": 20.857142857142858, "step": 73, "total_flos": 5.848207823512535e+17, "train_loss": 0.042697800195788685, "train_runtime": 3517.7769, "train_samples_per_second": 3.042, "train_steps_per_second": 0.043 } ], "logging_steps": 10, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.848207823512535e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }