|
{ |
|
"best_metric": 83.6764, |
|
"best_model_checkpoint": "models/one_alignment_vs_summary/flan-t5-large/flan_t5_large_alignment_first/checkpoint-6500", |
|
"epoch": 0.9742206235011991, |
|
"eval_steps": 100, |
|
"global_step": 6500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.992505995203837e-05, |
|
"loss": 0.5753, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_F1": 46.8005, |
|
"eval_Precision": 65.9011, |
|
"eval_Recall": 36.284, |
|
"eval_accuracy": 58.536, |
|
"eval_accuracy_negative": 80.7879, |
|
"eval_accuracy_positive": 36.284, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.3608146607875824, |
|
"eval_runtime": 77.8641, |
|
"eval_samples_per_second": 52.81, |
|
"eval_steps_per_second": 3.301, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9850119904076745e-05, |
|
"loss": 0.3529, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_F1": 75.56, |
|
"eval_Precision": 63.7671, |
|
"eval_Recall": 92.7043, |
|
"eval_accuracy": 69.9903, |
|
"eval_accuracy_negative": 47.2763, |
|
"eval_accuracy_positive": 92.7043, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.3245851397514343, |
|
"eval_runtime": 78.1082, |
|
"eval_samples_per_second": 52.645, |
|
"eval_steps_per_second": 3.29, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.977517985611511e-05, |
|
"loss": 0.2974, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_F1": 78.279, |
|
"eval_Precision": 68.5944, |
|
"eval_Recall": 91.1479, |
|
"eval_accuracy": 74.392, |
|
"eval_accuracy_negative": 57.6362, |
|
"eval_accuracy_positive": 91.1479, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.41329512000083923, |
|
"eval_runtime": 78.0252, |
|
"eval_samples_per_second": 52.701, |
|
"eval_steps_per_second": 3.294, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.970023980815348e-05, |
|
"loss": 0.2854, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_F1": 79.6539, |
|
"eval_Precision": 73.2354, |
|
"eval_Recall": 87.3054, |
|
"eval_accuracy": 77.3346, |
|
"eval_accuracy_negative": 67.3638, |
|
"eval_accuracy_positive": 87.3054, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2850329875946045, |
|
"eval_runtime": 77.9552, |
|
"eval_samples_per_second": 52.748, |
|
"eval_steps_per_second": 3.297, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.9625299760191855e-05, |
|
"loss": 0.2569, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_F1": 79.9727, |
|
"eval_Precision": 75.225, |
|
"eval_Recall": 85.3599, |
|
"eval_accuracy": 78.2588, |
|
"eval_accuracy_negative": 71.1576, |
|
"eval_accuracy_positive": 85.3599, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.31440889835357666, |
|
"eval_runtime": 77.9127, |
|
"eval_samples_per_second": 52.777, |
|
"eval_steps_per_second": 3.299, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9550359712230215e-05, |
|
"loss": 0.273, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_F1": 80.6036, |
|
"eval_Precision": 74.1224, |
|
"eval_Recall": 88.3268, |
|
"eval_accuracy": 78.6722, |
|
"eval_accuracy_negative": 69.0175, |
|
"eval_accuracy_positive": 88.3268, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2862711548805237, |
|
"eval_runtime": 77.9247, |
|
"eval_samples_per_second": 52.769, |
|
"eval_steps_per_second": 3.298, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.947541966426858e-05, |
|
"loss": 0.2769, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_F1": 79.2214, |
|
"eval_Precision": 79.26, |
|
"eval_Recall": 79.1829, |
|
"eval_accuracy": 78.4776, |
|
"eval_accuracy_negative": 77.7724, |
|
"eval_accuracy_positive": 79.1829, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.26657262444496155, |
|
"eval_runtime": 77.9599, |
|
"eval_samples_per_second": 52.745, |
|
"eval_steps_per_second": 3.297, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.940047961630696e-05, |
|
"loss": 0.288, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_F1": 80.125, |
|
"eval_Precision": 79.1924, |
|
"eval_Recall": 81.0798, |
|
"eval_accuracy": 79.7909, |
|
"eval_accuracy_negative": 78.5019, |
|
"eval_accuracy_positive": 81.0798, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23273229598999023, |
|
"eval_runtime": 77.9321, |
|
"eval_samples_per_second": 52.764, |
|
"eval_steps_per_second": 3.298, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9325539568345325e-05, |
|
"loss": 0.2597, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_F1": 79.5494, |
|
"eval_Precision": 80.1184, |
|
"eval_Recall": 78.9883, |
|
"eval_accuracy": 78.7451, |
|
"eval_accuracy_negative": 78.5019, |
|
"eval_accuracy_positive": 78.9883, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.22254109382629395, |
|
"eval_runtime": 77.9631, |
|
"eval_samples_per_second": 52.743, |
|
"eval_steps_per_second": 3.296, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.925059952038369e-05, |
|
"loss": 0.2735, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_F1": 81.095, |
|
"eval_Precision": 78.1335, |
|
"eval_Recall": 84.2899, |
|
"eval_accuracy": 79.9368, |
|
"eval_accuracy_negative": 75.5837, |
|
"eval_accuracy_positive": 84.2899, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23284350335597992, |
|
"eval_runtime": 77.9328, |
|
"eval_samples_per_second": 52.763, |
|
"eval_steps_per_second": 3.298, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.917565947242207e-05, |
|
"loss": 0.2591, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_F1": 81.521, |
|
"eval_Precision": 77.891, |
|
"eval_Recall": 85.5058, |
|
"eval_accuracy": 80.5934, |
|
"eval_accuracy_negative": 75.6809, |
|
"eval_accuracy_positive": 85.5058, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.26482418179512024, |
|
"eval_runtime": 78.0339, |
|
"eval_samples_per_second": 52.695, |
|
"eval_steps_per_second": 3.293, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9100719424460435e-05, |
|
"loss": 0.2675, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_F1": 80.1508, |
|
"eval_Precision": 82.9002, |
|
"eval_Recall": 77.5778, |
|
"eval_accuracy": 80.7879, |
|
"eval_accuracy_negative": 83.9981, |
|
"eval_accuracy_positive": 77.5778, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23910629749298096, |
|
"eval_runtime": 78.0093, |
|
"eval_samples_per_second": 52.712, |
|
"eval_steps_per_second": 3.294, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.90257793764988e-05, |
|
"loss": 0.2549, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_F1": 81.6754, |
|
"eval_Precision": 79.9627, |
|
"eval_Recall": 83.463, |
|
"eval_accuracy": 81.1041, |
|
"eval_accuracy_negative": 78.7451, |
|
"eval_accuracy_positive": 83.463, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.22716127336025238, |
|
"eval_runtime": 78.0476, |
|
"eval_samples_per_second": 52.686, |
|
"eval_steps_per_second": 3.293, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.895083932853717e-05, |
|
"loss": 0.2338, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_F1": 80.1995, |
|
"eval_Precision": 82.2927, |
|
"eval_Recall": 78.2101, |
|
"eval_accuracy": 80.4475, |
|
"eval_accuracy_negative": 82.6848, |
|
"eval_accuracy_positive": 78.2101, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.24518640339374542, |
|
"eval_runtime": 77.9981, |
|
"eval_samples_per_second": 52.719, |
|
"eval_steps_per_second": 3.295, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.8875899280575545e-05, |
|
"loss": 0.2747, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_F1": 81.9309, |
|
"eval_Precision": 75.2033, |
|
"eval_Recall": 89.9805, |
|
"eval_accuracy": 79.4504, |
|
"eval_accuracy_negative": 68.9202, |
|
"eval_accuracy_positive": 89.9805, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2351878583431244, |
|
"eval_runtime": 77.8581, |
|
"eval_samples_per_second": 52.814, |
|
"eval_steps_per_second": 3.301, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.880095923261391e-05, |
|
"loss": 0.2389, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_F1": 77.4047, |
|
"eval_Precision": 80.4724, |
|
"eval_Recall": 74.5623, |
|
"eval_accuracy": 76.143, |
|
"eval_accuracy_negative": 77.7237, |
|
"eval_accuracy_positive": 74.5623, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.240137979388237, |
|
"eval_runtime": 77.9704, |
|
"eval_samples_per_second": 52.738, |
|
"eval_steps_per_second": 3.296, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.872601918465228e-05, |
|
"loss": 0.2486, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_F1": 74.3833, |
|
"eval_Precision": 84.0171, |
|
"eval_Recall": 66.7315, |
|
"eval_accuracy": 73.5165, |
|
"eval_accuracy_negative": 80.3016, |
|
"eval_accuracy_positive": 66.7315, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23113004863262177, |
|
"eval_runtime": 77.9471, |
|
"eval_samples_per_second": 52.754, |
|
"eval_steps_per_second": 3.297, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.865107913669065e-05, |
|
"loss": 0.2558, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_F1": 75.5311, |
|
"eval_Precision": 81.9579, |
|
"eval_Recall": 70.0389, |
|
"eval_accuracy": 71.644, |
|
"eval_accuracy_negative": 73.249, |
|
"eval_accuracy_positive": 70.0389, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2416938990354538, |
|
"eval_runtime": 77.8981, |
|
"eval_samples_per_second": 52.787, |
|
"eval_steps_per_second": 3.299, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.8576139088729016e-05, |
|
"loss": 0.2521, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_F1": 75.8988, |
|
"eval_Precision": 83.8729, |
|
"eval_Recall": 69.3093, |
|
"eval_accuracy": 72.0574, |
|
"eval_accuracy_negative": 74.8054, |
|
"eval_accuracy_positive": 69.3093, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2378227710723877, |
|
"eval_runtime": 78.1302, |
|
"eval_samples_per_second": 52.63, |
|
"eval_steps_per_second": 3.289, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.8501199040767384e-05, |
|
"loss": 0.2487, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_F1": 82.5998, |
|
"eval_Precision": 77.7587, |
|
"eval_Recall": 88.0837, |
|
"eval_accuracy": 72.5438, |
|
"eval_accuracy_negative": 57.0039, |
|
"eval_accuracy_positive": 88.0837, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.31695932149887085, |
|
"eval_runtime": 78.0522, |
|
"eval_samples_per_second": 52.683, |
|
"eval_steps_per_second": 3.293, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.842625899280576e-05, |
|
"loss": 0.246, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_F1": 81.1168, |
|
"eval_Precision": 81.6971, |
|
"eval_Recall": 80.5447, |
|
"eval_accuracy": 66.3911, |
|
"eval_accuracy_negative": 52.2374, |
|
"eval_accuracy_positive": 80.5447, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.25484344363212585, |
|
"eval_runtime": 78.0548, |
|
"eval_samples_per_second": 52.681, |
|
"eval_steps_per_second": 3.293, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.8351318944844126e-05, |
|
"loss": 0.2407, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_F1": 74.0905, |
|
"eval_Precision": 86.343, |
|
"eval_Recall": 64.8833, |
|
"eval_accuracy": 71.7656, |
|
"eval_accuracy_negative": 78.6479, |
|
"eval_accuracy_positive": 64.8833, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.26473525166511536, |
|
"eval_runtime": 77.9999, |
|
"eval_samples_per_second": 52.718, |
|
"eval_steps_per_second": 3.295, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.8276378896882494e-05, |
|
"loss": 0.2293, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_F1": 76.173, |
|
"eval_Precision": 82.6038, |
|
"eval_Recall": 70.6712, |
|
"eval_accuracy": 72.7383, |
|
"eval_accuracy_negative": 74.8054, |
|
"eval_accuracy_positive": 70.6712, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23481938242912292, |
|
"eval_runtime": 78.0051, |
|
"eval_samples_per_second": 52.714, |
|
"eval_steps_per_second": 3.295, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.820143884892087e-05, |
|
"loss": 0.199, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_F1": 81.2854, |
|
"eval_Precision": 79.0441, |
|
"eval_Recall": 83.6576, |
|
"eval_accuracy": 77.8453, |
|
"eval_accuracy_negative": 72.0331, |
|
"eval_accuracy_positive": 83.6576, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.29514577984809875, |
|
"eval_runtime": 77.9919, |
|
"eval_samples_per_second": 52.723, |
|
"eval_steps_per_second": 3.295, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8126498800959236e-05, |
|
"loss": 0.247, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_F1": 67.5548, |
|
"eval_Precision": 86.3086, |
|
"eval_Recall": 55.4961, |
|
"eval_accuracy": 70.3551, |
|
"eval_accuracy_negative": 85.214, |
|
"eval_accuracy_positive": 55.4961, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23327696323394775, |
|
"eval_runtime": 78.0614, |
|
"eval_samples_per_second": 52.677, |
|
"eval_steps_per_second": 3.292, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8051558752997604e-05, |
|
"loss": 0.2659, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_F1": 77.5335, |
|
"eval_Precision": 80.6944, |
|
"eval_Recall": 74.6109, |
|
"eval_accuracy": 68.6284, |
|
"eval_accuracy_negative": 62.6459, |
|
"eval_accuracy_positive": 74.6109, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.26498734951019287, |
|
"eval_runtime": 77.9945, |
|
"eval_samples_per_second": 52.722, |
|
"eval_steps_per_second": 3.295, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.797661870503598e-05, |
|
"loss": 0.2445, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_F1": 81.6411, |
|
"eval_Precision": 77.1998, |
|
"eval_Recall": 86.6245, |
|
"eval_accuracy": 78.0156, |
|
"eval_accuracy_negative": 69.4066, |
|
"eval_accuracy_positive": 86.6245, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.25022661685943604, |
|
"eval_runtime": 78.0413, |
|
"eval_samples_per_second": 52.69, |
|
"eval_steps_per_second": 3.293, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.7901678657074346e-05, |
|
"loss": 0.2397, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_F1": 81.1614, |
|
"eval_Precision": 77.679, |
|
"eval_Recall": 84.9708, |
|
"eval_accuracy": 75.9241, |
|
"eval_accuracy_negative": 66.8774, |
|
"eval_accuracy_positive": 84.9708, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2765290439128876, |
|
"eval_runtime": 77.9454, |
|
"eval_samples_per_second": 52.755, |
|
"eval_steps_per_second": 3.297, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.7826738609112713e-05, |
|
"loss": 0.2134, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_F1": 81.6516, |
|
"eval_Precision": 78.0488, |
|
"eval_Recall": 85.6031, |
|
"eval_accuracy": 77.9426, |
|
"eval_accuracy_negative": 70.2821, |
|
"eval_accuracy_positive": 85.6031, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2863008975982666, |
|
"eval_runtime": 78.0388, |
|
"eval_samples_per_second": 52.692, |
|
"eval_steps_per_second": 3.293, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.775179856115108e-05, |
|
"loss": 0.2349, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_F1": 81.0536, |
|
"eval_Precision": 80.5476, |
|
"eval_Recall": 81.5661, |
|
"eval_accuracy": 77.8696, |
|
"eval_accuracy_negative": 74.1732, |
|
"eval_accuracy_positive": 81.5661, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23327529430389404, |
|
"eval_runtime": 77.998, |
|
"eval_samples_per_second": 52.719, |
|
"eval_steps_per_second": 3.295, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.767685851318945e-05, |
|
"loss": 0.2031, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_F1": 80.2638, |
|
"eval_Precision": 83.8812, |
|
"eval_Recall": 76.9455, |
|
"eval_accuracy": 78.429, |
|
"eval_accuracy_negative": 79.9125, |
|
"eval_accuracy_positive": 76.9455, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2780900001525879, |
|
"eval_runtime": 78.0382, |
|
"eval_samples_per_second": 52.692, |
|
"eval_steps_per_second": 3.293, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.7601918465227817e-05, |
|
"loss": 0.2704, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_F1": 80.8479, |
|
"eval_Precision": 82.958, |
|
"eval_Recall": 78.8424, |
|
"eval_accuracy": 79.8881, |
|
"eval_accuracy_negative": 80.9339, |
|
"eval_accuracy_positive": 78.8424, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.22213193774223328, |
|
"eval_runtime": 77.9705, |
|
"eval_samples_per_second": 52.738, |
|
"eval_steps_per_second": 3.296, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.752697841726619e-05, |
|
"loss": 0.2413, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_F1": 82.2274, |
|
"eval_Precision": 78.6158, |
|
"eval_Recall": 86.1868, |
|
"eval_accuracy": 79.3045, |
|
"eval_accuracy_negative": 72.4222, |
|
"eval_accuracy_positive": 86.1868, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.21565163135528564, |
|
"eval_runtime": 77.9735, |
|
"eval_samples_per_second": 52.736, |
|
"eval_steps_per_second": 3.296, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.745203836930456e-05, |
|
"loss": 0.2549, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_F1": 80.991, |
|
"eval_Precision": 70.8045, |
|
"eval_Recall": 94.6012, |
|
"eval_accuracy": 75.5107, |
|
"eval_accuracy_negative": 56.4202, |
|
"eval_accuracy_positive": 94.6012, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.3653569221496582, |
|
"eval_runtime": 77.9428, |
|
"eval_samples_per_second": 52.757, |
|
"eval_steps_per_second": 3.297, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.7377098321342926e-05, |
|
"loss": 0.2327, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_F1": 80.6348, |
|
"eval_Precision": 82.2458, |
|
"eval_Recall": 79.0856, |
|
"eval_accuracy": 78.7938, |
|
"eval_accuracy_negative": 78.5019, |
|
"eval_accuracy_positive": 79.0856, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.22318707406520844, |
|
"eval_runtime": 77.9908, |
|
"eval_samples_per_second": 52.724, |
|
"eval_steps_per_second": 3.295, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.7302158273381294e-05, |
|
"loss": 0.234, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_F1": 82.8591, |
|
"eval_Precision": 76.8176, |
|
"eval_Recall": 89.9319, |
|
"eval_accuracy": 77.5535, |
|
"eval_accuracy_negative": 65.1751, |
|
"eval_accuracy_positive": 89.9319, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.28604593873023987, |
|
"eval_runtime": 77.9808, |
|
"eval_samples_per_second": 52.731, |
|
"eval_steps_per_second": 3.296, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.722721822541967e-05, |
|
"loss": 0.258, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_F1": 74.4311, |
|
"eval_Precision": 82.7874, |
|
"eval_Recall": 67.607, |
|
"eval_accuracy": 67.9718, |
|
"eval_accuracy_negative": 68.3366, |
|
"eval_accuracy_positive": 67.607, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2335677146911621, |
|
"eval_runtime": 78.0681, |
|
"eval_samples_per_second": 52.672, |
|
"eval_steps_per_second": 3.292, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.7152278177458036e-05, |
|
"loss": 0.2367, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_F1": 69.2561, |
|
"eval_Precision": 89.7754, |
|
"eval_Recall": 56.3716, |
|
"eval_accuracy": 67.4368, |
|
"eval_accuracy_negative": 78.5019, |
|
"eval_accuracy_positive": 56.3716, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2100234031677246, |
|
"eval_runtime": 77.9683, |
|
"eval_samples_per_second": 52.739, |
|
"eval_steps_per_second": 3.296, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.7077338129496404e-05, |
|
"loss": 0.2157, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_F1": 79.2374, |
|
"eval_Precision": 85.5612, |
|
"eval_Recall": 73.784, |
|
"eval_accuracy": 76.8482, |
|
"eval_accuracy_negative": 79.9125, |
|
"eval_accuracy_positive": 73.784, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23944681882858276, |
|
"eval_runtime": 77.9314, |
|
"eval_samples_per_second": 52.764, |
|
"eval_steps_per_second": 3.298, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.700239808153478e-05, |
|
"loss": 0.2431, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_F1": 72.7115, |
|
"eval_Precision": 89.4812, |
|
"eval_Recall": 61.2354, |
|
"eval_accuracy": 74.0272, |
|
"eval_accuracy_negative": 86.8191, |
|
"eval_accuracy_positive": 61.2354, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2376098483800888, |
|
"eval_runtime": 78.0857, |
|
"eval_samples_per_second": 52.66, |
|
"eval_steps_per_second": 3.291, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.6927458033573146e-05, |
|
"loss": 0.2442, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_F1": 82.3446, |
|
"eval_Precision": 79.7901, |
|
"eval_Recall": 85.0681, |
|
"eval_accuracy": 78.4047, |
|
"eval_accuracy_negative": 71.7412, |
|
"eval_accuracy_positive": 85.0681, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2564048767089844, |
|
"eval_runtime": 78.0736, |
|
"eval_samples_per_second": 52.668, |
|
"eval_steps_per_second": 3.292, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.685251798561151e-05, |
|
"loss": 0.2125, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_F1": 81.7357, |
|
"eval_Precision": 82.4024, |
|
"eval_Recall": 81.0798, |
|
"eval_accuracy": 77.4562, |
|
"eval_accuracy_negative": 73.8327, |
|
"eval_accuracy_positive": 81.0798, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.26422980427742004, |
|
"eval_runtime": 77.9729, |
|
"eval_samples_per_second": 52.736, |
|
"eval_steps_per_second": 3.296, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.677757793764988e-05, |
|
"loss": 0.2213, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_F1": 61.6757, |
|
"eval_Precision": 89.3813, |
|
"eval_Recall": 47.0817, |
|
"eval_accuracy": 66.5856, |
|
"eval_accuracy_negative": 86.0895, |
|
"eval_accuracy_positive": 47.0817, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2372213900089264, |
|
"eval_runtime": 78.0051, |
|
"eval_samples_per_second": 52.714, |
|
"eval_steps_per_second": 3.295, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.670263788968825e-05, |
|
"loss": 0.2277, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_F1": 65.461, |
|
"eval_Precision": 83.8269, |
|
"eval_Recall": 53.6965, |
|
"eval_accuracy": 63.2539, |
|
"eval_accuracy_negative": 72.8113, |
|
"eval_accuracy_positive": 53.6965, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.21777845919132233, |
|
"eval_runtime": 78.04, |
|
"eval_samples_per_second": 52.691, |
|
"eval_steps_per_second": 3.293, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.662769784172662e-05, |
|
"loss": 0.1893, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_F1": 61.2192, |
|
"eval_Precision": 91.8288, |
|
"eval_Recall": 45.9144, |
|
"eval_accuracy": 62.1839, |
|
"eval_accuracy_negative": 78.4533, |
|
"eval_accuracy_positive": 45.9144, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.25438836216926575, |
|
"eval_runtime": 77.9949, |
|
"eval_samples_per_second": 52.721, |
|
"eval_steps_per_second": 3.295, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.655275779376499e-05, |
|
"loss": 0.2577, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_F1": 48.5335, |
|
"eval_Precision": 86.0149, |
|
"eval_Recall": 33.8035, |
|
"eval_accuracy": 56.9309, |
|
"eval_accuracy_negative": 80.0584, |
|
"eval_accuracy_positive": 33.8035, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.20975889265537262, |
|
"eval_runtime": 77.9509, |
|
"eval_samples_per_second": 52.751, |
|
"eval_steps_per_second": 3.297, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.647781774580336e-05, |
|
"loss": 0.2255, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_F1": 48.856, |
|
"eval_Precision": 88.4076, |
|
"eval_Recall": 33.7549, |
|
"eval_accuracy": 59.3872, |
|
"eval_accuracy_negative": 85.0195, |
|
"eval_accuracy_positive": 33.7549, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2087894082069397, |
|
"eval_runtime": 78.0665, |
|
"eval_samples_per_second": 52.673, |
|
"eval_steps_per_second": 3.292, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.640287769784173e-05, |
|
"loss": 0.2052, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_F1": 62.0057, |
|
"eval_Precision": 88.6878, |
|
"eval_Recall": 47.6654, |
|
"eval_accuracy": 64.6887, |
|
"eval_accuracy_negative": 81.7121, |
|
"eval_accuracy_positive": 47.6654, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2096925675868988, |
|
"eval_runtime": 78.1278, |
|
"eval_samples_per_second": 52.632, |
|
"eval_steps_per_second": 3.289, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.63279376498801e-05, |
|
"loss": 0.1803, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_F1": 76.7251, |
|
"eval_Precision": 87.782, |
|
"eval_Recall": 68.142, |
|
"eval_accuracy": 75.8025, |
|
"eval_accuracy_negative": 83.463, |
|
"eval_accuracy_positive": 68.142, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23566679656505585, |
|
"eval_runtime": 78.1099, |
|
"eval_samples_per_second": 52.644, |
|
"eval_steps_per_second": 3.29, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.625299760191847e-05, |
|
"loss": 0.2405, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_F1": 79.0246, |
|
"eval_Precision": 85.7224, |
|
"eval_Recall": 73.2977, |
|
"eval_accuracy": 75.8998, |
|
"eval_accuracy_negative": 78.5019, |
|
"eval_accuracy_positive": 73.2977, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.22571609914302826, |
|
"eval_runtime": 77.9758, |
|
"eval_samples_per_second": 52.734, |
|
"eval_steps_per_second": 3.296, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.617805755395684e-05, |
|
"loss": 0.195, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_F1": 38.7195, |
|
"eval_Precision": 89.4366, |
|
"eval_Recall": 24.7082, |
|
"eval_accuracy": 55.0097, |
|
"eval_accuracy_negative": 85.3113, |
|
"eval_accuracy_positive": 24.7082, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.25449925661087036, |
|
"eval_runtime": 78.1451, |
|
"eval_samples_per_second": 52.62, |
|
"eval_steps_per_second": 3.289, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.610311750599521e-05, |
|
"loss": 0.2136, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_F1": 72.9004, |
|
"eval_Precision": 89.638, |
|
"eval_Recall": 61.43, |
|
"eval_accuracy": 70.1605, |
|
"eval_accuracy_negative": 78.8911, |
|
"eval_accuracy_positive": 61.43, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23430722951889038, |
|
"eval_runtime": 78.0577, |
|
"eval_samples_per_second": 52.679, |
|
"eval_steps_per_second": 3.292, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.602817745803358e-05, |
|
"loss": 0.205, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_F1": 61.5385, |
|
"eval_Precision": 88.8073, |
|
"eval_Recall": 47.0817, |
|
"eval_accuracy": 63.6916, |
|
"eval_accuracy_negative": 80.3016, |
|
"eval_accuracy_positive": 47.0817, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.20516321063041687, |
|
"eval_runtime": 78.06, |
|
"eval_samples_per_second": 52.677, |
|
"eval_steps_per_second": 3.292, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.595323741007194e-05, |
|
"loss": 0.227, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_F1": 71.5575, |
|
"eval_Precision": 89.1226, |
|
"eval_Recall": 59.7763, |
|
"eval_accuracy": 70.1119, |
|
"eval_accuracy_negative": 80.4475, |
|
"eval_accuracy_positive": 59.7763, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.20273716747760773, |
|
"eval_runtime": 78.0817, |
|
"eval_samples_per_second": 52.663, |
|
"eval_steps_per_second": 3.291, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5878297362110315e-05, |
|
"loss": 0.221, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_F1": 70.1068, |
|
"eval_Precision": 89.8176, |
|
"eval_Recall": 57.4903, |
|
"eval_accuracy": 68.3123, |
|
"eval_accuracy_negative": 79.1342, |
|
"eval_accuracy_positive": 57.4903, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.20614123344421387, |
|
"eval_runtime": 78.0595, |
|
"eval_samples_per_second": 52.678, |
|
"eval_steps_per_second": 3.292, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.580335731414868e-05, |
|
"loss": 0.2148, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_F1": 78.0293, |
|
"eval_Precision": 86.2272, |
|
"eval_Recall": 71.2549, |
|
"eval_accuracy": 71.6683, |
|
"eval_accuracy_negative": 72.0817, |
|
"eval_accuracy_positive": 71.2549, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23381440341472626, |
|
"eval_runtime": 78.0578, |
|
"eval_samples_per_second": 52.679, |
|
"eval_steps_per_second": 3.292, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.572841726618705e-05, |
|
"loss": 0.203, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_F1": 58.205, |
|
"eval_Precision": 93.4409, |
|
"eval_Recall": 42.2665, |
|
"eval_accuracy": 60.4572, |
|
"eval_accuracy_negative": 78.6479, |
|
"eval_accuracy_positive": 42.2665, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.2337663322687149, |
|
"eval_runtime": 78.2027, |
|
"eval_samples_per_second": 52.581, |
|
"eval_steps_per_second": 3.286, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.5653477218225424e-05, |
|
"loss": 0.1912, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_F1": 82.295, |
|
"eval_Precision": 83.3416, |
|
"eval_Recall": 81.2743, |
|
"eval_accuracy": 78.6722, |
|
"eval_accuracy_negative": 76.07, |
|
"eval_accuracy_positive": 81.2743, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23967531323432922, |
|
"eval_runtime": 78.2026, |
|
"eval_samples_per_second": 52.581, |
|
"eval_steps_per_second": 3.286, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.557853717026379e-05, |
|
"loss": 0.2292, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_F1": 77.3544, |
|
"eval_Precision": 87.8245, |
|
"eval_Recall": 69.1148, |
|
"eval_accuracy": 71.8142, |
|
"eval_accuracy_negative": 74.5136, |
|
"eval_accuracy_positive": 69.1148, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.21783529222011566, |
|
"eval_runtime": 78.0505, |
|
"eval_samples_per_second": 52.684, |
|
"eval_steps_per_second": 3.293, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.550359712230216e-05, |
|
"loss": 0.2366, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_F1": 73.1382, |
|
"eval_Precision": 90.4659, |
|
"eval_Recall": 61.3813, |
|
"eval_accuracy": 69.3337, |
|
"eval_accuracy_negative": 77.286, |
|
"eval_accuracy_positive": 61.3813, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.21324363350868225, |
|
"eval_runtime": 78.0489, |
|
"eval_samples_per_second": 52.685, |
|
"eval_steps_per_second": 3.293, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.542865707434053e-05, |
|
"loss": 0.2172, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_F1": 82.6628, |
|
"eval_Precision": 81.4151, |
|
"eval_Recall": 83.9494, |
|
"eval_accuracy": 80.0584, |
|
"eval_accuracy_negative": 76.1673, |
|
"eval_accuracy_positive": 83.9494, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.21681031584739685, |
|
"eval_runtime": 77.9889, |
|
"eval_samples_per_second": 52.725, |
|
"eval_steps_per_second": 3.295, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.53537170263789e-05, |
|
"loss": 0.2176, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_F1": 81.7284, |
|
"eval_Precision": 82.999, |
|
"eval_Recall": 80.4961, |
|
"eval_accuracy": 79.2802, |
|
"eval_accuracy_negative": 78.0642, |
|
"eval_accuracy_positive": 80.4961, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.19668923318386078, |
|
"eval_runtime": 78.1339, |
|
"eval_samples_per_second": 52.628, |
|
"eval_steps_per_second": 3.289, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.527877697841727e-05, |
|
"loss": 0.1703, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_F1": 82.7941, |
|
"eval_Precision": 83.4486, |
|
"eval_Recall": 82.1498, |
|
"eval_accuracy": 79.9125, |
|
"eval_accuracy_negative": 77.6751, |
|
"eval_accuracy_positive": 82.1498, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.23434630036354065, |
|
"eval_runtime": 77.9722, |
|
"eval_samples_per_second": 52.737, |
|
"eval_steps_per_second": 3.296, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.520383693045564e-05, |
|
"loss": 0.2234, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_F1": 83.6217, |
|
"eval_Precision": 81.5534, |
|
"eval_Recall": 85.7977, |
|
"eval_accuracy": 80.2529, |
|
"eval_accuracy_negative": 74.7082, |
|
"eval_accuracy_positive": 85.7977, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.22015611827373505, |
|
"eval_runtime": 78.0553, |
|
"eval_samples_per_second": 52.681, |
|
"eval_steps_per_second": 3.293, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.512889688249401e-05, |
|
"loss": 0.2047, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_F1": 83.6764, |
|
"eval_Precision": 82.3751, |
|
"eval_Recall": 85.0195, |
|
"eval_accuracy": 79.1586, |
|
"eval_accuracy_negative": 73.2977, |
|
"eval_accuracy_positive": 85.0195, |
|
"eval_gen_len": 3.0, |
|
"eval_loss": 0.20958371460437775, |
|
"eval_runtime": 77.9877, |
|
"eval_samples_per_second": 52.726, |
|
"eval_steps_per_second": 3.295, |
|
"step": 6500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 66720, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"total_flos": 1.83775713030144e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|