diff --git "a/checkpoint-16268/trainer_state.json" "b/checkpoint-16268/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-16268/trainer_state.json" @@ -0,0 +1,24087 @@ +{ + "best_metric": 0.1774035394191742, + "best_model_checkpoint": "Prostate158-PI-CAI-MRI-Tumor-T2W-ADC-HBV-DWI-v01/checkpoint-16268", + "epoch": 98.0, + "eval_steps": 500, + "global_step": 16268, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.030120481927710843, + "grad_norm": 5.825586318969727, + "learning_rate": 2.5100401606425703e-08, + "loss": 0.7396, + "step": 5 + }, + { + "epoch": 0.060240963855421686, + "grad_norm": 2.165442705154419, + "learning_rate": 5.0200803212851406e-08, + "loss": 0.7379, + "step": 10 + }, + { + "epoch": 0.09036144578313253, + "grad_norm": 2.5576512813568115, + "learning_rate": 7.530120481927711e-08, + "loss": 0.7364, + "step": 15 + }, + { + "epoch": 0.12048192771084337, + "grad_norm": 3.243180751800537, + "learning_rate": 1.0040160642570281e-07, + "loss": 0.7588, + "step": 20 + }, + { + "epoch": 0.15060240963855423, + "grad_norm": 2.1289498805999756, + "learning_rate": 1.2550200803212853e-07, + "loss": 0.7046, + "step": 25 + }, + { + "epoch": 0.18072289156626506, + "grad_norm": 3.678800582885742, + "learning_rate": 1.5060240963855423e-07, + "loss": 0.7478, + "step": 30 + }, + { + "epoch": 0.21084337349397592, + "grad_norm": 2.020659923553467, + "learning_rate": 1.7570281124497993e-07, + "loss": 0.7372, + "step": 35 + }, + { + "epoch": 0.24096385542168675, + "grad_norm": 3.0805442333221436, + "learning_rate": 2.0080321285140563e-07, + "loss": 0.7562, + "step": 40 + }, + { + "epoch": 0.2710843373493976, + "grad_norm": 2.0384714603424072, + "learning_rate": 2.2590361445783133e-07, + "loss": 0.728, + "step": 45 + }, + { + "epoch": 0.30120481927710846, + "grad_norm": 2.3842644691467285, + "learning_rate": 2.5100401606425705e-07, + "loss": 0.7374, + "step": 50 + }, + { + "epoch": 0.3313253012048193, + "grad_norm": 2.663119316101074, + "learning_rate": 2.761044176706827e-07, + "loss": 0.7349, + "step": 55 + }, + { + "epoch": 0.3614457831325301, + "grad_norm": 2.26052188873291, + "learning_rate": 3.0120481927710845e-07, + "loss": 0.7327, + "step": 60 + }, + { + "epoch": 0.39156626506024095, + "grad_norm": 2.991396188735962, + "learning_rate": 3.263052208835341e-07, + "loss": 0.7289, + "step": 65 + }, + { + "epoch": 0.42168674698795183, + "grad_norm": 2.195991277694702, + "learning_rate": 3.5140562248995985e-07, + "loss": 0.7252, + "step": 70 + }, + { + "epoch": 0.45180722891566266, + "grad_norm": 2.390545129776001, + "learning_rate": 3.765060240963856e-07, + "loss": 0.7333, + "step": 75 + }, + { + "epoch": 0.4819277108433735, + "grad_norm": 3.8800010681152344, + "learning_rate": 4.0160642570281125e-07, + "loss": 0.7129, + "step": 80 + }, + { + "epoch": 0.5120481927710844, + "grad_norm": 2.3233087062835693, + "learning_rate": 4.26706827309237e-07, + "loss": 0.7251, + "step": 85 + }, + { + "epoch": 0.5421686746987951, + "grad_norm": 2.914829730987549, + "learning_rate": 4.5180722891566265e-07, + "loss": 0.7321, + "step": 90 + }, + { + "epoch": 0.572289156626506, + "grad_norm": 2.2649123668670654, + "learning_rate": 4.769076305220884e-07, + "loss": 0.7101, + "step": 95 + }, + { + "epoch": 0.6024096385542169, + "grad_norm": 2.63425612449646, + "learning_rate": 5.020080321285141e-07, + "loss": 0.7302, + "step": 100 + }, + { + "epoch": 0.6325301204819277, + "grad_norm": 2.2094509601593018, + "learning_rate": 5.271084337349398e-07, + "loss": 0.7194, + "step": 105 + }, + { + "epoch": 0.6626506024096386, + "grad_norm": 2.336904287338257, + "learning_rate": 5.522088353413655e-07, + "loss": 0.7301, + "step": 110 + }, + { + "epoch": 0.6927710843373494, + "grad_norm": 3.619689464569092, + "learning_rate": 5.773092369477911e-07, + "loss": 0.6986, + "step": 115 + }, + { + "epoch": 0.7228915662650602, + "grad_norm": 2.2559053897857666, + "learning_rate": 6.024096385542169e-07, + "loss": 0.7399, + "step": 120 + }, + { + "epoch": 0.7530120481927711, + "grad_norm": 2.235473871231079, + "learning_rate": 6.275100401606426e-07, + "loss": 0.7268, + "step": 125 + }, + { + "epoch": 0.7831325301204819, + "grad_norm": 2.779989242553711, + "learning_rate": 6.526104417670682e-07, + "loss": 0.7044, + "step": 130 + }, + { + "epoch": 0.8132530120481928, + "grad_norm": 2.1756601333618164, + "learning_rate": 6.77710843373494e-07, + "loss": 0.719, + "step": 135 + }, + { + "epoch": 0.8433734939759037, + "grad_norm": 10.11095142364502, + "learning_rate": 7.028112449799197e-07, + "loss": 0.7135, + "step": 140 + }, + { + "epoch": 0.8734939759036144, + "grad_norm": 2.282147169113159, + "learning_rate": 7.279116465863455e-07, + "loss": 0.7205, + "step": 145 + }, + { + "epoch": 0.9036144578313253, + "grad_norm": 2.8020949363708496, + "learning_rate": 7.530120481927712e-07, + "loss": 0.7529, + "step": 150 + }, + { + "epoch": 0.9337349397590361, + "grad_norm": 2.5485680103302, + "learning_rate": 7.781124497991968e-07, + "loss": 0.7155, + "step": 155 + }, + { + "epoch": 0.963855421686747, + "grad_norm": 1.8938534259796143, + "learning_rate": 8.032128514056225e-07, + "loss": 0.7242, + "step": 160 + }, + { + "epoch": 0.9939759036144579, + "grad_norm": 2.8763959407806396, + "learning_rate": 8.283132530120483e-07, + "loss": 0.7217, + "step": 165 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5422729060165159, + "eval_auc": 0.5260933335706004, + "eval_f1": 0.40551583248212464, + "eval_loss": 0.7027444243431091, + "eval_precision": 0.36725254394079554, + "eval_recall": 0.4526795895096921, + "eval_runtime": 18.6786, + "eval_samples_per_second": 136.145, + "eval_steps_per_second": 0.696, + "step": 166 + }, + { + "epoch": 1.0240963855421688, + "grad_norm": 2.2965047359466553, + "learning_rate": 8.53413654618474e-07, + "loss": 0.7055, + "step": 170 + }, + { + "epoch": 1.0542168674698795, + "grad_norm": 3.0637803077697754, + "learning_rate": 8.785140562248996e-07, + "loss": 0.7046, + "step": 175 + }, + { + "epoch": 1.0843373493975903, + "grad_norm": 2.772043466567993, + "learning_rate": 9.036144578313253e-07, + "loss": 0.6925, + "step": 180 + }, + { + "epoch": 1.1144578313253013, + "grad_norm": 2.482011556625366, + "learning_rate": 9.28714859437751e-07, + "loss": 0.7053, + "step": 185 + }, + { + "epoch": 1.144578313253012, + "grad_norm": 2.894693374633789, + "learning_rate": 9.538152610441769e-07, + "loss": 0.7386, + "step": 190 + }, + { + "epoch": 1.1746987951807228, + "grad_norm": 2.042769432067871, + "learning_rate": 9.789156626506025e-07, + "loss": 0.7142, + "step": 195 + }, + { + "epoch": 1.2048192771084336, + "grad_norm": 2.739431142807007, + "learning_rate": 1.0040160642570282e-06, + "loss": 0.7128, + "step": 200 + }, + { + "epoch": 1.2349397590361446, + "grad_norm": 2.2312636375427246, + "learning_rate": 1.0291164658634539e-06, + "loss": 0.7218, + "step": 205 + }, + { + "epoch": 1.2650602409638554, + "grad_norm": 2.0345458984375, + "learning_rate": 1.0542168674698796e-06, + "loss": 0.6982, + "step": 210 + }, + { + "epoch": 1.2951807228915664, + "grad_norm": 2.9002037048339844, + "learning_rate": 1.0793172690763054e-06, + "loss": 0.7212, + "step": 215 + }, + { + "epoch": 1.3253012048192772, + "grad_norm": 2.2102439403533936, + "learning_rate": 1.104417670682731e-06, + "loss": 0.681, + "step": 220 + }, + { + "epoch": 1.355421686746988, + "grad_norm": 2.3994767665863037, + "learning_rate": 1.1295180722891566e-06, + "loss": 0.707, + "step": 225 + }, + { + "epoch": 1.3855421686746987, + "grad_norm": 2.515076160430908, + "learning_rate": 1.1546184738955822e-06, + "loss": 0.6887, + "step": 230 + }, + { + "epoch": 1.4156626506024097, + "grad_norm": 2.2097108364105225, + "learning_rate": 1.179718875502008e-06, + "loss": 0.6966, + "step": 235 + }, + { + "epoch": 1.4457831325301205, + "grad_norm": 2.8546788692474365, + "learning_rate": 1.2048192771084338e-06, + "loss": 0.6811, + "step": 240 + }, + { + "epoch": 1.4759036144578312, + "grad_norm": 3.2900049686431885, + "learning_rate": 1.2299196787148595e-06, + "loss": 0.7053, + "step": 245 + }, + { + "epoch": 1.5060240963855422, + "grad_norm": 2.3631157875061035, + "learning_rate": 1.2550200803212852e-06, + "loss": 0.6831, + "step": 250 + }, + { + "epoch": 1.536144578313253, + "grad_norm": 1.8485504388809204, + "learning_rate": 1.2801204819277108e-06, + "loss": 0.6957, + "step": 255 + }, + { + "epoch": 1.5662650602409638, + "grad_norm": 2.748507261276245, + "learning_rate": 1.3052208835341365e-06, + "loss": 0.69, + "step": 260 + }, + { + "epoch": 1.5963855421686746, + "grad_norm": 1.7519440650939941, + "learning_rate": 1.3303212851405624e-06, + "loss": 0.6928, + "step": 265 + }, + { + "epoch": 1.6265060240963856, + "grad_norm": 1.7398183345794678, + "learning_rate": 1.355421686746988e-06, + "loss": 0.6695, + "step": 270 + }, + { + "epoch": 1.6566265060240963, + "grad_norm": 2.2140514850616455, + "learning_rate": 1.3805220883534137e-06, + "loss": 0.696, + "step": 275 + }, + { + "epoch": 1.6867469879518073, + "grad_norm": 2.0776941776275635, + "learning_rate": 1.4056224899598394e-06, + "loss": 0.6636, + "step": 280 + }, + { + "epoch": 1.716867469879518, + "grad_norm": 2.1594786643981934, + "learning_rate": 1.430722891566265e-06, + "loss": 0.7105, + "step": 285 + }, + { + "epoch": 1.7469879518072289, + "grad_norm": 2.252272844314575, + "learning_rate": 1.455823293172691e-06, + "loss": 0.6678, + "step": 290 + }, + { + "epoch": 1.7771084337349397, + "grad_norm": 2.4131171703338623, + "learning_rate": 1.4809236947791166e-06, + "loss": 0.6576, + "step": 295 + }, + { + "epoch": 1.8072289156626506, + "grad_norm": 2.6049649715423584, + "learning_rate": 1.5060240963855423e-06, + "loss": 0.6773, + "step": 300 + }, + { + "epoch": 1.8373493975903614, + "grad_norm": 2.995439052581787, + "learning_rate": 1.531124497991968e-06, + "loss": 0.6792, + "step": 305 + }, + { + "epoch": 1.8674698795180724, + "grad_norm": 1.8594603538513184, + "learning_rate": 1.5562248995983937e-06, + "loss": 0.6584, + "step": 310 + }, + { + "epoch": 1.8975903614457832, + "grad_norm": 1.6492047309875488, + "learning_rate": 1.5813253012048193e-06, + "loss": 0.669, + "step": 315 + }, + { + "epoch": 1.927710843373494, + "grad_norm": 2.0496723651885986, + "learning_rate": 1.606425702811245e-06, + "loss": 0.6582, + "step": 320 + }, + { + "epoch": 1.9578313253012047, + "grad_norm": 2.19722056388855, + "learning_rate": 1.6315261044176709e-06, + "loss": 0.6789, + "step": 325 + }, + { + "epoch": 1.9879518072289155, + "grad_norm": 2.4436256885528564, + "learning_rate": 1.6566265060240966e-06, + "loss": 0.6554, + "step": 330 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.6217066456940621, + "eval_auc": 0.6144076786929138, + "eval_f1": 0.5036119711042312, + "eval_loss": 0.653194010257721, + "eval_precision": 0.4599434495758718, + "eval_recall": 0.556442417331813, + "eval_runtime": 20.269, + "eval_samples_per_second": 125.462, + "eval_steps_per_second": 0.641, + "step": 332 + }, + { + "epoch": 2.0180722891566263, + "grad_norm": 1.9626730680465698, + "learning_rate": 1.6817269076305222e-06, + "loss": 0.6859, + "step": 335 + }, + { + "epoch": 2.0481927710843375, + "grad_norm": 3.7429897785186768, + "learning_rate": 1.706827309236948e-06, + "loss": 0.6644, + "step": 340 + }, + { + "epoch": 2.0783132530120483, + "grad_norm": 3.046361207962036, + "learning_rate": 1.7319277108433736e-06, + "loss": 0.6635, + "step": 345 + }, + { + "epoch": 2.108433734939759, + "grad_norm": 2.4592392444610596, + "learning_rate": 1.7570281124497993e-06, + "loss": 0.6551, + "step": 350 + }, + { + "epoch": 2.13855421686747, + "grad_norm": 2.031898260116577, + "learning_rate": 1.782128514056225e-06, + "loss": 0.6567, + "step": 355 + }, + { + "epoch": 2.1686746987951806, + "grad_norm": 2.472118616104126, + "learning_rate": 1.8072289156626506e-06, + "loss": 0.6759, + "step": 360 + }, + { + "epoch": 2.1987951807228914, + "grad_norm": 2.3769283294677734, + "learning_rate": 1.8323293172690763e-06, + "loss": 0.6514, + "step": 365 + }, + { + "epoch": 2.2289156626506026, + "grad_norm": 1.799721121788025, + "learning_rate": 1.857429718875502e-06, + "loss": 0.669, + "step": 370 + }, + { + "epoch": 2.2590361445783134, + "grad_norm": 1.7890353202819824, + "learning_rate": 1.882530120481928e-06, + "loss": 0.6488, + "step": 375 + }, + { + "epoch": 2.289156626506024, + "grad_norm": 2.3873209953308105, + "learning_rate": 1.9076305220883537e-06, + "loss": 0.6731, + "step": 380 + }, + { + "epoch": 2.319277108433735, + "grad_norm": 2.3185174465179443, + "learning_rate": 1.9327309236947794e-06, + "loss": 0.6583, + "step": 385 + }, + { + "epoch": 2.3493975903614457, + "grad_norm": 2.4070496559143066, + "learning_rate": 1.957831325301205e-06, + "loss": 0.6551, + "step": 390 + }, + { + "epoch": 2.3795180722891565, + "grad_norm": 2.2057552337646484, + "learning_rate": 1.9829317269076307e-06, + "loss": 0.6313, + "step": 395 + }, + { + "epoch": 2.4096385542168672, + "grad_norm": 1.937827467918396, + "learning_rate": 2.0080321285140564e-06, + "loss": 0.6206, + "step": 400 + }, + { + "epoch": 2.4397590361445785, + "grad_norm": 4.218434810638428, + "learning_rate": 2.033132530120482e-06, + "loss": 0.679, + "step": 405 + }, + { + "epoch": 2.4698795180722892, + "grad_norm": 1.721917748451233, + "learning_rate": 2.0582329317269078e-06, + "loss": 0.6457, + "step": 410 + }, + { + "epoch": 2.5, + "grad_norm": 1.8327134847640991, + "learning_rate": 2.0833333333333334e-06, + "loss": 0.6613, + "step": 415 + }, + { + "epoch": 2.5301204819277108, + "grad_norm": 2.555305242538452, + "learning_rate": 2.108433734939759e-06, + "loss": 0.6485, + "step": 420 + }, + { + "epoch": 2.5602409638554215, + "grad_norm": 2.2785534858703613, + "learning_rate": 2.133534136546185e-06, + "loss": 0.6364, + "step": 425 + }, + { + "epoch": 2.5903614457831328, + "grad_norm": 1.9908968210220337, + "learning_rate": 2.158634538152611e-06, + "loss": 0.6301, + "step": 430 + }, + { + "epoch": 2.6204819277108435, + "grad_norm": 2.1207611560821533, + "learning_rate": 2.1837349397590366e-06, + "loss": 0.6492, + "step": 435 + }, + { + "epoch": 2.6506024096385543, + "grad_norm": 2.1720712184906006, + "learning_rate": 2.208835341365462e-06, + "loss": 0.6638, + "step": 440 + }, + { + "epoch": 2.680722891566265, + "grad_norm": 2.6269476413726807, + "learning_rate": 2.2339357429718875e-06, + "loss": 0.6382, + "step": 445 + }, + { + "epoch": 2.710843373493976, + "grad_norm": 1.9714411497116089, + "learning_rate": 2.259036144578313e-06, + "loss": 0.6467, + "step": 450 + }, + { + "epoch": 2.7409638554216866, + "grad_norm": 1.945236086845398, + "learning_rate": 2.284136546184739e-06, + "loss": 0.6273, + "step": 455 + }, + { + "epoch": 2.7710843373493974, + "grad_norm": 2.1924173831939697, + "learning_rate": 2.3092369477911645e-06, + "loss": 0.6354, + "step": 460 + }, + { + "epoch": 2.8012048192771086, + "grad_norm": 2.991903066635132, + "learning_rate": 2.33433734939759e-06, + "loss": 0.655, + "step": 465 + }, + { + "epoch": 2.8313253012048194, + "grad_norm": 2.096970796585083, + "learning_rate": 2.359437751004016e-06, + "loss": 0.6365, + "step": 470 + }, + { + "epoch": 2.86144578313253, + "grad_norm": 2.686661958694458, + "learning_rate": 2.384538152610442e-06, + "loss": 0.6234, + "step": 475 + }, + { + "epoch": 2.891566265060241, + "grad_norm": 1.8316818475723267, + "learning_rate": 2.4096385542168676e-06, + "loss": 0.6229, + "step": 480 + }, + { + "epoch": 2.9216867469879517, + "grad_norm": 2.257622241973877, + "learning_rate": 2.4347389558232933e-06, + "loss": 0.6702, + "step": 485 + }, + { + "epoch": 2.9518072289156625, + "grad_norm": 2.138204574584961, + "learning_rate": 2.459839357429719e-06, + "loss": 0.6396, + "step": 490 + }, + { + "epoch": 2.9819277108433733, + "grad_norm": 1.9312093257904053, + "learning_rate": 2.4849397590361446e-06, + "loss": 0.6088, + "step": 495 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.6594573338576485, + "eval_auc": 0.6838651081869463, + "eval_f1": 0.5725567620927937, + "eval_loss": 0.6181133985519409, + "eval_precision": 0.504786771105309, + "eval_recall": 0.661345496009122, + "eval_runtime": 19.5745, + "eval_samples_per_second": 129.914, + "eval_steps_per_second": 0.664, + "step": 498 + }, + { + "epoch": 3.0120481927710845, + "grad_norm": 2.1444907188415527, + "learning_rate": 2.5100401606425703e-06, + "loss": 0.6148, + "step": 500 + }, + { + "epoch": 3.0421686746987953, + "grad_norm": 2.5095620155334473, + "learning_rate": 2.535140562248996e-06, + "loss": 0.6332, + "step": 505 + }, + { + "epoch": 3.072289156626506, + "grad_norm": 1.817351222038269, + "learning_rate": 2.5602409638554217e-06, + "loss": 0.6155, + "step": 510 + }, + { + "epoch": 3.102409638554217, + "grad_norm": 2.1711740493774414, + "learning_rate": 2.5853413654618473e-06, + "loss": 0.6248, + "step": 515 + }, + { + "epoch": 3.1325301204819276, + "grad_norm": 2.5385513305664062, + "learning_rate": 2.610441767068273e-06, + "loss": 0.5962, + "step": 520 + }, + { + "epoch": 3.1626506024096384, + "grad_norm": 3.1502151489257812, + "learning_rate": 2.635542168674699e-06, + "loss": 0.6474, + "step": 525 + }, + { + "epoch": 3.1927710843373496, + "grad_norm": 2.8188350200653076, + "learning_rate": 2.6606425702811248e-06, + "loss": 0.6637, + "step": 530 + }, + { + "epoch": 3.2228915662650603, + "grad_norm": 4.671653747558594, + "learning_rate": 2.6857429718875504e-06, + "loss": 0.6052, + "step": 535 + }, + { + "epoch": 3.253012048192771, + "grad_norm": 2.188119649887085, + "learning_rate": 2.710843373493976e-06, + "loss": 0.6422, + "step": 540 + }, + { + "epoch": 3.283132530120482, + "grad_norm": 3.3826537132263184, + "learning_rate": 2.735943775100402e-06, + "loss": 0.619, + "step": 545 + }, + { + "epoch": 3.3132530120481927, + "grad_norm": 2.027888298034668, + "learning_rate": 2.7610441767068275e-06, + "loss": 0.6217, + "step": 550 + }, + { + "epoch": 3.3433734939759034, + "grad_norm": 1.8784477710723877, + "learning_rate": 2.786144578313253e-06, + "loss": 0.6295, + "step": 555 + }, + { + "epoch": 3.3734939759036147, + "grad_norm": 2.3932571411132812, + "learning_rate": 2.811244979919679e-06, + "loss": 0.6064, + "step": 560 + }, + { + "epoch": 3.4036144578313254, + "grad_norm": 2.1809239387512207, + "learning_rate": 2.8363453815261045e-06, + "loss": 0.6289, + "step": 565 + }, + { + "epoch": 3.433734939759036, + "grad_norm": 2.666003704071045, + "learning_rate": 2.86144578313253e-06, + "loss": 0.6191, + "step": 570 + }, + { + "epoch": 3.463855421686747, + "grad_norm": 3.224411725997925, + "learning_rate": 2.8865461847389563e-06, + "loss": 0.5992, + "step": 575 + }, + { + "epoch": 3.4939759036144578, + "grad_norm": 1.847118616104126, + "learning_rate": 2.911646586345382e-06, + "loss": 0.6176, + "step": 580 + }, + { + "epoch": 3.5240963855421685, + "grad_norm": 2.127950429916382, + "learning_rate": 2.9367469879518076e-06, + "loss": 0.6184, + "step": 585 + }, + { + "epoch": 3.5542168674698793, + "grad_norm": 1.9930005073547363, + "learning_rate": 2.9618473895582333e-06, + "loss": 0.6008, + "step": 590 + }, + { + "epoch": 3.5843373493975905, + "grad_norm": 1.9236960411071777, + "learning_rate": 2.986947791164659e-06, + "loss": 0.6371, + "step": 595 + }, + { + "epoch": 3.6144578313253013, + "grad_norm": 2.96357798576355, + "learning_rate": 3.0120481927710846e-06, + "loss": 0.628, + "step": 600 + }, + { + "epoch": 3.644578313253012, + "grad_norm": 2.2546164989471436, + "learning_rate": 3.0371485943775103e-06, + "loss": 0.6089, + "step": 605 + }, + { + "epoch": 3.674698795180723, + "grad_norm": 2.080061197280884, + "learning_rate": 3.062248995983936e-06, + "loss": 0.6015, + "step": 610 + }, + { + "epoch": 3.7048192771084336, + "grad_norm": 1.9112757444381714, + "learning_rate": 3.0873493975903616e-06, + "loss": 0.6229, + "step": 615 + }, + { + "epoch": 3.734939759036145, + "grad_norm": 2.597161054611206, + "learning_rate": 3.1124497991967873e-06, + "loss": 0.6498, + "step": 620 + }, + { + "epoch": 3.765060240963855, + "grad_norm": 2.107349157333374, + "learning_rate": 3.137550200803213e-06, + "loss": 0.6175, + "step": 625 + }, + { + "epoch": 3.7951807228915664, + "grad_norm": 2.178757429122925, + "learning_rate": 3.1626506024096387e-06, + "loss": 0.6176, + "step": 630 + }, + { + "epoch": 3.825301204819277, + "grad_norm": 1.8402270078659058, + "learning_rate": 3.1877510040160643e-06, + "loss": 0.605, + "step": 635 + }, + { + "epoch": 3.855421686746988, + "grad_norm": 2.377106189727783, + "learning_rate": 3.21285140562249e-06, + "loss": 0.6259, + "step": 640 + }, + { + "epoch": 3.8855421686746987, + "grad_norm": 1.693340539932251, + "learning_rate": 3.2379518072289157e-06, + "loss": 0.6122, + "step": 645 + }, + { + "epoch": 3.9156626506024095, + "grad_norm": 1.722320556640625, + "learning_rate": 3.2630522088353418e-06, + "loss": 0.599, + "step": 650 + }, + { + "epoch": 3.9457831325301207, + "grad_norm": 2.285301685333252, + "learning_rate": 3.288152610441767e-06, + "loss": 0.5832, + "step": 655 + }, + { + "epoch": 3.9759036144578315, + "grad_norm": 2.8827247619628906, + "learning_rate": 3.313253012048193e-06, + "loss": 0.615, + "step": 660 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6940621313409359, + "eval_auc": 0.7312645696819207, + "eval_f1": 0.6245173745173745, + "eval_loss": 0.5903819799423218, + "eval_precision": 0.5414225941422595, + "eval_recall": 0.7377423033067275, + "eval_runtime": 17.3385, + "eval_samples_per_second": 146.668, + "eval_steps_per_second": 0.75, + "step": 664 + }, + { + "epoch": 4.006024096385542, + "grad_norm": 1.8333141803741455, + "learning_rate": 3.3383534136546184e-06, + "loss": 0.5948, + "step": 665 + }, + { + "epoch": 4.036144578313253, + "grad_norm": 1.953653335571289, + "learning_rate": 3.3634538152610445e-06, + "loss": 0.5846, + "step": 670 + }, + { + "epoch": 4.066265060240964, + "grad_norm": 2.1964218616485596, + "learning_rate": 3.3885542168674697e-06, + "loss": 0.6014, + "step": 675 + }, + { + "epoch": 4.096385542168675, + "grad_norm": 1.7889600992202759, + "learning_rate": 3.413654618473896e-06, + "loss": 0.5968, + "step": 680 + }, + { + "epoch": 4.126506024096385, + "grad_norm": 1.9411687850952148, + "learning_rate": 3.438755020080321e-06, + "loss": 0.6266, + "step": 685 + }, + { + "epoch": 4.156626506024097, + "grad_norm": 3.8834457397460938, + "learning_rate": 3.463855421686747e-06, + "loss": 0.589, + "step": 690 + }, + { + "epoch": 4.186746987951807, + "grad_norm": 1.8403301239013672, + "learning_rate": 3.4889558232931724e-06, + "loss": 0.6129, + "step": 695 + }, + { + "epoch": 4.216867469879518, + "grad_norm": 2.3717918395996094, + "learning_rate": 3.5140562248995985e-06, + "loss": 0.5861, + "step": 700 + }, + { + "epoch": 4.246987951807229, + "grad_norm": 1.6860854625701904, + "learning_rate": 3.5391566265060246e-06, + "loss": 0.6003, + "step": 705 + }, + { + "epoch": 4.27710843373494, + "grad_norm": 2.402580738067627, + "learning_rate": 3.56425702811245e-06, + "loss": 0.5967, + "step": 710 + }, + { + "epoch": 4.307228915662651, + "grad_norm": 2.134225845336914, + "learning_rate": 3.589357429718876e-06, + "loss": 0.6067, + "step": 715 + }, + { + "epoch": 4.337349397590361, + "grad_norm": 1.800850749015808, + "learning_rate": 3.614457831325301e-06, + "loss": 0.5971, + "step": 720 + }, + { + "epoch": 4.367469879518072, + "grad_norm": 2.6244163513183594, + "learning_rate": 3.6395582329317273e-06, + "loss": 0.6142, + "step": 725 + }, + { + "epoch": 4.397590361445783, + "grad_norm": 1.7857623100280762, + "learning_rate": 3.6646586345381526e-06, + "loss": 0.6248, + "step": 730 + }, + { + "epoch": 4.427710843373494, + "grad_norm": 2.7235026359558105, + "learning_rate": 3.6897590361445786e-06, + "loss": 0.5892, + "step": 735 + }, + { + "epoch": 4.457831325301205, + "grad_norm": 2.886775016784668, + "learning_rate": 3.714859437751004e-06, + "loss": 0.5761, + "step": 740 + }, + { + "epoch": 4.4879518072289155, + "grad_norm": 2.2062480449676514, + "learning_rate": 3.73995983935743e-06, + "loss": 0.5991, + "step": 745 + }, + { + "epoch": 4.518072289156627, + "grad_norm": 2.093432664871216, + "learning_rate": 3.765060240963856e-06, + "loss": 0.5968, + "step": 750 + }, + { + "epoch": 4.548192771084337, + "grad_norm": 2.6118621826171875, + "learning_rate": 3.7901606425702813e-06, + "loss": 0.6056, + "step": 755 + }, + { + "epoch": 4.578313253012048, + "grad_norm": 1.7923548221588135, + "learning_rate": 3.8152610441767074e-06, + "loss": 0.5834, + "step": 760 + }, + { + "epoch": 4.608433734939759, + "grad_norm": 2.803171157836914, + "learning_rate": 3.840361445783132e-06, + "loss": 0.6035, + "step": 765 + }, + { + "epoch": 4.63855421686747, + "grad_norm": 2.002159357070923, + "learning_rate": 3.865461847389559e-06, + "loss": 0.5678, + "step": 770 + }, + { + "epoch": 4.668674698795181, + "grad_norm": 1.9078603982925415, + "learning_rate": 3.890562248995984e-06, + "loss": 0.6102, + "step": 775 + }, + { + "epoch": 4.698795180722891, + "grad_norm": 2.1809115409851074, + "learning_rate": 3.91566265060241e-06, + "loss": 0.5945, + "step": 780 + }, + { + "epoch": 4.728915662650603, + "grad_norm": 2.0736207962036133, + "learning_rate": 3.940763052208835e-06, + "loss": 0.6274, + "step": 785 + }, + { + "epoch": 4.759036144578313, + "grad_norm": 2.3214478492736816, + "learning_rate": 3.9658634538152615e-06, + "loss": 0.5932, + "step": 790 + }, + { + "epoch": 4.789156626506024, + "grad_norm": 1.8077796697616577, + "learning_rate": 3.990963855421686e-06, + "loss": 0.586, + "step": 795 + }, + { + "epoch": 4.8192771084337345, + "grad_norm": 1.7880562543869019, + "learning_rate": 4.016064257028113e-06, + "loss": 0.5747, + "step": 800 + }, + { + "epoch": 4.849397590361446, + "grad_norm": 1.7393368482589722, + "learning_rate": 4.0411646586345385e-06, + "loss": 0.5664, + "step": 805 + }, + { + "epoch": 4.879518072289157, + "grad_norm": 1.9396228790283203, + "learning_rate": 4.066265060240964e-06, + "loss": 0.5854, + "step": 810 + }, + { + "epoch": 4.909638554216867, + "grad_norm": 2.0996456146240234, + "learning_rate": 4.09136546184739e-06, + "loss": 0.6016, + "step": 815 + }, + { + "epoch": 4.9397590361445785, + "grad_norm": 2.321573495864868, + "learning_rate": 4.1164658634538155e-06, + "loss": 0.5594, + "step": 820 + }, + { + "epoch": 4.969879518072289, + "grad_norm": 1.8303910493850708, + "learning_rate": 4.141566265060241e-06, + "loss": 0.5704, + "step": 825 + }, + { + "epoch": 5.0, + "grad_norm": 2.0891566276550293, + "learning_rate": 4.166666666666667e-06, + "loss": 0.5958, + "step": 830 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.7286669288242233, + "eval_auc": 0.7606243181423082, + "eval_f1": 0.654308617234469, + "eval_loss": 0.5537045001983643, + "eval_precision": 0.5835567470956211, + "eval_recall": 0.7445838084378563, + "eval_runtime": 19.5383, + "eval_samples_per_second": 130.155, + "eval_steps_per_second": 0.665, + "step": 830 + }, + { + "epoch": 5.030120481927711, + "grad_norm": 1.9920940399169922, + "learning_rate": 4.1917670682730925e-06, + "loss": 0.568, + "step": 835 + }, + { + "epoch": 5.0602409638554215, + "grad_norm": 1.9087443351745605, + "learning_rate": 4.216867469879518e-06, + "loss": 0.5665, + "step": 840 + }, + { + "epoch": 5.090361445783133, + "grad_norm": 2.4391868114471436, + "learning_rate": 4.241967871485944e-06, + "loss": 0.5713, + "step": 845 + }, + { + "epoch": 5.120481927710843, + "grad_norm": 2.1306042671203613, + "learning_rate": 4.26706827309237e-06, + "loss": 0.5881, + "step": 850 + }, + { + "epoch": 5.150602409638554, + "grad_norm": 2.5978026390075684, + "learning_rate": 4.292168674698795e-06, + "loss": 0.6233, + "step": 855 + }, + { + "epoch": 5.180722891566265, + "grad_norm": 2.448408603668213, + "learning_rate": 4.317269076305222e-06, + "loss": 0.5802, + "step": 860 + }, + { + "epoch": 5.210843373493976, + "grad_norm": 2.4194798469543457, + "learning_rate": 4.342369477911647e-06, + "loss": 0.5734, + "step": 865 + }, + { + "epoch": 5.240963855421687, + "grad_norm": 1.9208903312683105, + "learning_rate": 4.367469879518073e-06, + "loss": 0.5666, + "step": 870 + }, + { + "epoch": 5.271084337349397, + "grad_norm": 2.0400986671447754, + "learning_rate": 4.392570281124498e-06, + "loss": 0.5996, + "step": 875 + }, + { + "epoch": 5.301204819277109, + "grad_norm": 3.45426607131958, + "learning_rate": 4.417670682730924e-06, + "loss": 0.5885, + "step": 880 + }, + { + "epoch": 5.331325301204819, + "grad_norm": 2.500495433807373, + "learning_rate": 4.442771084337349e-06, + "loss": 0.554, + "step": 885 + }, + { + "epoch": 5.36144578313253, + "grad_norm": 1.7928919792175293, + "learning_rate": 4.467871485943775e-06, + "loss": 0.5855, + "step": 890 + }, + { + "epoch": 5.391566265060241, + "grad_norm": 9.793763160705566, + "learning_rate": 4.492971887550201e-06, + "loss": 0.5825, + "step": 895 + }, + { + "epoch": 5.421686746987952, + "grad_norm": 2.1107916831970215, + "learning_rate": 4.518072289156626e-06, + "loss": 0.559, + "step": 900 + }, + { + "epoch": 5.451807228915663, + "grad_norm": 2.334136962890625, + "learning_rate": 4.543172690763053e-06, + "loss": 0.532, + "step": 905 + }, + { + "epoch": 5.481927710843373, + "grad_norm": 2.731076955795288, + "learning_rate": 4.568273092369478e-06, + "loss": 0.5743, + "step": 910 + }, + { + "epoch": 5.5120481927710845, + "grad_norm": 2.3347549438476562, + "learning_rate": 4.593373493975904e-06, + "loss": 0.5561, + "step": 915 + }, + { + "epoch": 5.542168674698795, + "grad_norm": 2.748056650161743, + "learning_rate": 4.618473895582329e-06, + "loss": 0.5781, + "step": 920 + }, + { + "epoch": 5.572289156626506, + "grad_norm": 2.7957303524017334, + "learning_rate": 4.6435742971887555e-06, + "loss": 0.5558, + "step": 925 + }, + { + "epoch": 5.602409638554217, + "grad_norm": 1.5970895290374756, + "learning_rate": 4.66867469879518e-06, + "loss": 0.5483, + "step": 930 + }, + { + "epoch": 5.632530120481928, + "grad_norm": 2.1057193279266357, + "learning_rate": 4.693775100401607e-06, + "loss": 0.5366, + "step": 935 + }, + { + "epoch": 5.662650602409639, + "grad_norm": 1.8413000106811523, + "learning_rate": 4.718875502008032e-06, + "loss": 0.6011, + "step": 940 + }, + { + "epoch": 5.692771084337349, + "grad_norm": 2.379899263381958, + "learning_rate": 4.743975903614458e-06, + "loss": 0.5777, + "step": 945 + }, + { + "epoch": 5.72289156626506, + "grad_norm": 2.024463415145874, + "learning_rate": 4.769076305220884e-06, + "loss": 0.5611, + "step": 950 + }, + { + "epoch": 5.753012048192771, + "grad_norm": 2.208953619003296, + "learning_rate": 4.7941767068273095e-06, + "loss": 0.5552, + "step": 955 + }, + { + "epoch": 5.783132530120482, + "grad_norm": 2.498267650604248, + "learning_rate": 4.819277108433735e-06, + "loss": 0.5749, + "step": 960 + }, + { + "epoch": 5.813253012048193, + "grad_norm": 1.8806592226028442, + "learning_rate": 4.844377510040161e-06, + "loss": 0.5642, + "step": 965 + }, + { + "epoch": 5.843373493975903, + "grad_norm": 2.3786778450012207, + "learning_rate": 4.8694779116465866e-06, + "loss": 0.5855, + "step": 970 + }, + { + "epoch": 5.873493975903615, + "grad_norm": 1.818545937538147, + "learning_rate": 4.894578313253012e-06, + "loss": 0.5445, + "step": 975 + }, + { + "epoch": 5.903614457831325, + "grad_norm": 2.958966016769409, + "learning_rate": 4.919678714859438e-06, + "loss": 0.5695, + "step": 980 + }, + { + "epoch": 5.933734939759036, + "grad_norm": 2.4790141582489014, + "learning_rate": 4.944779116465864e-06, + "loss": 0.5498, + "step": 985 + }, + { + "epoch": 5.9638554216867465, + "grad_norm": 2.4448063373565674, + "learning_rate": 4.969879518072289e-06, + "loss": 0.5355, + "step": 990 + }, + { + "epoch": 5.993975903614458, + "grad_norm": 1.8333290815353394, + "learning_rate": 4.994979919678715e-06, + "loss": 0.5499, + "step": 995 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.739284309870232, + "eval_auc": 0.7864267713927076, + "eval_f1": 0.6475279106858054, + "eval_loss": 0.5208005309104919, + "eval_precision": 0.6065737051792829, + "eval_recall": 0.6944127708095781, + "eval_runtime": 19.8085, + "eval_samples_per_second": 128.379, + "eval_steps_per_second": 0.656, + "step": 996 + }, + { + "epoch": 6.024096385542169, + "grad_norm": 1.8895107507705688, + "learning_rate": 5.020080321285141e-06, + "loss": 0.5548, + "step": 1000 + }, + { + "epoch": 6.054216867469879, + "grad_norm": 2.6331355571746826, + "learning_rate": 5.045180722891567e-06, + "loss": 0.5754, + "step": 1005 + }, + { + "epoch": 6.0843373493975905, + "grad_norm": 2.3016576766967773, + "learning_rate": 5.070281124497992e-06, + "loss": 0.5569, + "step": 1010 + }, + { + "epoch": 6.114457831325301, + "grad_norm": 4.576088905334473, + "learning_rate": 5.0953815261044185e-06, + "loss": 0.5756, + "step": 1015 + }, + { + "epoch": 6.144578313253012, + "grad_norm": 1.9560896158218384, + "learning_rate": 5.120481927710843e-06, + "loss": 0.5583, + "step": 1020 + }, + { + "epoch": 6.174698795180723, + "grad_norm": 2.603187322616577, + "learning_rate": 5.14558232931727e-06, + "loss": 0.5079, + "step": 1025 + }, + { + "epoch": 6.204819277108434, + "grad_norm": 1.9385886192321777, + "learning_rate": 5.170682730923695e-06, + "loss": 0.5322, + "step": 1030 + }, + { + "epoch": 6.234939759036145, + "grad_norm": 2.257345199584961, + "learning_rate": 5.195783132530121e-06, + "loss": 0.594, + "step": 1035 + }, + { + "epoch": 6.265060240963855, + "grad_norm": 1.9930522441864014, + "learning_rate": 5.220883534136546e-06, + "loss": 0.529, + "step": 1040 + }, + { + "epoch": 6.295180722891566, + "grad_norm": 2.365818977355957, + "learning_rate": 5.2459839357429725e-06, + "loss": 0.5586, + "step": 1045 + }, + { + "epoch": 6.325301204819277, + "grad_norm": 2.0368270874023438, + "learning_rate": 5.271084337349398e-06, + "loss": 0.5333, + "step": 1050 + }, + { + "epoch": 6.355421686746988, + "grad_norm": 1.854719877243042, + "learning_rate": 5.296184738955824e-06, + "loss": 0.5721, + "step": 1055 + }, + { + "epoch": 6.385542168674699, + "grad_norm": 2.101360321044922, + "learning_rate": 5.3212851405622495e-06, + "loss": 0.5424, + "step": 1060 + }, + { + "epoch": 6.4156626506024095, + "grad_norm": 2.0200586318969727, + "learning_rate": 5.346385542168675e-06, + "loss": 0.5581, + "step": 1065 + }, + { + "epoch": 6.445783132530121, + "grad_norm": 1.954229474067688, + "learning_rate": 5.371485943775101e-06, + "loss": 0.546, + "step": 1070 + }, + { + "epoch": 6.475903614457831, + "grad_norm": 2.5045673847198486, + "learning_rate": 5.3965863453815266e-06, + "loss": 0.5167, + "step": 1075 + }, + { + "epoch": 6.506024096385542, + "grad_norm": 2.311288595199585, + "learning_rate": 5.421686746987952e-06, + "loss": 0.5613, + "step": 1080 + }, + { + "epoch": 6.5361445783132535, + "grad_norm": 1.9730944633483887, + "learning_rate": 5.446787148594378e-06, + "loss": 0.5447, + "step": 1085 + }, + { + "epoch": 6.566265060240964, + "grad_norm": 1.8411797285079956, + "learning_rate": 5.471887550200804e-06, + "loss": 0.5209, + "step": 1090 + }, + { + "epoch": 6.596385542168675, + "grad_norm": 3.7007882595062256, + "learning_rate": 5.496987951807229e-06, + "loss": 0.5771, + "step": 1095 + }, + { + "epoch": 6.626506024096385, + "grad_norm": 2.768594264984131, + "learning_rate": 5.522088353413655e-06, + "loss": 0.5304, + "step": 1100 + }, + { + "epoch": 6.656626506024097, + "grad_norm": 2.585475444793701, + "learning_rate": 5.547188755020081e-06, + "loss": 0.5882, + "step": 1105 + }, + { + "epoch": 6.686746987951807, + "grad_norm": 1.8738479614257812, + "learning_rate": 5.572289156626506e-06, + "loss": 0.5168, + "step": 1110 + }, + { + "epoch": 6.716867469879518, + "grad_norm": 2.3905439376831055, + "learning_rate": 5.597389558232932e-06, + "loss": 0.5593, + "step": 1115 + }, + { + "epoch": 6.746987951807229, + "grad_norm": 2.611729860305786, + "learning_rate": 5.622489959839358e-06, + "loss": 0.5324, + "step": 1120 + }, + { + "epoch": 6.77710843373494, + "grad_norm": 2.1663403511047363, + "learning_rate": 5.647590361445783e-06, + "loss": 0.5665, + "step": 1125 + }, + { + "epoch": 6.807228915662651, + "grad_norm": 4.150970935821533, + "learning_rate": 5.672690763052209e-06, + "loss": 0.5385, + "step": 1130 + }, + { + "epoch": 6.837349397590361, + "grad_norm": 2.477626323699951, + "learning_rate": 5.697791164658635e-06, + "loss": 0.5355, + "step": 1135 + }, + { + "epoch": 6.867469879518072, + "grad_norm": 1.951925277709961, + "learning_rate": 5.72289156626506e-06, + "loss": 0.5418, + "step": 1140 + }, + { + "epoch": 6.897590361445783, + "grad_norm": 2.934628963470459, + "learning_rate": 5.747991967871486e-06, + "loss": 0.5653, + "step": 1145 + }, + { + "epoch": 6.927710843373494, + "grad_norm": 2.06850004196167, + "learning_rate": 5.7730923694779125e-06, + "loss": 0.5277, + "step": 1150 + }, + { + "epoch": 6.957831325301205, + "grad_norm": 1.8905904293060303, + "learning_rate": 5.798192771084337e-06, + "loss": 0.5489, + "step": 1155 + }, + { + "epoch": 6.9879518072289155, + "grad_norm": 2.704571008682251, + "learning_rate": 5.823293172690764e-06, + "loss": 0.5467, + "step": 1160 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.7561934722768384, + "eval_auc": 0.7969008584049355, + "eval_f1": 0.6544035674470458, + "eval_loss": 0.4929519295692444, + "eval_precision": 0.6401308615049073, + "eval_recall": 0.669327251995439, + "eval_runtime": 19.5191, + "eval_samples_per_second": 130.283, + "eval_steps_per_second": 0.666, + "step": 1162 + }, + { + "epoch": 7.018072289156627, + "grad_norm": 2.4509992599487305, + "learning_rate": 5.848393574297189e-06, + "loss": 0.5328, + "step": 1165 + }, + { + "epoch": 7.048192771084337, + "grad_norm": 2.994516372680664, + "learning_rate": 5.873493975903615e-06, + "loss": 0.5498, + "step": 1170 + }, + { + "epoch": 7.078313253012048, + "grad_norm": 1.663866400718689, + "learning_rate": 5.89859437751004e-06, + "loss": 0.5231, + "step": 1175 + }, + { + "epoch": 7.108433734939759, + "grad_norm": 2.1491575241088867, + "learning_rate": 5.9236947791164665e-06, + "loss": 0.5392, + "step": 1180 + }, + { + "epoch": 7.13855421686747, + "grad_norm": 1.8787453174591064, + "learning_rate": 5.948795180722891e-06, + "loss": 0.5417, + "step": 1185 + }, + { + "epoch": 7.168674698795181, + "grad_norm": 2.0878076553344727, + "learning_rate": 5.973895582329318e-06, + "loss": 0.5329, + "step": 1190 + }, + { + "epoch": 7.198795180722891, + "grad_norm": 1.762899398803711, + "learning_rate": 5.998995983935743e-06, + "loss": 0.5118, + "step": 1195 + }, + { + "epoch": 7.228915662650603, + "grad_norm": 2.437101125717163, + "learning_rate": 6.024096385542169e-06, + "loss": 0.5524, + "step": 1200 + }, + { + "epoch": 7.259036144578313, + "grad_norm": 2.0478458404541016, + "learning_rate": 6.049196787148595e-06, + "loss": 0.5273, + "step": 1205 + }, + { + "epoch": 7.289156626506024, + "grad_norm": 2.2171614170074463, + "learning_rate": 6.074297188755021e-06, + "loss": 0.5624, + "step": 1210 + }, + { + "epoch": 7.3192771084337345, + "grad_norm": 2.5147898197174072, + "learning_rate": 6.099397590361446e-06, + "loss": 0.5172, + "step": 1215 + }, + { + "epoch": 7.349397590361446, + "grad_norm": 1.7457013130187988, + "learning_rate": 6.124497991967872e-06, + "loss": 0.5085, + "step": 1220 + }, + { + "epoch": 7.379518072289157, + "grad_norm": 2.980337142944336, + "learning_rate": 6.149598393574298e-06, + "loss": 0.5385, + "step": 1225 + }, + { + "epoch": 7.409638554216867, + "grad_norm": 2.7685298919677734, + "learning_rate": 6.174698795180723e-06, + "loss": 0.4912, + "step": 1230 + }, + { + "epoch": 7.4397590361445785, + "grad_norm": 2.4463353157043457, + "learning_rate": 6.199799196787149e-06, + "loss": 0.516, + "step": 1235 + }, + { + "epoch": 7.469879518072289, + "grad_norm": 2.2994775772094727, + "learning_rate": 6.224899598393575e-06, + "loss": 0.5013, + "step": 1240 + }, + { + "epoch": 7.5, + "grad_norm": 2.0714969635009766, + "learning_rate": 6.25e-06, + "loss": 0.5595, + "step": 1245 + }, + { + "epoch": 7.530120481927711, + "grad_norm": 1.9828314781188965, + "learning_rate": 6.275100401606426e-06, + "loss": 0.5589, + "step": 1250 + }, + { + "epoch": 7.5602409638554215, + "grad_norm": 3.4310250282287598, + "learning_rate": 6.3002008032128525e-06, + "loss": 0.5351, + "step": 1255 + }, + { + "epoch": 7.590361445783133, + "grad_norm": 3.5097134113311768, + "learning_rate": 6.325301204819277e-06, + "loss": 0.5621, + "step": 1260 + }, + { + "epoch": 7.620481927710843, + "grad_norm": 3.3920838832855225, + "learning_rate": 6.350401606425703e-06, + "loss": 0.5364, + "step": 1265 + }, + { + "epoch": 7.650602409638554, + "grad_norm": 2.096998453140259, + "learning_rate": 6.375502008032129e-06, + "loss": 0.5467, + "step": 1270 + }, + { + "epoch": 7.6807228915662655, + "grad_norm": 2.3254811763763428, + "learning_rate": 6.400602409638555e-06, + "loss": 0.5144, + "step": 1275 + }, + { + "epoch": 7.710843373493976, + "grad_norm": 2.36531400680542, + "learning_rate": 6.42570281124498e-06, + "loss": 0.5264, + "step": 1280 + }, + { + "epoch": 7.740963855421687, + "grad_norm": 2.1447479724884033, + "learning_rate": 6.450803212851406e-06, + "loss": 0.5059, + "step": 1285 + }, + { + "epoch": 7.771084337349397, + "grad_norm": 2.3711814880371094, + "learning_rate": 6.475903614457831e-06, + "loss": 0.5012, + "step": 1290 + }, + { + "epoch": 7.801204819277109, + "grad_norm": 1.8993377685546875, + "learning_rate": 6.501004016064258e-06, + "loss": 0.5117, + "step": 1295 + }, + { + "epoch": 7.831325301204819, + "grad_norm": 2.4439542293548584, + "learning_rate": 6.5261044176706836e-06, + "loss": 0.5215, + "step": 1300 + }, + { + "epoch": 7.86144578313253, + "grad_norm": 2.124606132507324, + "learning_rate": 6.551204819277108e-06, + "loss": 0.5428, + "step": 1305 + }, + { + "epoch": 7.891566265060241, + "grad_norm": 2.4091856479644775, + "learning_rate": 6.576305220883534e-06, + "loss": 0.5675, + "step": 1310 + }, + { + "epoch": 7.921686746987952, + "grad_norm": 2.2007946968078613, + "learning_rate": 6.6014056224899606e-06, + "loss": 0.5338, + "step": 1315 + }, + { + "epoch": 7.951807228915663, + "grad_norm": 2.244727373123169, + "learning_rate": 6.626506024096386e-06, + "loss": 0.5405, + "step": 1320 + }, + { + "epoch": 7.981927710843373, + "grad_norm": 2.5336217880249023, + "learning_rate": 6.651606425702811e-06, + "loss": 0.5484, + "step": 1325 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7758552890287063, + "eval_auc": 0.8200775178942729, + "eval_f1": 0.6925566343042071, + "eval_loss": 0.4728256165981293, + "eval_precision": 0.6571136131013307, + "eval_recall": 0.7320410490307868, + "eval_runtime": 19.4979, + "eval_samples_per_second": 130.424, + "eval_steps_per_second": 0.667, + "step": 1328 + }, + { + "epoch": 8.012048192771084, + "grad_norm": 2.269305944442749, + "learning_rate": 6.676706827309237e-06, + "loss": 0.4996, + "step": 1330 + }, + { + "epoch": 8.042168674698795, + "grad_norm": 2.1622703075408936, + "learning_rate": 6.701807228915663e-06, + "loss": 0.5459, + "step": 1335 + }, + { + "epoch": 8.072289156626505, + "grad_norm": 2.5812857151031494, + "learning_rate": 6.726907630522089e-06, + "loss": 0.5579, + "step": 1340 + }, + { + "epoch": 8.102409638554217, + "grad_norm": 3.5632548332214355, + "learning_rate": 6.7520080321285155e-06, + "loss": 0.4909, + "step": 1345 + }, + { + "epoch": 8.132530120481928, + "grad_norm": 1.9275950193405151, + "learning_rate": 6.7771084337349394e-06, + "loss": 0.4672, + "step": 1350 + }, + { + "epoch": 8.162650602409638, + "grad_norm": 2.0507545471191406, + "learning_rate": 6.802208835341366e-06, + "loss": 0.4888, + "step": 1355 + }, + { + "epoch": 8.19277108433735, + "grad_norm": 2.0971333980560303, + "learning_rate": 6.827309236947792e-06, + "loss": 0.5421, + "step": 1360 + }, + { + "epoch": 8.22289156626506, + "grad_norm": 2.3234474658966064, + "learning_rate": 6.852409638554218e-06, + "loss": 0.5164, + "step": 1365 + }, + { + "epoch": 8.25301204819277, + "grad_norm": 2.625166654586792, + "learning_rate": 6.877510040160642e-06, + "loss": 0.525, + "step": 1370 + }, + { + "epoch": 8.283132530120483, + "grad_norm": 3.066610097885132, + "learning_rate": 6.902610441767069e-06, + "loss": 0.4918, + "step": 1375 + }, + { + "epoch": 8.313253012048193, + "grad_norm": 2.1544108390808105, + "learning_rate": 6.927710843373494e-06, + "loss": 0.5355, + "step": 1380 + }, + { + "epoch": 8.343373493975903, + "grad_norm": 2.05570387840271, + "learning_rate": 6.95281124497992e-06, + "loss": 0.5114, + "step": 1385 + }, + { + "epoch": 8.373493975903614, + "grad_norm": 3.5560357570648193, + "learning_rate": 6.977911646586345e-06, + "loss": 0.5201, + "step": 1390 + }, + { + "epoch": 8.403614457831326, + "grad_norm": 2.2558846473693848, + "learning_rate": 7.003012048192771e-06, + "loss": 0.5337, + "step": 1395 + }, + { + "epoch": 8.433734939759036, + "grad_norm": 2.5230071544647217, + "learning_rate": 7.028112449799197e-06, + "loss": 0.5308, + "step": 1400 + }, + { + "epoch": 8.463855421686747, + "grad_norm": 2.2817351818084717, + "learning_rate": 7.053212851405623e-06, + "loss": 0.53, + "step": 1405 + }, + { + "epoch": 8.493975903614459, + "grad_norm": 1.9256802797317505, + "learning_rate": 7.078313253012049e-06, + "loss": 0.5086, + "step": 1410 + }, + { + "epoch": 8.524096385542169, + "grad_norm": 2.562375068664551, + "learning_rate": 7.103413654618474e-06, + "loss": 0.5701, + "step": 1415 + }, + { + "epoch": 8.55421686746988, + "grad_norm": 3.425631523132324, + "learning_rate": 7.1285140562249e-06, + "loss": 0.5305, + "step": 1420 + }, + { + "epoch": 8.58433734939759, + "grad_norm": 2.4099252223968506, + "learning_rate": 7.153614457831325e-06, + "loss": 0.4723, + "step": 1425 + }, + { + "epoch": 8.614457831325302, + "grad_norm": 1.8401768207550049, + "learning_rate": 7.178714859437752e-06, + "loss": 0.4943, + "step": 1430 + }, + { + "epoch": 8.644578313253012, + "grad_norm": 2.6531455516815186, + "learning_rate": 7.203815261044177e-06, + "loss": 0.4671, + "step": 1435 + }, + { + "epoch": 8.674698795180722, + "grad_norm": 1.9509261846542358, + "learning_rate": 7.228915662650602e-06, + "loss": 0.5076, + "step": 1440 + }, + { + "epoch": 8.704819277108435, + "grad_norm": 2.1042916774749756, + "learning_rate": 7.254016064257028e-06, + "loss": 0.5346, + "step": 1445 + }, + { + "epoch": 8.734939759036145, + "grad_norm": 3.418381929397583, + "learning_rate": 7.279116465863455e-06, + "loss": 0.4955, + "step": 1450 + }, + { + "epoch": 8.765060240963855, + "grad_norm": 1.7887992858886719, + "learning_rate": 7.30421686746988e-06, + "loss": 0.5363, + "step": 1455 + }, + { + "epoch": 8.795180722891565, + "grad_norm": 1.8168081045150757, + "learning_rate": 7.329317269076305e-06, + "loss": 0.5093, + "step": 1460 + }, + { + "epoch": 8.825301204819278, + "grad_norm": 2.2657430171966553, + "learning_rate": 7.354417670682731e-06, + "loss": 0.5352, + "step": 1465 + }, + { + "epoch": 8.855421686746988, + "grad_norm": 3.2351300716400146, + "learning_rate": 7.379518072289157e-06, + "loss": 0.4678, + "step": 1470 + }, + { + "epoch": 8.885542168674698, + "grad_norm": 2.0001509189605713, + "learning_rate": 7.404618473895583e-06, + "loss": 0.5276, + "step": 1475 + }, + { + "epoch": 8.91566265060241, + "grad_norm": 3.1765055656433105, + "learning_rate": 7.429718875502008e-06, + "loss": 0.5595, + "step": 1480 + }, + { + "epoch": 8.94578313253012, + "grad_norm": 2.8290090560913086, + "learning_rate": 7.4548192771084335e-06, + "loss": 0.5012, + "step": 1485 + }, + { + "epoch": 8.975903614457831, + "grad_norm": 1.8865560293197632, + "learning_rate": 7.47991967871486e-06, + "loss": 0.5722, + "step": 1490 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7896185607550138, + "eval_auc": 0.8347310417895779, + "eval_f1": 0.7055586130985141, + "eval_loss": 0.4508674442768097, + "eval_precision": 0.6819148936170213, + "eval_recall": 0.7309007981755986, + "eval_runtime": 19.0422, + "eval_samples_per_second": 133.546, + "eval_steps_per_second": 0.683, + "step": 1494 + }, + { + "epoch": 9.006024096385541, + "grad_norm": 3.6126842498779297, + "learning_rate": 7.505020080321286e-06, + "loss": 0.557, + "step": 1495 + }, + { + "epoch": 9.036144578313253, + "grad_norm": 2.2869274616241455, + "learning_rate": 7.530120481927712e-06, + "loss": 0.5249, + "step": 1500 + }, + { + "epoch": 9.066265060240964, + "grad_norm": 1.995977520942688, + "learning_rate": 7.555220883534136e-06, + "loss": 0.4981, + "step": 1505 + }, + { + "epoch": 9.096385542168674, + "grad_norm": 3.088841676712036, + "learning_rate": 7.580321285140563e-06, + "loss": 0.5257, + "step": 1510 + }, + { + "epoch": 9.126506024096386, + "grad_norm": 2.138551950454712, + "learning_rate": 7.605421686746988e-06, + "loss": 0.5238, + "step": 1515 + }, + { + "epoch": 9.156626506024097, + "grad_norm": 2.013193130493164, + "learning_rate": 7.630522088353415e-06, + "loss": 0.5156, + "step": 1520 + }, + { + "epoch": 9.186746987951807, + "grad_norm": 1.7738057374954224, + "learning_rate": 7.655622489959839e-06, + "loss": 0.4972, + "step": 1525 + }, + { + "epoch": 9.216867469879517, + "grad_norm": 2.0512638092041016, + "learning_rate": 7.680722891566265e-06, + "loss": 0.4875, + "step": 1530 + }, + { + "epoch": 9.24698795180723, + "grad_norm": 2.8747947216033936, + "learning_rate": 7.705823293172692e-06, + "loss": 0.5175, + "step": 1535 + }, + { + "epoch": 9.27710843373494, + "grad_norm": 2.159099578857422, + "learning_rate": 7.730923694779118e-06, + "loss": 0.5318, + "step": 1540 + }, + { + "epoch": 9.30722891566265, + "grad_norm": 2.510629653930664, + "learning_rate": 7.756024096385543e-06, + "loss": 0.4795, + "step": 1545 + }, + { + "epoch": 9.337349397590362, + "grad_norm": 2.352501392364502, + "learning_rate": 7.781124497991967e-06, + "loss": 0.4842, + "step": 1550 + }, + { + "epoch": 9.367469879518072, + "grad_norm": 2.652738571166992, + "learning_rate": 7.806224899598395e-06, + "loss": 0.5417, + "step": 1555 + }, + { + "epoch": 9.397590361445783, + "grad_norm": 2.8062024116516113, + "learning_rate": 7.83132530120482e-06, + "loss": 0.4939, + "step": 1560 + }, + { + "epoch": 9.427710843373493, + "grad_norm": 3.1697118282318115, + "learning_rate": 7.856425702811246e-06, + "loss": 0.4914, + "step": 1565 + }, + { + "epoch": 9.457831325301205, + "grad_norm": 2.109936237335205, + "learning_rate": 7.88152610441767e-06, + "loss": 0.5043, + "step": 1570 + }, + { + "epoch": 9.487951807228916, + "grad_norm": 1.8554120063781738, + "learning_rate": 7.906626506024097e-06, + "loss": 0.5032, + "step": 1575 + }, + { + "epoch": 9.518072289156626, + "grad_norm": 2.0720715522766113, + "learning_rate": 7.931726907630523e-06, + "loss": 0.5306, + "step": 1580 + }, + { + "epoch": 9.548192771084338, + "grad_norm": 1.990857720375061, + "learning_rate": 7.956827309236949e-06, + "loss": 0.5114, + "step": 1585 + }, + { + "epoch": 9.578313253012048, + "grad_norm": 2.719435691833496, + "learning_rate": 7.981927710843373e-06, + "loss": 0.5023, + "step": 1590 + }, + { + "epoch": 9.608433734939759, + "grad_norm": 1.8556512594223022, + "learning_rate": 8.0070281124498e-06, + "loss": 0.5046, + "step": 1595 + }, + { + "epoch": 9.638554216867469, + "grad_norm": 3.1273837089538574, + "learning_rate": 8.032128514056226e-06, + "loss": 0.5234, + "step": 1600 + }, + { + "epoch": 9.668674698795181, + "grad_norm": 2.0694265365600586, + "learning_rate": 8.057228915662651e-06, + "loss": 0.4927, + "step": 1605 + }, + { + "epoch": 9.698795180722891, + "grad_norm": 1.7405294179916382, + "learning_rate": 8.082329317269077e-06, + "loss": 0.469, + "step": 1610 + }, + { + "epoch": 9.728915662650602, + "grad_norm": 4.201533794403076, + "learning_rate": 8.107429718875503e-06, + "loss": 0.5196, + "step": 1615 + }, + { + "epoch": 9.759036144578314, + "grad_norm": 2.02896785736084, + "learning_rate": 8.132530120481928e-06, + "loss": 0.5044, + "step": 1620 + }, + { + "epoch": 9.789156626506024, + "grad_norm": 2.5728471279144287, + "learning_rate": 8.157630522088354e-06, + "loss": 0.5155, + "step": 1625 + }, + { + "epoch": 9.819277108433734, + "grad_norm": 2.0008513927459717, + "learning_rate": 8.18273092369478e-06, + "loss": 0.5, + "step": 1630 + }, + { + "epoch": 9.849397590361447, + "grad_norm": 2.3800718784332275, + "learning_rate": 8.207831325301205e-06, + "loss": 0.4815, + "step": 1635 + }, + { + "epoch": 9.879518072289157, + "grad_norm": 1.8371376991271973, + "learning_rate": 8.232931726907631e-06, + "loss": 0.4782, + "step": 1640 + }, + { + "epoch": 9.909638554216867, + "grad_norm": 3.298999309539795, + "learning_rate": 8.258032128514057e-06, + "loss": 0.5582, + "step": 1645 + }, + { + "epoch": 9.939759036144578, + "grad_norm": 2.420517683029175, + "learning_rate": 8.283132530120482e-06, + "loss": 0.4519, + "step": 1650 + }, + { + "epoch": 9.96987951807229, + "grad_norm": 2.1421029567718506, + "learning_rate": 8.308232931726908e-06, + "loss": 0.5498, + "step": 1655 + }, + { + "epoch": 10.0, + "grad_norm": 3.2856643199920654, + "learning_rate": 8.333333333333334e-06, + "loss": 0.4841, + "step": 1660 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.782147070389304, + "eval_auc": 0.8479315329324432, + "eval_f1": 0.7123572170301142, + "eval_loss": 0.44875216484069824, + "eval_precision": 0.6539561487130601, + "eval_recall": 0.7822120866590649, + "eval_runtime": 19.4068, + "eval_samples_per_second": 131.037, + "eval_steps_per_second": 0.67, + "step": 1660 + }, + { + "epoch": 10.03012048192771, + "grad_norm": 2.615487813949585, + "learning_rate": 8.35843373493976e-06, + "loss": 0.5128, + "step": 1665 + }, + { + "epoch": 10.060240963855422, + "grad_norm": 2.062527894973755, + "learning_rate": 8.383534136546185e-06, + "loss": 0.485, + "step": 1670 + }, + { + "epoch": 10.090361445783133, + "grad_norm": 2.5456483364105225, + "learning_rate": 8.40863453815261e-06, + "loss": 0.5274, + "step": 1675 + }, + { + "epoch": 10.120481927710843, + "grad_norm": 2.4075815677642822, + "learning_rate": 8.433734939759036e-06, + "loss": 0.5046, + "step": 1680 + }, + { + "epoch": 10.150602409638553, + "grad_norm": 2.2953052520751953, + "learning_rate": 8.458835341365462e-06, + "loss": 0.4945, + "step": 1685 + }, + { + "epoch": 10.180722891566266, + "grad_norm": 2.051063060760498, + "learning_rate": 8.483935742971888e-06, + "loss": 0.4961, + "step": 1690 + }, + { + "epoch": 10.210843373493976, + "grad_norm": 2.426858425140381, + "learning_rate": 8.509036144578313e-06, + "loss": 0.4804, + "step": 1695 + }, + { + "epoch": 10.240963855421686, + "grad_norm": 2.5486855506896973, + "learning_rate": 8.53413654618474e-06, + "loss": 0.4972, + "step": 1700 + }, + { + "epoch": 10.271084337349398, + "grad_norm": 2.055622100830078, + "learning_rate": 8.559236947791165e-06, + "loss": 0.4917, + "step": 1705 + }, + { + "epoch": 10.301204819277109, + "grad_norm": 1.8970571756362915, + "learning_rate": 8.58433734939759e-06, + "loss": 0.4663, + "step": 1710 + }, + { + "epoch": 10.331325301204819, + "grad_norm": 2.2279303073883057, + "learning_rate": 8.609437751004016e-06, + "loss": 0.5197, + "step": 1715 + }, + { + "epoch": 10.36144578313253, + "grad_norm": 2.70774245262146, + "learning_rate": 8.634538152610444e-06, + "loss": 0.4919, + "step": 1720 + }, + { + "epoch": 10.391566265060241, + "grad_norm": 2.5405726432800293, + "learning_rate": 8.659638554216867e-06, + "loss": 0.4688, + "step": 1725 + }, + { + "epoch": 10.421686746987952, + "grad_norm": 2.2970728874206543, + "learning_rate": 8.684738955823293e-06, + "loss": 0.4864, + "step": 1730 + }, + { + "epoch": 10.451807228915662, + "grad_norm": 2.4628303050994873, + "learning_rate": 8.709839357429719e-06, + "loss": 0.476, + "step": 1735 + }, + { + "epoch": 10.481927710843374, + "grad_norm": 2.983382225036621, + "learning_rate": 8.734939759036146e-06, + "loss": 0.5057, + "step": 1740 + }, + { + "epoch": 10.512048192771084, + "grad_norm": 2.126538038253784, + "learning_rate": 8.760040160642572e-06, + "loss": 0.5023, + "step": 1745 + }, + { + "epoch": 10.542168674698795, + "grad_norm": 2.560659408569336, + "learning_rate": 8.785140562248996e-06, + "loss": 0.4987, + "step": 1750 + }, + { + "epoch": 10.572289156626507, + "grad_norm": 2.0805766582489014, + "learning_rate": 8.810240963855422e-06, + "loss": 0.5026, + "step": 1755 + }, + { + "epoch": 10.602409638554217, + "grad_norm": 1.8629183769226074, + "learning_rate": 8.835341365461847e-06, + "loss": 0.4684, + "step": 1760 + }, + { + "epoch": 10.632530120481928, + "grad_norm": 2.280298948287964, + "learning_rate": 8.860441767068275e-06, + "loss": 0.4979, + "step": 1765 + }, + { + "epoch": 10.662650602409638, + "grad_norm": 2.412539005279541, + "learning_rate": 8.885542168674699e-06, + "loss": 0.4823, + "step": 1770 + }, + { + "epoch": 10.69277108433735, + "grad_norm": 2.6285996437072754, + "learning_rate": 8.910642570281124e-06, + "loss": 0.486, + "step": 1775 + }, + { + "epoch": 10.72289156626506, + "grad_norm": 2.4618632793426514, + "learning_rate": 8.93574297188755e-06, + "loss": 0.4472, + "step": 1780 + }, + { + "epoch": 10.75301204819277, + "grad_norm": 2.290416955947876, + "learning_rate": 8.960843373493977e-06, + "loss": 0.4769, + "step": 1785 + }, + { + "epoch": 10.783132530120483, + "grad_norm": 3.2826364040374756, + "learning_rate": 8.985943775100401e-06, + "loss": 0.4776, + "step": 1790 + }, + { + "epoch": 10.813253012048193, + "grad_norm": 2.097832441329956, + "learning_rate": 9.011044176706827e-06, + "loss": 0.5232, + "step": 1795 + }, + { + "epoch": 10.843373493975903, + "grad_norm": 2.799823045730591, + "learning_rate": 9.036144578313253e-06, + "loss": 0.5045, + "step": 1800 + }, + { + "epoch": 10.873493975903614, + "grad_norm": 2.0956716537475586, + "learning_rate": 9.06124497991968e-06, + "loss": 0.4784, + "step": 1805 + }, + { + "epoch": 10.903614457831326, + "grad_norm": 3.0392680168151855, + "learning_rate": 9.086345381526106e-06, + "loss": 0.486, + "step": 1810 + }, + { + "epoch": 10.933734939759036, + "grad_norm": 3.4051640033721924, + "learning_rate": 9.11144578313253e-06, + "loss": 0.4957, + "step": 1815 + }, + { + "epoch": 10.963855421686747, + "grad_norm": 4.216493606567383, + "learning_rate": 9.136546184738955e-06, + "loss": 0.4817, + "step": 1820 + }, + { + "epoch": 10.993975903614459, + "grad_norm": 3.6580519676208496, + "learning_rate": 9.161646586345383e-06, + "loss": 0.5234, + "step": 1825 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.7809673613841919, + "eval_auc": 0.851010415568736, + "eval_f1": 0.7041954328199681, + "eval_loss": 0.44431841373443604, + "eval_precision": 0.6590457256461233, + "eval_recall": 0.7559863169897377, + "eval_runtime": 20.0651, + "eval_samples_per_second": 126.738, + "eval_steps_per_second": 0.648, + "step": 1826 + }, + { + "epoch": 11.024096385542169, + "grad_norm": 2.170461654663086, + "learning_rate": 9.186746987951808e-06, + "loss": 0.4666, + "step": 1830 + }, + { + "epoch": 11.05421686746988, + "grad_norm": 1.8590593338012695, + "learning_rate": 9.211847389558232e-06, + "loss": 0.4547, + "step": 1835 + }, + { + "epoch": 11.08433734939759, + "grad_norm": 1.9973385334014893, + "learning_rate": 9.236947791164658e-06, + "loss": 0.4707, + "step": 1840 + }, + { + "epoch": 11.114457831325302, + "grad_norm": 2.076169013977051, + "learning_rate": 9.262048192771085e-06, + "loss": 0.5125, + "step": 1845 + }, + { + "epoch": 11.144578313253012, + "grad_norm": 2.7113735675811768, + "learning_rate": 9.287148594377511e-06, + "loss": 0.4566, + "step": 1850 + }, + { + "epoch": 11.174698795180722, + "grad_norm": 3.533003330230713, + "learning_rate": 9.312248995983937e-06, + "loss": 0.4702, + "step": 1855 + }, + { + "epoch": 11.204819277108435, + "grad_norm": 2.180222749710083, + "learning_rate": 9.33734939759036e-06, + "loss": 0.4937, + "step": 1860 + }, + { + "epoch": 11.234939759036145, + "grad_norm": 3.8985979557037354, + "learning_rate": 9.362449799196788e-06, + "loss": 0.4821, + "step": 1865 + }, + { + "epoch": 11.265060240963855, + "grad_norm": 3.7206363677978516, + "learning_rate": 9.387550200803214e-06, + "loss": 0.4762, + "step": 1870 + }, + { + "epoch": 11.295180722891565, + "grad_norm": 1.7842859029769897, + "learning_rate": 9.41265060240964e-06, + "loss": 0.4438, + "step": 1875 + }, + { + "epoch": 11.325301204819278, + "grad_norm": 2.2433724403381348, + "learning_rate": 9.437751004016063e-06, + "loss": 0.5085, + "step": 1880 + }, + { + "epoch": 11.355421686746988, + "grad_norm": 2.3268587589263916, + "learning_rate": 9.46285140562249e-06, + "loss": 0.4368, + "step": 1885 + }, + { + "epoch": 11.385542168674698, + "grad_norm": 2.997973918914795, + "learning_rate": 9.487951807228916e-06, + "loss": 0.4838, + "step": 1890 + }, + { + "epoch": 11.41566265060241, + "grad_norm": 2.7819128036499023, + "learning_rate": 9.513052208835342e-06, + "loss": 0.4669, + "step": 1895 + }, + { + "epoch": 11.44578313253012, + "grad_norm": 2.327488660812378, + "learning_rate": 9.538152610441768e-06, + "loss": 0.4901, + "step": 1900 + }, + { + "epoch": 11.475903614457831, + "grad_norm": 2.628075361251831, + "learning_rate": 9.563253012048193e-06, + "loss": 0.5057, + "step": 1905 + }, + { + "epoch": 11.506024096385541, + "grad_norm": 2.0853092670440674, + "learning_rate": 9.588353413654619e-06, + "loss": 0.4993, + "step": 1910 + }, + { + "epoch": 11.536144578313253, + "grad_norm": 2.37349271774292, + "learning_rate": 9.613453815261045e-06, + "loss": 0.4565, + "step": 1915 + }, + { + "epoch": 11.566265060240964, + "grad_norm": 3.0365233421325684, + "learning_rate": 9.63855421686747e-06, + "loss": 0.4829, + "step": 1920 + }, + { + "epoch": 11.596385542168674, + "grad_norm": 2.143648147583008, + "learning_rate": 9.663654618473896e-06, + "loss": 0.4739, + "step": 1925 + }, + { + "epoch": 11.626506024096386, + "grad_norm": 2.349785327911377, + "learning_rate": 9.688755020080322e-06, + "loss": 0.5107, + "step": 1930 + }, + { + "epoch": 11.656626506024097, + "grad_norm": 2.736833333969116, + "learning_rate": 9.713855421686747e-06, + "loss": 0.4743, + "step": 1935 + }, + { + "epoch": 11.686746987951807, + "grad_norm": 3.4357142448425293, + "learning_rate": 9.738955823293173e-06, + "loss": 0.489, + "step": 1940 + }, + { + "epoch": 11.716867469879517, + "grad_norm": 2.251582145690918, + "learning_rate": 9.764056224899599e-06, + "loss": 0.4888, + "step": 1945 + }, + { + "epoch": 11.74698795180723, + "grad_norm": 2.892306089401245, + "learning_rate": 9.789156626506024e-06, + "loss": 0.5466, + "step": 1950 + }, + { + "epoch": 11.77710843373494, + "grad_norm": 2.4566543102264404, + "learning_rate": 9.81425702811245e-06, + "loss": 0.4824, + "step": 1955 + }, + { + "epoch": 11.80722891566265, + "grad_norm": 1.9853315353393555, + "learning_rate": 9.839357429718876e-06, + "loss": 0.4504, + "step": 1960 + }, + { + "epoch": 11.837349397590362, + "grad_norm": 3.7275307178497314, + "learning_rate": 9.864457831325302e-06, + "loss": 0.462, + "step": 1965 + }, + { + "epoch": 11.867469879518072, + "grad_norm": 2.083540916442871, + "learning_rate": 9.889558232931727e-06, + "loss": 0.4787, + "step": 1970 + }, + { + "epoch": 11.897590361445783, + "grad_norm": 2.22110915184021, + "learning_rate": 9.914658634538153e-06, + "loss": 0.4848, + "step": 1975 + }, + { + "epoch": 11.927710843373493, + "grad_norm": 2.9532299041748047, + "learning_rate": 9.939759036144579e-06, + "loss": 0.4916, + "step": 1980 + }, + { + "epoch": 11.957831325301205, + "grad_norm": 3.3378384113311768, + "learning_rate": 9.964859437751004e-06, + "loss": 0.5118, + "step": 1985 + }, + { + "epoch": 11.987951807228916, + "grad_norm": 3.9365689754486084, + "learning_rate": 9.98995983935743e-06, + "loss": 0.5137, + "step": 1990 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.7605190719622493, + "eval_auc": 0.8553674605532064, + "eval_f1": 0.7112375533428165, + "eval_loss": 0.47835391759872437, + "eval_precision": 0.6087662337662337, + "eval_recall": 0.855188141391106, + "eval_runtime": 17.1677, + "eval_samples_per_second": 148.127, + "eval_steps_per_second": 0.757, + "step": 1992 + }, + { + "epoch": 12.018072289156626, + "grad_norm": 2.984025478363037, + "learning_rate": 1.0015060240963856e-05, + "loss": 0.5233, + "step": 1995 + }, + { + "epoch": 12.048192771084338, + "grad_norm": 4.041134357452393, + "learning_rate": 1.0040160642570281e-05, + "loss": 0.5057, + "step": 2000 + }, + { + "epoch": 12.078313253012048, + "grad_norm": 1.9200129508972168, + "learning_rate": 1.0065261044176707e-05, + "loss": 0.4799, + "step": 2005 + }, + { + "epoch": 12.108433734939759, + "grad_norm": 1.8479945659637451, + "learning_rate": 1.0090361445783134e-05, + "loss": 0.4227, + "step": 2010 + }, + { + "epoch": 12.13855421686747, + "grad_norm": 1.9779216051101685, + "learning_rate": 1.0115461847389558e-05, + "loss": 0.4581, + "step": 2015 + }, + { + "epoch": 12.168674698795181, + "grad_norm": 2.1734094619750977, + "learning_rate": 1.0140562248995984e-05, + "loss": 0.429, + "step": 2020 + }, + { + "epoch": 12.198795180722891, + "grad_norm": 2.0043869018554688, + "learning_rate": 1.016566265060241e-05, + "loss": 0.4395, + "step": 2025 + }, + { + "epoch": 12.228915662650602, + "grad_norm": 3.386418342590332, + "learning_rate": 1.0190763052208837e-05, + "loss": 0.4714, + "step": 2030 + }, + { + "epoch": 12.259036144578314, + "grad_norm": 1.8247755765914917, + "learning_rate": 1.0215863453815261e-05, + "loss": 0.4295, + "step": 2035 + }, + { + "epoch": 12.289156626506024, + "grad_norm": 2.1945042610168457, + "learning_rate": 1.0240963855421687e-05, + "loss": 0.4613, + "step": 2040 + }, + { + "epoch": 12.319277108433734, + "grad_norm": 2.047349214553833, + "learning_rate": 1.0266064257028112e-05, + "loss": 0.4658, + "step": 2045 + }, + { + "epoch": 12.349397590361447, + "grad_norm": 2.9103140830993652, + "learning_rate": 1.029116465863454e-05, + "loss": 0.4641, + "step": 2050 + }, + { + "epoch": 12.379518072289157, + "grad_norm": 3.0239338874816895, + "learning_rate": 1.0316265060240965e-05, + "loss": 0.4595, + "step": 2055 + }, + { + "epoch": 12.409638554216867, + "grad_norm": 2.5606324672698975, + "learning_rate": 1.034136546184739e-05, + "loss": 0.4078, + "step": 2060 + }, + { + "epoch": 12.439759036144578, + "grad_norm": 1.9044026136398315, + "learning_rate": 1.0366465863453815e-05, + "loss": 0.502, + "step": 2065 + }, + { + "epoch": 12.46987951807229, + "grad_norm": 1.8545125722885132, + "learning_rate": 1.0391566265060242e-05, + "loss": 0.4857, + "step": 2070 + }, + { + "epoch": 12.5, + "grad_norm": 2.916778087615967, + "learning_rate": 1.0416666666666668e-05, + "loss": 0.4664, + "step": 2075 + }, + { + "epoch": 12.53012048192771, + "grad_norm": 2.8088219165802, + "learning_rate": 1.0441767068273092e-05, + "loss": 0.4933, + "step": 2080 + }, + { + "epoch": 12.560240963855422, + "grad_norm": 2.9494364261627197, + "learning_rate": 1.0466867469879518e-05, + "loss": 0.4708, + "step": 2085 + }, + { + "epoch": 12.590361445783133, + "grad_norm": 2.231259822845459, + "learning_rate": 1.0491967871485945e-05, + "loss": 0.4536, + "step": 2090 + }, + { + "epoch": 12.620481927710843, + "grad_norm": 2.52738356590271, + "learning_rate": 1.051706827309237e-05, + "loss": 0.4716, + "step": 2095 + }, + { + "epoch": 12.650602409638553, + "grad_norm": 2.3508124351501465, + "learning_rate": 1.0542168674698796e-05, + "loss": 0.4922, + "step": 2100 + }, + { + "epoch": 12.680722891566266, + "grad_norm": 2.4535419940948486, + "learning_rate": 1.056726907630522e-05, + "loss": 0.4743, + "step": 2105 + }, + { + "epoch": 12.710843373493976, + "grad_norm": 1.9749757051467896, + "learning_rate": 1.0592369477911648e-05, + "loss": 0.4867, + "step": 2110 + }, + { + "epoch": 12.740963855421686, + "grad_norm": 2.2329463958740234, + "learning_rate": 1.0617469879518073e-05, + "loss": 0.5101, + "step": 2115 + }, + { + "epoch": 12.771084337349398, + "grad_norm": 2.830862045288086, + "learning_rate": 1.0642570281124499e-05, + "loss": 0.5009, + "step": 2120 + }, + { + "epoch": 12.801204819277109, + "grad_norm": 2.107712984085083, + "learning_rate": 1.0667670682730923e-05, + "loss": 0.505, + "step": 2125 + }, + { + "epoch": 12.831325301204819, + "grad_norm": 2.4264347553253174, + "learning_rate": 1.069277108433735e-05, + "loss": 0.4932, + "step": 2130 + }, + { + "epoch": 12.861445783132531, + "grad_norm": 2.3123373985290527, + "learning_rate": 1.0717871485943776e-05, + "loss": 0.5024, + "step": 2135 + }, + { + "epoch": 12.891566265060241, + "grad_norm": 2.1964051723480225, + "learning_rate": 1.0742971887550202e-05, + "loss": 0.467, + "step": 2140 + }, + { + "epoch": 12.921686746987952, + "grad_norm": 2.1655547618865967, + "learning_rate": 1.0768072289156627e-05, + "loss": 0.4637, + "step": 2145 + }, + { + "epoch": 12.951807228915662, + "grad_norm": 2.684300184249878, + "learning_rate": 1.0793172690763053e-05, + "loss": 0.4325, + "step": 2150 + }, + { + "epoch": 12.981927710843374, + "grad_norm": 3.0702404975891113, + "learning_rate": 1.0818273092369479e-05, + "loss": 0.4745, + "step": 2155 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8057412504915454, + "eval_auc": 0.8595369732841825, + "eval_f1": 0.7366737739872068, + "eval_loss": 0.41435614228248596, + "eval_precision": 0.6916916916916916, + "eval_recall": 0.7879133409350056, + "eval_runtime": 18.3313, + "eval_samples_per_second": 138.724, + "eval_steps_per_second": 0.709, + "step": 2158 + }, + { + "epoch": 13.012048192771084, + "grad_norm": 1.9328081607818604, + "learning_rate": 1.0843373493975904e-05, + "loss": 0.4262, + "step": 2160 + }, + { + "epoch": 13.042168674698795, + "grad_norm": 2.341899871826172, + "learning_rate": 1.086847389558233e-05, + "loss": 0.4617, + "step": 2165 + }, + { + "epoch": 13.072289156626505, + "grad_norm": 3.2174510955810547, + "learning_rate": 1.0893574297188756e-05, + "loss": 0.4322, + "step": 2170 + }, + { + "epoch": 13.102409638554217, + "grad_norm": 2.326920509338379, + "learning_rate": 1.0918674698795181e-05, + "loss": 0.4778, + "step": 2175 + }, + { + "epoch": 13.132530120481928, + "grad_norm": 2.192728281021118, + "learning_rate": 1.0943775100401607e-05, + "loss": 0.443, + "step": 2180 + }, + { + "epoch": 13.162650602409638, + "grad_norm": 2.808758497238159, + "learning_rate": 1.0968875502008033e-05, + "loss": 0.474, + "step": 2185 + }, + { + "epoch": 13.19277108433735, + "grad_norm": 2.31294846534729, + "learning_rate": 1.0993975903614459e-05, + "loss": 0.4829, + "step": 2190 + }, + { + "epoch": 13.22289156626506, + "grad_norm": 1.9450610876083374, + "learning_rate": 1.1019076305220884e-05, + "loss": 0.4561, + "step": 2195 + }, + { + "epoch": 13.25301204819277, + "grad_norm": 1.8041836023330688, + "learning_rate": 1.104417670682731e-05, + "loss": 0.4123, + "step": 2200 + }, + { + "epoch": 13.283132530120483, + "grad_norm": 2.511220932006836, + "learning_rate": 1.1069277108433736e-05, + "loss": 0.4709, + "step": 2205 + }, + { + "epoch": 13.313253012048193, + "grad_norm": 2.2910356521606445, + "learning_rate": 1.1094377510040161e-05, + "loss": 0.4616, + "step": 2210 + }, + { + "epoch": 13.343373493975903, + "grad_norm": 2.4841256141662598, + "learning_rate": 1.1119477911646587e-05, + "loss": 0.4435, + "step": 2215 + }, + { + "epoch": 13.373493975903614, + "grad_norm": 2.2364025115966797, + "learning_rate": 1.1144578313253013e-05, + "loss": 0.4511, + "step": 2220 + }, + { + "epoch": 13.403614457831326, + "grad_norm": 2.3213870525360107, + "learning_rate": 1.1169678714859438e-05, + "loss": 0.465, + "step": 2225 + }, + { + "epoch": 13.433734939759036, + "grad_norm": 4.151866436004639, + "learning_rate": 1.1194779116465864e-05, + "loss": 0.4568, + "step": 2230 + }, + { + "epoch": 13.463855421686747, + "grad_norm": 2.545639991760254, + "learning_rate": 1.121987951807229e-05, + "loss": 0.4145, + "step": 2235 + }, + { + "epoch": 13.493975903614459, + "grad_norm": 2.292720317840576, + "learning_rate": 1.1244979919678715e-05, + "loss": 0.4596, + "step": 2240 + }, + { + "epoch": 13.524096385542169, + "grad_norm": 2.5688793659210205, + "learning_rate": 1.1270080321285141e-05, + "loss": 0.454, + "step": 2245 + }, + { + "epoch": 13.55421686746988, + "grad_norm": 2.5358495712280273, + "learning_rate": 1.1295180722891567e-05, + "loss": 0.4683, + "step": 2250 + }, + { + "epoch": 13.58433734939759, + "grad_norm": 2.0106098651885986, + "learning_rate": 1.1320281124497994e-05, + "loss": 0.4849, + "step": 2255 + }, + { + "epoch": 13.614457831325302, + "grad_norm": 2.150888204574585, + "learning_rate": 1.1345381526104418e-05, + "loss": 0.4287, + "step": 2260 + }, + { + "epoch": 13.644578313253012, + "grad_norm": 4.17469596862793, + "learning_rate": 1.1370481927710844e-05, + "loss": 0.462, + "step": 2265 + }, + { + "epoch": 13.674698795180722, + "grad_norm": 3.2579774856567383, + "learning_rate": 1.139558232931727e-05, + "loss": 0.4311, + "step": 2270 + }, + { + "epoch": 13.704819277108435, + "grad_norm": 1.8688870668411255, + "learning_rate": 1.1420682730923695e-05, + "loss": 0.4558, + "step": 2275 + }, + { + "epoch": 13.734939759036145, + "grad_norm": 2.2652182579040527, + "learning_rate": 1.144578313253012e-05, + "loss": 0.4802, + "step": 2280 + }, + { + "epoch": 13.765060240963855, + "grad_norm": 2.247284412384033, + "learning_rate": 1.1470883534136546e-05, + "loss": 0.4435, + "step": 2285 + }, + { + "epoch": 13.795180722891565, + "grad_norm": 1.9053183794021606, + "learning_rate": 1.1495983935742972e-05, + "loss": 0.4649, + "step": 2290 + }, + { + "epoch": 13.825301204819278, + "grad_norm": 2.277320623397827, + "learning_rate": 1.1521084337349398e-05, + "loss": 0.4766, + "step": 2295 + }, + { + "epoch": 13.855421686746988, + "grad_norm": 2.1285111904144287, + "learning_rate": 1.1546184738955825e-05, + "loss": 0.4283, + "step": 2300 + }, + { + "epoch": 13.885542168674698, + "grad_norm": 2.0318009853363037, + "learning_rate": 1.1571285140562249e-05, + "loss": 0.4696, + "step": 2305 + }, + { + "epoch": 13.91566265060241, + "grad_norm": 1.9120746850967407, + "learning_rate": 1.1596385542168675e-05, + "loss": 0.4624, + "step": 2310 + }, + { + "epoch": 13.94578313253012, + "grad_norm": 2.1122283935546875, + "learning_rate": 1.16214859437751e-05, + "loss": 0.4681, + "step": 2315 + }, + { + "epoch": 13.975903614457831, + "grad_norm": 2.152334451675415, + "learning_rate": 1.1646586345381528e-05, + "loss": 0.4586, + "step": 2320 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8183248132127409, + "eval_auc": 0.8854995818167631, + "eval_f1": 0.7478165938864629, + "eval_loss": 0.38780173659324646, + "eval_precision": 0.7172774869109948, + "eval_recall": 0.7810718358038768, + "eval_runtime": 17.2376, + "eval_samples_per_second": 147.526, + "eval_steps_per_second": 0.754, + "step": 2324 + }, + { + "epoch": 14.006024096385541, + "grad_norm": 2.3053746223449707, + "learning_rate": 1.1671686746987952e-05, + "loss": 0.4225, + "step": 2325 + }, + { + "epoch": 14.036144578313253, + "grad_norm": 2.693596601486206, + "learning_rate": 1.1696787148594377e-05, + "loss": 0.455, + "step": 2330 + }, + { + "epoch": 14.066265060240964, + "grad_norm": 3.381603240966797, + "learning_rate": 1.1721887550200803e-05, + "loss": 0.4546, + "step": 2335 + }, + { + "epoch": 14.096385542168674, + "grad_norm": 2.240210771560669, + "learning_rate": 1.174698795180723e-05, + "loss": 0.447, + "step": 2340 + }, + { + "epoch": 14.126506024096386, + "grad_norm": 1.8732998371124268, + "learning_rate": 1.1772088353413656e-05, + "loss": 0.4198, + "step": 2345 + }, + { + "epoch": 14.156626506024097, + "grad_norm": 2.2043216228485107, + "learning_rate": 1.179718875502008e-05, + "loss": 0.4458, + "step": 2350 + }, + { + "epoch": 14.186746987951807, + "grad_norm": 2.2034971714019775, + "learning_rate": 1.1822289156626506e-05, + "loss": 0.4537, + "step": 2355 + }, + { + "epoch": 14.216867469879517, + "grad_norm": 2.6626431941986084, + "learning_rate": 1.1847389558232933e-05, + "loss": 0.4189, + "step": 2360 + }, + { + "epoch": 14.24698795180723, + "grad_norm": 2.321343421936035, + "learning_rate": 1.1872489959839359e-05, + "loss": 0.4419, + "step": 2365 + }, + { + "epoch": 14.27710843373494, + "grad_norm": 2.189931631088257, + "learning_rate": 1.1897590361445783e-05, + "loss": 0.4476, + "step": 2370 + }, + { + "epoch": 14.30722891566265, + "grad_norm": 2.4229791164398193, + "learning_rate": 1.1922690763052208e-05, + "loss": 0.4404, + "step": 2375 + }, + { + "epoch": 14.337349397590362, + "grad_norm": 1.694631814956665, + "learning_rate": 1.1947791164658636e-05, + "loss": 0.4097, + "step": 2380 + }, + { + "epoch": 14.367469879518072, + "grad_norm": 2.774210214614868, + "learning_rate": 1.1972891566265061e-05, + "loss": 0.4623, + "step": 2385 + }, + { + "epoch": 14.397590361445783, + "grad_norm": 2.141517162322998, + "learning_rate": 1.1997991967871485e-05, + "loss": 0.4368, + "step": 2390 + }, + { + "epoch": 14.427710843373493, + "grad_norm": 2.6954643726348877, + "learning_rate": 1.2023092369477911e-05, + "loss": 0.4576, + "step": 2395 + }, + { + "epoch": 14.457831325301205, + "grad_norm": 3.5512561798095703, + "learning_rate": 1.2048192771084338e-05, + "loss": 0.4513, + "step": 2400 + }, + { + "epoch": 14.487951807228916, + "grad_norm": 2.3789210319519043, + "learning_rate": 1.2073293172690764e-05, + "loss": 0.4625, + "step": 2405 + }, + { + "epoch": 14.518072289156626, + "grad_norm": 2.6126534938812256, + "learning_rate": 1.209839357429719e-05, + "loss": 0.4442, + "step": 2410 + }, + { + "epoch": 14.548192771084338, + "grad_norm": 2.2687761783599854, + "learning_rate": 1.2123493975903614e-05, + "loss": 0.4364, + "step": 2415 + }, + { + "epoch": 14.578313253012048, + "grad_norm": 2.199284791946411, + "learning_rate": 1.2148594377510041e-05, + "loss": 0.4618, + "step": 2420 + }, + { + "epoch": 14.608433734939759, + "grad_norm": 2.1206400394439697, + "learning_rate": 1.2173694779116467e-05, + "loss": 0.4652, + "step": 2425 + }, + { + "epoch": 14.638554216867469, + "grad_norm": 2.170057535171509, + "learning_rate": 1.2198795180722893e-05, + "loss": 0.4771, + "step": 2430 + }, + { + "epoch": 14.668674698795181, + "grad_norm": 2.2524478435516357, + "learning_rate": 1.2223895582329316e-05, + "loss": 0.4573, + "step": 2435 + }, + { + "epoch": 14.698795180722891, + "grad_norm": 1.919073462486267, + "learning_rate": 1.2248995983935744e-05, + "loss": 0.3936, + "step": 2440 + }, + { + "epoch": 14.728915662650602, + "grad_norm": 2.0246472358703613, + "learning_rate": 1.227409638554217e-05, + "loss": 0.431, + "step": 2445 + }, + { + "epoch": 14.759036144578314, + "grad_norm": 4.081275463104248, + "learning_rate": 1.2299196787148595e-05, + "loss": 0.4491, + "step": 2450 + }, + { + "epoch": 14.789156626506024, + "grad_norm": 2.2841455936431885, + "learning_rate": 1.2324297188755021e-05, + "loss": 0.4857, + "step": 2455 + }, + { + "epoch": 14.819277108433734, + "grad_norm": 3.2180285453796387, + "learning_rate": 1.2349397590361447e-05, + "loss": 0.4147, + "step": 2460 + }, + { + "epoch": 14.849397590361447, + "grad_norm": 2.3523502349853516, + "learning_rate": 1.2374497991967872e-05, + "loss": 0.4693, + "step": 2465 + }, + { + "epoch": 14.879518072289157, + "grad_norm": 3.6546506881713867, + "learning_rate": 1.2399598393574298e-05, + "loss": 0.4628, + "step": 2470 + }, + { + "epoch": 14.909638554216867, + "grad_norm": 1.9689726829528809, + "learning_rate": 1.2424698795180724e-05, + "loss": 0.4647, + "step": 2475 + }, + { + "epoch": 14.939759036144578, + "grad_norm": 2.322652578353882, + "learning_rate": 1.244979919678715e-05, + "loss": 0.4137, + "step": 2480 + }, + { + "epoch": 14.96987951807229, + "grad_norm": 4.471823215484619, + "learning_rate": 1.2474899598393575e-05, + "loss": 0.5001, + "step": 2485 + }, + { + "epoch": 15.0, + "grad_norm": 3.832202911376953, + "learning_rate": 1.25e-05, + "loss": 0.437, + "step": 2490 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8222571765631145, + "eval_auc": 0.8772050439331947, + "eval_f1": 0.7431818181818182, + "eval_loss": 0.38412806391716003, + "eval_precision": 0.7406568516421291, + "eval_recall": 0.7457240592930444, + "eval_runtime": 17.2897, + "eval_samples_per_second": 147.082, + "eval_steps_per_second": 0.752, + "step": 2490 + }, + { + "epoch": 15.03012048192771, + "grad_norm": 2.557508945465088, + "learning_rate": 1.2525100401606426e-05, + "loss": 0.4283, + "step": 2495 + }, + { + "epoch": 15.060240963855422, + "grad_norm": 2.2491331100463867, + "learning_rate": 1.2550200803212852e-05, + "loss": 0.4186, + "step": 2500 + }, + { + "epoch": 15.090361445783133, + "grad_norm": 2.5220303535461426, + "learning_rate": 1.257530120481928e-05, + "loss": 0.4495, + "step": 2505 + }, + { + "epoch": 15.120481927710843, + "grad_norm": 2.5314533710479736, + "learning_rate": 1.2600401606425705e-05, + "loss": 0.4852, + "step": 2510 + }, + { + "epoch": 15.150602409638553, + "grad_norm": 2.1047887802124023, + "learning_rate": 1.2625502008032127e-05, + "loss": 0.4289, + "step": 2515 + }, + { + "epoch": 15.180722891566266, + "grad_norm": 2.526026725769043, + "learning_rate": 1.2650602409638555e-05, + "loss": 0.409, + "step": 2520 + }, + { + "epoch": 15.210843373493976, + "grad_norm": 3.2399747371673584, + "learning_rate": 1.267570281124498e-05, + "loss": 0.4763, + "step": 2525 + }, + { + "epoch": 15.240963855421686, + "grad_norm": 2.312688112258911, + "learning_rate": 1.2700803212851406e-05, + "loss": 0.4512, + "step": 2530 + }, + { + "epoch": 15.271084337349398, + "grad_norm": 2.7564501762390137, + "learning_rate": 1.2725903614457832e-05, + "loss": 0.4458, + "step": 2535 + }, + { + "epoch": 15.301204819277109, + "grad_norm": 2.0259008407592773, + "learning_rate": 1.2751004016064257e-05, + "loss": 0.4129, + "step": 2540 + }, + { + "epoch": 15.331325301204819, + "grad_norm": 1.8796688318252563, + "learning_rate": 1.2776104417670685e-05, + "loss": 0.443, + "step": 2545 + }, + { + "epoch": 15.36144578313253, + "grad_norm": 5.187371730804443, + "learning_rate": 1.280120481927711e-05, + "loss": 0.426, + "step": 2550 + }, + { + "epoch": 15.391566265060241, + "grad_norm": 2.6445512771606445, + "learning_rate": 1.2826305220883536e-05, + "loss": 0.4807, + "step": 2555 + }, + { + "epoch": 15.421686746987952, + "grad_norm": 2.3695054054260254, + "learning_rate": 1.285140562248996e-05, + "loss": 0.3946, + "step": 2560 + }, + { + "epoch": 15.451807228915662, + "grad_norm": 2.0859386920928955, + "learning_rate": 1.2876506024096386e-05, + "loss": 0.4344, + "step": 2565 + }, + { + "epoch": 15.481927710843374, + "grad_norm": 2.2627930641174316, + "learning_rate": 1.2901606425702811e-05, + "loss": 0.4037, + "step": 2570 + }, + { + "epoch": 15.512048192771084, + "grad_norm": 2.312237024307251, + "learning_rate": 1.2926706827309237e-05, + "loss": 0.4294, + "step": 2575 + }, + { + "epoch": 15.542168674698795, + "grad_norm": 2.807929515838623, + "learning_rate": 1.2951807228915663e-05, + "loss": 0.4332, + "step": 2580 + }, + { + "epoch": 15.572289156626507, + "grad_norm": 2.819624662399292, + "learning_rate": 1.297690763052209e-05, + "loss": 0.433, + "step": 2585 + }, + { + "epoch": 15.602409638554217, + "grad_norm": 2.2831618785858154, + "learning_rate": 1.3002008032128516e-05, + "loss": 0.4538, + "step": 2590 + }, + { + "epoch": 15.632530120481928, + "grad_norm": 2.8296332359313965, + "learning_rate": 1.3027108433734941e-05, + "loss": 0.3969, + "step": 2595 + }, + { + "epoch": 15.662650602409638, + "grad_norm": 2.7965662479400635, + "learning_rate": 1.3052208835341367e-05, + "loss": 0.4303, + "step": 2600 + }, + { + "epoch": 15.69277108433735, + "grad_norm": 2.252262830734253, + "learning_rate": 1.3077309236947791e-05, + "loss": 0.4252, + "step": 2605 + }, + { + "epoch": 15.72289156626506, + "grad_norm": 2.0010712146759033, + "learning_rate": 1.3102409638554217e-05, + "loss": 0.4336, + "step": 2610 + }, + { + "epoch": 15.75301204819277, + "grad_norm": 2.653491497039795, + "learning_rate": 1.3127510040160642e-05, + "loss": 0.4209, + "step": 2615 + }, + { + "epoch": 15.783132530120483, + "grad_norm": 2.3631601333618164, + "learning_rate": 1.3152610441767068e-05, + "loss": 0.4405, + "step": 2620 + }, + { + "epoch": 15.813253012048193, + "grad_norm": 1.9908761978149414, + "learning_rate": 1.3177710843373495e-05, + "loss": 0.4168, + "step": 2625 + }, + { + "epoch": 15.843373493975903, + "grad_norm": 2.210310697555542, + "learning_rate": 1.3202811244979921e-05, + "loss": 0.46, + "step": 2630 + }, + { + "epoch": 15.873493975903614, + "grad_norm": 2.0091185569763184, + "learning_rate": 1.3227911646586347e-05, + "loss": 0.4702, + "step": 2635 + }, + { + "epoch": 15.903614457831326, + "grad_norm": 2.4611587524414062, + "learning_rate": 1.3253012048192772e-05, + "loss": 0.4707, + "step": 2640 + }, + { + "epoch": 15.933734939759036, + "grad_norm": 2.493058443069458, + "learning_rate": 1.3278112449799198e-05, + "loss": 0.4458, + "step": 2645 + }, + { + "epoch": 15.963855421686747, + "grad_norm": 3.501375436782837, + "learning_rate": 1.3303212851405622e-05, + "loss": 0.4466, + "step": 2650 + }, + { + "epoch": 15.993975903614459, + "grad_norm": 3.2416062355041504, + "learning_rate": 1.3328313253012048e-05, + "loss": 0.429, + "step": 2655 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8301219032638616, + "eval_auc": 0.8901451116364448, + "eval_f1": 0.76, + "eval_loss": 0.3746268153190613, + "eval_precision": 0.7410617551462622, + "eval_recall": 0.7799315849486887, + "eval_runtime": 17.3366, + "eval_samples_per_second": 146.684, + "eval_steps_per_second": 0.75, + "step": 2656 + }, + { + "epoch": 16.02409638554217, + "grad_norm": 2.0801899433135986, + "learning_rate": 1.3353413654618473e-05, + "loss": 0.4283, + "step": 2660 + }, + { + "epoch": 16.05421686746988, + "grad_norm": 2.239642858505249, + "learning_rate": 1.3378514056224901e-05, + "loss": 0.4091, + "step": 2665 + }, + { + "epoch": 16.08433734939759, + "grad_norm": 2.6524431705474854, + "learning_rate": 1.3403614457831327e-05, + "loss": 0.4487, + "step": 2670 + }, + { + "epoch": 16.1144578313253, + "grad_norm": 2.178330183029175, + "learning_rate": 1.3428714859437752e-05, + "loss": 0.4181, + "step": 2675 + }, + { + "epoch": 16.14457831325301, + "grad_norm": 1.9692491292953491, + "learning_rate": 1.3453815261044178e-05, + "loss": 0.3775, + "step": 2680 + }, + { + "epoch": 16.174698795180724, + "grad_norm": 2.0983362197875977, + "learning_rate": 1.3478915662650604e-05, + "loss": 0.3768, + "step": 2685 + }, + { + "epoch": 16.204819277108435, + "grad_norm": 2.7713608741760254, + "learning_rate": 1.3504016064257031e-05, + "loss": 0.4351, + "step": 2690 + }, + { + "epoch": 16.234939759036145, + "grad_norm": 2.1661791801452637, + "learning_rate": 1.3529116465863453e-05, + "loss": 0.4291, + "step": 2695 + }, + { + "epoch": 16.265060240963855, + "grad_norm": 3.4588308334350586, + "learning_rate": 1.3554216867469879e-05, + "loss": 0.4601, + "step": 2700 + }, + { + "epoch": 16.295180722891565, + "grad_norm": 2.1662585735321045, + "learning_rate": 1.3579317269076306e-05, + "loss": 0.4379, + "step": 2705 + }, + { + "epoch": 16.325301204819276, + "grad_norm": 4.048949241638184, + "learning_rate": 1.3604417670682732e-05, + "loss": 0.4661, + "step": 2710 + }, + { + "epoch": 16.355421686746986, + "grad_norm": 2.671419858932495, + "learning_rate": 1.3629518072289158e-05, + "loss": 0.4433, + "step": 2715 + }, + { + "epoch": 16.3855421686747, + "grad_norm": 2.4763331413269043, + "learning_rate": 1.3654618473895583e-05, + "loss": 0.4544, + "step": 2720 + }, + { + "epoch": 16.41566265060241, + "grad_norm": 2.3892018795013428, + "learning_rate": 1.3679718875502009e-05, + "loss": 0.4358, + "step": 2725 + }, + { + "epoch": 16.44578313253012, + "grad_norm": 2.494692325592041, + "learning_rate": 1.3704819277108436e-05, + "loss": 0.4141, + "step": 2730 + }, + { + "epoch": 16.47590361445783, + "grad_norm": 3.030679702758789, + "learning_rate": 1.3729919678714859e-05, + "loss": 0.4181, + "step": 2735 + }, + { + "epoch": 16.50602409638554, + "grad_norm": 2.8109540939331055, + "learning_rate": 1.3755020080321284e-05, + "loss": 0.4175, + "step": 2740 + }, + { + "epoch": 16.53614457831325, + "grad_norm": 2.295210599899292, + "learning_rate": 1.378012048192771e-05, + "loss": 0.4048, + "step": 2745 + }, + { + "epoch": 16.566265060240966, + "grad_norm": 2.345912456512451, + "learning_rate": 1.3805220883534137e-05, + "loss": 0.4578, + "step": 2750 + }, + { + "epoch": 16.596385542168676, + "grad_norm": 4.10270357131958, + "learning_rate": 1.3830321285140563e-05, + "loss": 0.4514, + "step": 2755 + }, + { + "epoch": 16.626506024096386, + "grad_norm": 3.1970739364624023, + "learning_rate": 1.3855421686746989e-05, + "loss": 0.4348, + "step": 2760 + }, + { + "epoch": 16.656626506024097, + "grad_norm": 2.188840866088867, + "learning_rate": 1.3880522088353414e-05, + "loss": 0.383, + "step": 2765 + }, + { + "epoch": 16.686746987951807, + "grad_norm": 2.1729989051818848, + "learning_rate": 1.390562248995984e-05, + "loss": 0.4274, + "step": 2770 + }, + { + "epoch": 16.716867469879517, + "grad_norm": 2.270476818084717, + "learning_rate": 1.3930722891566267e-05, + "loss": 0.4234, + "step": 2775 + }, + { + "epoch": 16.746987951807228, + "grad_norm": 2.173185110092163, + "learning_rate": 1.395582329317269e-05, + "loss": 0.3879, + "step": 2780 + }, + { + "epoch": 16.77710843373494, + "grad_norm": 2.8911218643188477, + "learning_rate": 1.3980923694779115e-05, + "loss": 0.456, + "step": 2785 + }, + { + "epoch": 16.80722891566265, + "grad_norm": 2.338966131210327, + "learning_rate": 1.4006024096385543e-05, + "loss": 0.458, + "step": 2790 + }, + { + "epoch": 16.837349397590362, + "grad_norm": 2.3324530124664307, + "learning_rate": 1.4031124497991968e-05, + "loss": 0.4686, + "step": 2795 + }, + { + "epoch": 16.867469879518072, + "grad_norm": 2.3572144508361816, + "learning_rate": 1.4056224899598394e-05, + "loss": 0.3995, + "step": 2800 + }, + { + "epoch": 16.897590361445783, + "grad_norm": 1.8775089979171753, + "learning_rate": 1.408132530120482e-05, + "loss": 0.3767, + "step": 2805 + }, + { + "epoch": 16.927710843373493, + "grad_norm": 2.193387508392334, + "learning_rate": 1.4106425702811245e-05, + "loss": 0.4548, + "step": 2810 + }, + { + "epoch": 16.957831325301203, + "grad_norm": 2.899744987487793, + "learning_rate": 1.4131526104417673e-05, + "loss": 0.4614, + "step": 2815 + }, + { + "epoch": 16.987951807228917, + "grad_norm": 2.185055732727051, + "learning_rate": 1.4156626506024098e-05, + "loss": 0.4286, + "step": 2820 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8305151395988989, + "eval_auc": 0.8864002841729622, + "eval_f1": 0.7598885793871867, + "eval_loss": 0.37183433771133423, + "eval_precision": 0.7429193899782135, + "eval_recall": 0.7776510832383124, + "eval_runtime": 19.1171, + "eval_samples_per_second": 133.022, + "eval_steps_per_second": 0.68, + "step": 2822 + }, + { + "epoch": 17.018072289156628, + "grad_norm": 2.4193804264068604, + "learning_rate": 1.418172690763052e-05, + "loss": 0.4336, + "step": 2825 + }, + { + "epoch": 17.048192771084338, + "grad_norm": 2.2396481037139893, + "learning_rate": 1.4206827309236948e-05, + "loss": 0.4114, + "step": 2830 + }, + { + "epoch": 17.07831325301205, + "grad_norm": 3.2262861728668213, + "learning_rate": 1.4231927710843374e-05, + "loss": 0.4276, + "step": 2835 + }, + { + "epoch": 17.10843373493976, + "grad_norm": 2.5907819271087646, + "learning_rate": 1.42570281124498e-05, + "loss": 0.4324, + "step": 2840 + }, + { + "epoch": 17.13855421686747, + "grad_norm": 2.5259037017822266, + "learning_rate": 1.4282128514056225e-05, + "loss": 0.4699, + "step": 2845 + }, + { + "epoch": 17.16867469879518, + "grad_norm": 2.109802007675171, + "learning_rate": 1.430722891566265e-05, + "loss": 0.3989, + "step": 2850 + }, + { + "epoch": 17.198795180722893, + "grad_norm": 2.3156588077545166, + "learning_rate": 1.4332329317269078e-05, + "loss": 0.4175, + "step": 2855 + }, + { + "epoch": 17.228915662650603, + "grad_norm": 2.192671060562134, + "learning_rate": 1.4357429718875504e-05, + "loss": 0.3869, + "step": 2860 + }, + { + "epoch": 17.259036144578314, + "grad_norm": 3.8719916343688965, + "learning_rate": 1.438253012048193e-05, + "loss": 0.442, + "step": 2865 + }, + { + "epoch": 17.289156626506024, + "grad_norm": 2.4408538341522217, + "learning_rate": 1.4407630522088353e-05, + "loss": 0.3831, + "step": 2870 + }, + { + "epoch": 17.319277108433734, + "grad_norm": 2.5923538208007812, + "learning_rate": 1.4432730923694779e-05, + "loss": 0.4272, + "step": 2875 + }, + { + "epoch": 17.349397590361445, + "grad_norm": 2.708015203475952, + "learning_rate": 1.4457831325301205e-05, + "loss": 0.4667, + "step": 2880 + }, + { + "epoch": 17.379518072289155, + "grad_norm": 2.7255101203918457, + "learning_rate": 1.448293172690763e-05, + "loss": 0.4217, + "step": 2885 + }, + { + "epoch": 17.40963855421687, + "grad_norm": 1.8880677223205566, + "learning_rate": 1.4508032128514056e-05, + "loss": 0.3921, + "step": 2890 + }, + { + "epoch": 17.43975903614458, + "grad_norm": 3.514613389968872, + "learning_rate": 1.4533132530120484e-05, + "loss": 0.4307, + "step": 2895 + }, + { + "epoch": 17.46987951807229, + "grad_norm": 2.4978322982788086, + "learning_rate": 1.455823293172691e-05, + "loss": 0.3752, + "step": 2900 + }, + { + "epoch": 17.5, + "grad_norm": 2.1256654262542725, + "learning_rate": 1.4583333333333335e-05, + "loss": 0.4139, + "step": 2905 + }, + { + "epoch": 17.53012048192771, + "grad_norm": 2.6442389488220215, + "learning_rate": 1.460843373493976e-05, + "loss": 0.4283, + "step": 2910 + }, + { + "epoch": 17.56024096385542, + "grad_norm": 2.4239025115966797, + "learning_rate": 1.4633534136546185e-05, + "loss": 0.3972, + "step": 2915 + }, + { + "epoch": 17.59036144578313, + "grad_norm": 2.8832712173461914, + "learning_rate": 1.465863453815261e-05, + "loss": 0.4248, + "step": 2920 + }, + { + "epoch": 17.620481927710845, + "grad_norm": 2.305220603942871, + "learning_rate": 1.4683734939759036e-05, + "loss": 0.3948, + "step": 2925 + }, + { + "epoch": 17.650602409638555, + "grad_norm": 3.6476662158966064, + "learning_rate": 1.4708835341365462e-05, + "loss": 0.427, + "step": 2930 + }, + { + "epoch": 17.680722891566266, + "grad_norm": 2.280860424041748, + "learning_rate": 1.4733935742971889e-05, + "loss": 0.431, + "step": 2935 + }, + { + "epoch": 17.710843373493976, + "grad_norm": 2.509981155395508, + "learning_rate": 1.4759036144578315e-05, + "loss": 0.423, + "step": 2940 + }, + { + "epoch": 17.740963855421686, + "grad_norm": 2.111283302307129, + "learning_rate": 1.478413654618474e-05, + "loss": 0.4201, + "step": 2945 + }, + { + "epoch": 17.771084337349397, + "grad_norm": 2.2227985858917236, + "learning_rate": 1.4809236947791166e-05, + "loss": 0.3701, + "step": 2950 + }, + { + "epoch": 17.801204819277107, + "grad_norm": 2.4401426315307617, + "learning_rate": 1.4834337349397592e-05, + "loss": 0.425, + "step": 2955 + }, + { + "epoch": 17.83132530120482, + "grad_norm": 2.3500068187713623, + "learning_rate": 1.4859437751004016e-05, + "loss": 0.4274, + "step": 2960 + }, + { + "epoch": 17.86144578313253, + "grad_norm": 2.8407883644104004, + "learning_rate": 1.4884538152610441e-05, + "loss": 0.4129, + "step": 2965 + }, + { + "epoch": 17.89156626506024, + "grad_norm": 3.6744883060455322, + "learning_rate": 1.4909638554216867e-05, + "loss": 0.3994, + "step": 2970 + }, + { + "epoch": 17.92168674698795, + "grad_norm": 2.0906033515930176, + "learning_rate": 1.4934738955823294e-05, + "loss": 0.3993, + "step": 2975 + }, + { + "epoch": 17.951807228915662, + "grad_norm": 2.5579147338867188, + "learning_rate": 1.495983935742972e-05, + "loss": 0.3937, + "step": 2980 + }, + { + "epoch": 17.981927710843372, + "grad_norm": 2.253021001815796, + "learning_rate": 1.4984939759036146e-05, + "loss": 0.3662, + "step": 2985 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.8462445930003932, + "eval_auc": 0.9037278537412684, + "eval_f1": 0.7751581368602645, + "eval_loss": 0.3433316648006439, + "eval_precision": 0.7819025522041764, + "eval_recall": 0.7685290763968073, + "eval_runtime": 18.8459, + "eval_samples_per_second": 134.937, + "eval_steps_per_second": 0.69, + "step": 2988 + }, + { + "epoch": 18.012048192771083, + "grad_norm": 2.6839776039123535, + "learning_rate": 1.5010040160642571e-05, + "loss": 0.405, + "step": 2990 + }, + { + "epoch": 18.042168674698797, + "grad_norm": 2.047701120376587, + "learning_rate": 1.5035140562248997e-05, + "loss": 0.3964, + "step": 2995 + }, + { + "epoch": 18.072289156626507, + "grad_norm": 2.45595383644104, + "learning_rate": 1.5060240963855424e-05, + "loss": 0.4076, + "step": 3000 + }, + { + "epoch": 18.102409638554217, + "grad_norm": 2.435271978378296, + "learning_rate": 1.5085341365461847e-05, + "loss": 0.4107, + "step": 3005 + }, + { + "epoch": 18.132530120481928, + "grad_norm": 2.5313150882720947, + "learning_rate": 1.5110441767068272e-05, + "loss": 0.3662, + "step": 3010 + }, + { + "epoch": 18.162650602409638, + "grad_norm": 2.0792062282562256, + "learning_rate": 1.51355421686747e-05, + "loss": 0.3682, + "step": 3015 + }, + { + "epoch": 18.19277108433735, + "grad_norm": 2.737271308898926, + "learning_rate": 1.5160642570281125e-05, + "loss": 0.4058, + "step": 3020 + }, + { + "epoch": 18.22289156626506, + "grad_norm": 2.3708345890045166, + "learning_rate": 1.5185742971887551e-05, + "loss": 0.4121, + "step": 3025 + }, + { + "epoch": 18.253012048192772, + "grad_norm": 2.9924778938293457, + "learning_rate": 1.5210843373493977e-05, + "loss": 0.4572, + "step": 3030 + }, + { + "epoch": 18.283132530120483, + "grad_norm": 2.2808122634887695, + "learning_rate": 1.5235943775100402e-05, + "loss": 0.4439, + "step": 3035 + }, + { + "epoch": 18.313253012048193, + "grad_norm": 2.355435371398926, + "learning_rate": 1.526104417670683e-05, + "loss": 0.4418, + "step": 3040 + }, + { + "epoch": 18.343373493975903, + "grad_norm": 2.5382118225097656, + "learning_rate": 1.5286144578313255e-05, + "loss": 0.3806, + "step": 3045 + }, + { + "epoch": 18.373493975903614, + "grad_norm": 2.1816794872283936, + "learning_rate": 1.5311244979919678e-05, + "loss": 0.4308, + "step": 3050 + }, + { + "epoch": 18.403614457831324, + "grad_norm": 3.9759576320648193, + "learning_rate": 1.5336345381526103e-05, + "loss": 0.4222, + "step": 3055 + }, + { + "epoch": 18.433734939759034, + "grad_norm": 1.9446662664413452, + "learning_rate": 1.536144578313253e-05, + "loss": 0.3724, + "step": 3060 + }, + { + "epoch": 18.46385542168675, + "grad_norm": 2.1402506828308105, + "learning_rate": 1.5386546184738955e-05, + "loss": 0.3833, + "step": 3065 + }, + { + "epoch": 18.49397590361446, + "grad_norm": 2.0538694858551025, + "learning_rate": 1.5411646586345384e-05, + "loss": 0.4076, + "step": 3070 + }, + { + "epoch": 18.52409638554217, + "grad_norm": 4.788428783416748, + "learning_rate": 1.543674698795181e-05, + "loss": 0.3939, + "step": 3075 + }, + { + "epoch": 18.55421686746988, + "grad_norm": 2.8548316955566406, + "learning_rate": 1.5461847389558235e-05, + "loss": 0.4389, + "step": 3080 + }, + { + "epoch": 18.58433734939759, + "grad_norm": 2.549076795578003, + "learning_rate": 1.548694779116466e-05, + "loss": 0.3949, + "step": 3085 + }, + { + "epoch": 18.6144578313253, + "grad_norm": 3.204897165298462, + "learning_rate": 1.5512048192771086e-05, + "loss": 0.4241, + "step": 3090 + }, + { + "epoch": 18.644578313253014, + "grad_norm": 2.265899181365967, + "learning_rate": 1.553714859437751e-05, + "loss": 0.4176, + "step": 3095 + }, + { + "epoch": 18.674698795180724, + "grad_norm": 2.1757800579071045, + "learning_rate": 1.5562248995983934e-05, + "loss": 0.3993, + "step": 3100 + }, + { + "epoch": 18.704819277108435, + "grad_norm": 2.321087121963501, + "learning_rate": 1.558734939759036e-05, + "loss": 0.3926, + "step": 3105 + }, + { + "epoch": 18.734939759036145, + "grad_norm": 2.9241983890533447, + "learning_rate": 1.561244979919679e-05, + "loss": 0.3926, + "step": 3110 + }, + { + "epoch": 18.765060240963855, + "grad_norm": 2.5136919021606445, + "learning_rate": 1.5637550200803215e-05, + "loss": 0.3915, + "step": 3115 + }, + { + "epoch": 18.795180722891565, + "grad_norm": 2.293180227279663, + "learning_rate": 1.566265060240964e-05, + "loss": 0.4226, + "step": 3120 + }, + { + "epoch": 18.825301204819276, + "grad_norm": 3.1935389041900635, + "learning_rate": 1.5687751004016066e-05, + "loss": 0.4073, + "step": 3125 + }, + { + "epoch": 18.855421686746986, + "grad_norm": 2.0698330402374268, + "learning_rate": 1.5712851405622492e-05, + "loss": 0.4049, + "step": 3130 + }, + { + "epoch": 18.8855421686747, + "grad_norm": 2.8074769973754883, + "learning_rate": 1.5737951807228914e-05, + "loss": 0.4053, + "step": 3135 + }, + { + "epoch": 18.91566265060241, + "grad_norm": 3.0730950832366943, + "learning_rate": 1.576305220883534e-05, + "loss": 0.4107, + "step": 3140 + }, + { + "epoch": 18.94578313253012, + "grad_norm": 2.4239420890808105, + "learning_rate": 1.5788152610441766e-05, + "loss": 0.4033, + "step": 3145 + }, + { + "epoch": 18.97590361445783, + "grad_norm": 2.209717035293579, + "learning_rate": 1.5813253012048195e-05, + "loss": 0.4602, + "step": 3150 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.853716083366103, + "eval_auc": 0.912781418154491, + "eval_f1": 0.7912457912457912, + "eval_loss": 0.33109456300735474, + "eval_precision": 0.7790055248618785, + "eval_recall": 0.8038768529076397, + "eval_runtime": 19.3279, + "eval_samples_per_second": 131.571, + "eval_steps_per_second": 0.673, + "step": 3154 + }, + { + "epoch": 19.00602409638554, + "grad_norm": 2.2700791358947754, + "learning_rate": 1.583835341365462e-05, + "loss": 0.4574, + "step": 3155 + }, + { + "epoch": 19.03614457831325, + "grad_norm": 1.9966039657592773, + "learning_rate": 1.5863453815261046e-05, + "loss": 0.3946, + "step": 3160 + }, + { + "epoch": 19.066265060240966, + "grad_norm": 2.130915403366089, + "learning_rate": 1.588855421686747e-05, + "loss": 0.3663, + "step": 3165 + }, + { + "epoch": 19.096385542168676, + "grad_norm": 2.385338306427002, + "learning_rate": 1.5913654618473897e-05, + "loss": 0.4464, + "step": 3170 + }, + { + "epoch": 19.126506024096386, + "grad_norm": 2.0878496170043945, + "learning_rate": 1.5938755020080323e-05, + "loss": 0.3871, + "step": 3175 + }, + { + "epoch": 19.156626506024097, + "grad_norm": 3.1889541149139404, + "learning_rate": 1.5963855421686745e-05, + "loss": 0.3963, + "step": 3180 + }, + { + "epoch": 19.186746987951807, + "grad_norm": 2.2727162837982178, + "learning_rate": 1.598895582329317e-05, + "loss": 0.3706, + "step": 3185 + }, + { + "epoch": 19.216867469879517, + "grad_norm": 2.489370107650757, + "learning_rate": 1.60140562248996e-05, + "loss": 0.4172, + "step": 3190 + }, + { + "epoch": 19.246987951807228, + "grad_norm": 2.4061601161956787, + "learning_rate": 1.6039156626506026e-05, + "loss": 0.3973, + "step": 3195 + }, + { + "epoch": 19.27710843373494, + "grad_norm": 2.163954734802246, + "learning_rate": 1.606425702811245e-05, + "loss": 0.347, + "step": 3200 + }, + { + "epoch": 19.30722891566265, + "grad_norm": 1.9165501594543457, + "learning_rate": 1.6089357429718877e-05, + "loss": 0.3723, + "step": 3205 + }, + { + "epoch": 19.337349397590362, + "grad_norm": 2.6702020168304443, + "learning_rate": 1.6114457831325303e-05, + "loss": 0.4096, + "step": 3210 + }, + { + "epoch": 19.367469879518072, + "grad_norm": 2.3621468544006348, + "learning_rate": 1.613955823293173e-05, + "loss": 0.3835, + "step": 3215 + }, + { + "epoch": 19.397590361445783, + "grad_norm": 2.0021228790283203, + "learning_rate": 1.6164658634538154e-05, + "loss": 0.3666, + "step": 3220 + }, + { + "epoch": 19.427710843373493, + "grad_norm": 2.663166046142578, + "learning_rate": 1.6189759036144576e-05, + "loss": 0.4176, + "step": 3225 + }, + { + "epoch": 19.457831325301203, + "grad_norm": 2.1119351387023926, + "learning_rate": 1.6214859437751005e-05, + "loss": 0.4178, + "step": 3230 + }, + { + "epoch": 19.487951807228917, + "grad_norm": 2.3716650009155273, + "learning_rate": 1.623995983935743e-05, + "loss": 0.4237, + "step": 3235 + }, + { + "epoch": 19.518072289156628, + "grad_norm": 3.074990749359131, + "learning_rate": 1.6265060240963857e-05, + "loss": 0.4067, + "step": 3240 + }, + { + "epoch": 19.548192771084338, + "grad_norm": 2.659364938735962, + "learning_rate": 1.6290160642570282e-05, + "loss": 0.3987, + "step": 3245 + }, + { + "epoch": 19.57831325301205, + "grad_norm": 2.0575156211853027, + "learning_rate": 1.6315261044176708e-05, + "loss": 0.3946, + "step": 3250 + }, + { + "epoch": 19.60843373493976, + "grad_norm": 2.162975311279297, + "learning_rate": 1.6340361445783134e-05, + "loss": 0.3781, + "step": 3255 + }, + { + "epoch": 19.63855421686747, + "grad_norm": 3.1139814853668213, + "learning_rate": 1.636546184738956e-05, + "loss": 0.4018, + "step": 3260 + }, + { + "epoch": 19.66867469879518, + "grad_norm": 2.6446077823638916, + "learning_rate": 1.6390562248995985e-05, + "loss": 0.3795, + "step": 3265 + }, + { + "epoch": 19.698795180722893, + "grad_norm": 2.3214240074157715, + "learning_rate": 1.641566265060241e-05, + "loss": 0.3774, + "step": 3270 + }, + { + "epoch": 19.728915662650603, + "grad_norm": 2.515679359436035, + "learning_rate": 1.6440763052208836e-05, + "loss": 0.3648, + "step": 3275 + }, + { + "epoch": 19.759036144578314, + "grad_norm": 3.9370508193969727, + "learning_rate": 1.6465863453815262e-05, + "loss": 0.3597, + "step": 3280 + }, + { + "epoch": 19.789156626506024, + "grad_norm": 2.8674943447113037, + "learning_rate": 1.6490963855421688e-05, + "loss": 0.4004, + "step": 3285 + }, + { + "epoch": 19.819277108433734, + "grad_norm": 2.5606460571289062, + "learning_rate": 1.6516064257028113e-05, + "loss": 0.3831, + "step": 3290 + }, + { + "epoch": 19.849397590361445, + "grad_norm": 2.4989705085754395, + "learning_rate": 1.654116465863454e-05, + "loss": 0.4079, + "step": 3295 + }, + { + "epoch": 19.879518072289155, + "grad_norm": 2.4194695949554443, + "learning_rate": 1.6566265060240965e-05, + "loss": 0.3862, + "step": 3300 + }, + { + "epoch": 19.90963855421687, + "grad_norm": 2.933333396911621, + "learning_rate": 1.659136546184739e-05, + "loss": 0.4224, + "step": 3305 + }, + { + "epoch": 19.93975903614458, + "grad_norm": 2.5552918910980225, + "learning_rate": 1.6616465863453816e-05, + "loss": 0.3786, + "step": 3310 + }, + { + "epoch": 19.96987951807229, + "grad_norm": 2.3705222606658936, + "learning_rate": 1.6641566265060242e-05, + "loss": 0.3776, + "step": 3315 + }, + { + "epoch": 20.0, + "grad_norm": 2.3991634845733643, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.3774, + "step": 3320 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8446716476602438, + "eval_auc": 0.9032141933170075, + "eval_f1": 0.7796988287785834, + "eval_loss": 0.344701886177063, + "eval_precision": 0.7631004366812227, + "eval_recall": 0.7970353477765109, + "eval_runtime": 19.3433, + "eval_samples_per_second": 131.467, + "eval_steps_per_second": 0.672, + "step": 3320 + }, + { + "epoch": 20.03012048192771, + "grad_norm": 2.448899984359741, + "learning_rate": 1.6691767068273093e-05, + "loss": 0.4055, + "step": 3325 + }, + { + "epoch": 20.06024096385542, + "grad_norm": 2.2480709552764893, + "learning_rate": 1.671686746987952e-05, + "loss": 0.377, + "step": 3330 + }, + { + "epoch": 20.09036144578313, + "grad_norm": 2.342336654663086, + "learning_rate": 1.6741967871485944e-05, + "loss": 0.3567, + "step": 3335 + }, + { + "epoch": 20.120481927710845, + "grad_norm": 3.282813549041748, + "learning_rate": 1.676706827309237e-05, + "loss": 0.3792, + "step": 3340 + }, + { + "epoch": 20.150602409638555, + "grad_norm": 15.908650398254395, + "learning_rate": 1.6792168674698796e-05, + "loss": 0.3796, + "step": 3345 + }, + { + "epoch": 20.180722891566266, + "grad_norm": 2.0319290161132812, + "learning_rate": 1.681726907630522e-05, + "loss": 0.3438, + "step": 3350 + }, + { + "epoch": 20.210843373493976, + "grad_norm": 2.748540163040161, + "learning_rate": 1.6842369477911647e-05, + "loss": 0.3816, + "step": 3355 + }, + { + "epoch": 20.240963855421686, + "grad_norm": 2.2890756130218506, + "learning_rate": 1.6867469879518073e-05, + "loss": 0.3743, + "step": 3360 + }, + { + "epoch": 20.271084337349397, + "grad_norm": 2.790903091430664, + "learning_rate": 1.68925702811245e-05, + "loss": 0.3602, + "step": 3365 + }, + { + "epoch": 20.301204819277107, + "grad_norm": 2.8439552783966064, + "learning_rate": 1.6917670682730924e-05, + "loss": 0.3961, + "step": 3370 + }, + { + "epoch": 20.33132530120482, + "grad_norm": 2.9442138671875, + "learning_rate": 1.694277108433735e-05, + "loss": 0.4049, + "step": 3375 + }, + { + "epoch": 20.36144578313253, + "grad_norm": 2.309587240219116, + "learning_rate": 1.6967871485943776e-05, + "loss": 0.3813, + "step": 3380 + }, + { + "epoch": 20.39156626506024, + "grad_norm": 2.2793304920196533, + "learning_rate": 1.69929718875502e-05, + "loss": 0.3744, + "step": 3385 + }, + { + "epoch": 20.42168674698795, + "grad_norm": 3.1960504055023193, + "learning_rate": 1.7018072289156627e-05, + "loss": 0.3549, + "step": 3390 + }, + { + "epoch": 20.451807228915662, + "grad_norm": 3.467097520828247, + "learning_rate": 1.7043172690763053e-05, + "loss": 0.393, + "step": 3395 + }, + { + "epoch": 20.481927710843372, + "grad_norm": 2.76165771484375, + "learning_rate": 1.706827309236948e-05, + "loss": 0.3616, + "step": 3400 + }, + { + "epoch": 20.512048192771083, + "grad_norm": 2.5405423641204834, + "learning_rate": 1.7093373493975904e-05, + "loss": 0.3904, + "step": 3405 + }, + { + "epoch": 20.542168674698797, + "grad_norm": 2.286052703857422, + "learning_rate": 1.711847389558233e-05, + "loss": 0.3578, + "step": 3410 + }, + { + "epoch": 20.572289156626507, + "grad_norm": 2.32137131690979, + "learning_rate": 1.7143574297188755e-05, + "loss": 0.3789, + "step": 3415 + }, + { + "epoch": 20.602409638554217, + "grad_norm": 4.155764579772949, + "learning_rate": 1.716867469879518e-05, + "loss": 0.4012, + "step": 3420 + }, + { + "epoch": 20.632530120481928, + "grad_norm": 3.8825199604034424, + "learning_rate": 1.7193775100401607e-05, + "loss": 0.4325, + "step": 3425 + }, + { + "epoch": 20.662650602409638, + "grad_norm": 2.3800666332244873, + "learning_rate": 1.7218875502008032e-05, + "loss": 0.3944, + "step": 3430 + }, + { + "epoch": 20.69277108433735, + "grad_norm": 2.744089126586914, + "learning_rate": 1.7243975903614458e-05, + "loss": 0.3912, + "step": 3435 + }, + { + "epoch": 20.72289156626506, + "grad_norm": 2.666856527328491, + "learning_rate": 1.7269076305220887e-05, + "loss": 0.4301, + "step": 3440 + }, + { + "epoch": 20.753012048192772, + "grad_norm": 2.391982316970825, + "learning_rate": 1.7294176706827313e-05, + "loss": 0.3911, + "step": 3445 + }, + { + "epoch": 20.783132530120483, + "grad_norm": 2.4049274921417236, + "learning_rate": 1.7319277108433735e-05, + "loss": 0.4008, + "step": 3450 + }, + { + "epoch": 20.813253012048193, + "grad_norm": 2.314380645751953, + "learning_rate": 1.734437751004016e-05, + "loss": 0.38, + "step": 3455 + }, + { + "epoch": 20.843373493975903, + "grad_norm": 2.4678962230682373, + "learning_rate": 1.7369477911646586e-05, + "loss": 0.3519, + "step": 3460 + }, + { + "epoch": 20.873493975903614, + "grad_norm": 2.493690013885498, + "learning_rate": 1.7394578313253012e-05, + "loss": 0.3785, + "step": 3465 + }, + { + "epoch": 20.903614457831324, + "grad_norm": 3.2063677310943604, + "learning_rate": 1.7419678714859438e-05, + "loss": 0.3828, + "step": 3470 + }, + { + "epoch": 20.933734939759034, + "grad_norm": 2.120603561401367, + "learning_rate": 1.7444779116465863e-05, + "loss": 0.368, + "step": 3475 + }, + { + "epoch": 20.96385542168675, + "grad_norm": 4.08734655380249, + "learning_rate": 1.7469879518072292e-05, + "loss": 0.3879, + "step": 3480 + }, + { + "epoch": 20.99397590361446, + "grad_norm": 2.1004254817962646, + "learning_rate": 1.7494979919678718e-05, + "loss": 0.3923, + "step": 3485 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.8623672827369249, + "eval_auc": 0.9178071456632825, + "eval_f1": 0.7928994082840237, + "eval_loss": 0.32685163617134094, + "eval_precision": 0.8241082410824109, + "eval_recall": 0.7639680729760547, + "eval_runtime": 20.6758, + "eval_samples_per_second": 122.994, + "eval_steps_per_second": 0.629, + "step": 3486 + }, + { + "epoch": 21.02409638554217, + "grad_norm": 2.5413122177124023, + "learning_rate": 1.7520080321285144e-05, + "loss": 0.4122, + "step": 3490 + }, + { + "epoch": 21.05421686746988, + "grad_norm": 2.392362594604492, + "learning_rate": 1.7545180722891566e-05, + "loss": 0.3893, + "step": 3495 + }, + { + "epoch": 21.08433734939759, + "grad_norm": 2.0848727226257324, + "learning_rate": 1.7570281124497992e-05, + "loss": 0.3634, + "step": 3500 + }, + { + "epoch": 21.1144578313253, + "grad_norm": 2.455618381500244, + "learning_rate": 1.7595381526104417e-05, + "loss": 0.364, + "step": 3505 + }, + { + "epoch": 21.14457831325301, + "grad_norm": 2.857142686843872, + "learning_rate": 1.7620481927710843e-05, + "loss": 0.3587, + "step": 3510 + }, + { + "epoch": 21.174698795180724, + "grad_norm": 2.2013301849365234, + "learning_rate": 1.764558232931727e-05, + "loss": 0.3519, + "step": 3515 + }, + { + "epoch": 21.204819277108435, + "grad_norm": 2.3021109104156494, + "learning_rate": 1.7670682730923694e-05, + "loss": 0.348, + "step": 3520 + }, + { + "epoch": 21.234939759036145, + "grad_norm": 3.415651559829712, + "learning_rate": 1.7695783132530123e-05, + "loss": 0.3986, + "step": 3525 + }, + { + "epoch": 21.265060240963855, + "grad_norm": 2.0990231037139893, + "learning_rate": 1.772088353413655e-05, + "loss": 0.3364, + "step": 3530 + }, + { + "epoch": 21.295180722891565, + "grad_norm": 3.0432047843933105, + "learning_rate": 1.774598393574297e-05, + "loss": 0.3789, + "step": 3535 + }, + { + "epoch": 21.325301204819276, + "grad_norm": 2.8268072605133057, + "learning_rate": 1.7771084337349397e-05, + "loss": 0.3893, + "step": 3540 + }, + { + "epoch": 21.355421686746986, + "grad_norm": 2.920424699783325, + "learning_rate": 1.7796184738955823e-05, + "loss": 0.352, + "step": 3545 + }, + { + "epoch": 21.3855421686747, + "grad_norm": 3.2624945640563965, + "learning_rate": 1.782128514056225e-05, + "loss": 0.3555, + "step": 3550 + }, + { + "epoch": 21.41566265060241, + "grad_norm": 2.2209317684173584, + "learning_rate": 1.7846385542168674e-05, + "loss": 0.3672, + "step": 3555 + }, + { + "epoch": 21.44578313253012, + "grad_norm": 2.224517822265625, + "learning_rate": 1.78714859437751e-05, + "loss": 0.3419, + "step": 3560 + }, + { + "epoch": 21.47590361445783, + "grad_norm": 2.8915982246398926, + "learning_rate": 1.789658634538153e-05, + "loss": 0.3926, + "step": 3565 + }, + { + "epoch": 21.50602409638554, + "grad_norm": 2.213768243789673, + "learning_rate": 1.7921686746987955e-05, + "loss": 0.3947, + "step": 3570 + }, + { + "epoch": 21.53614457831325, + "grad_norm": 2.0302085876464844, + "learning_rate": 1.794678714859438e-05, + "loss": 0.3402, + "step": 3575 + }, + { + "epoch": 21.566265060240966, + "grad_norm": 2.2996370792388916, + "learning_rate": 1.7971887550200802e-05, + "loss": 0.3934, + "step": 3580 + }, + { + "epoch": 21.596385542168676, + "grad_norm": 2.4716296195983887, + "learning_rate": 1.7996987951807228e-05, + "loss": 0.3488, + "step": 3585 + }, + { + "epoch": 21.626506024096386, + "grad_norm": 3.6752865314483643, + "learning_rate": 1.8022088353413654e-05, + "loss": 0.3884, + "step": 3590 + }, + { + "epoch": 21.656626506024097, + "grad_norm": 2.3490021228790283, + "learning_rate": 1.804718875502008e-05, + "loss": 0.4053, + "step": 3595 + }, + { + "epoch": 21.686746987951807, + "grad_norm": 3.6254935264587402, + "learning_rate": 1.8072289156626505e-05, + "loss": 0.3961, + "step": 3600 + }, + { + "epoch": 21.716867469879517, + "grad_norm": 2.5923497676849365, + "learning_rate": 1.8097389558232934e-05, + "loss": 0.3617, + "step": 3605 + }, + { + "epoch": 21.746987951807228, + "grad_norm": 2.2978155612945557, + "learning_rate": 1.812248995983936e-05, + "loss": 0.3901, + "step": 3610 + }, + { + "epoch": 21.77710843373494, + "grad_norm": 2.2318062782287598, + "learning_rate": 1.8147590361445786e-05, + "loss": 0.3437, + "step": 3615 + }, + { + "epoch": 21.80722891566265, + "grad_norm": 2.4424312114715576, + "learning_rate": 1.817269076305221e-05, + "loss": 0.3888, + "step": 3620 + }, + { + "epoch": 21.837349397590362, + "grad_norm": 1.9771126508712769, + "learning_rate": 1.8197791164658634e-05, + "loss": 0.4051, + "step": 3625 + }, + { + "epoch": 21.867469879518072, + "grad_norm": 2.4145617485046387, + "learning_rate": 1.822289156626506e-05, + "loss": 0.3732, + "step": 3630 + }, + { + "epoch": 21.897590361445783, + "grad_norm": 3.3357062339782715, + "learning_rate": 1.8247991967871485e-05, + "loss": 0.3943, + "step": 3635 + }, + { + "epoch": 21.927710843373493, + "grad_norm": 2.0927658081054688, + "learning_rate": 1.827309236947791e-05, + "loss": 0.3556, + "step": 3640 + }, + { + "epoch": 21.957831325301203, + "grad_norm": 2.8143198490142822, + "learning_rate": 1.829819277108434e-05, + "loss": 0.4176, + "step": 3645 + }, + { + "epoch": 21.987951807228917, + "grad_norm": 4.187919616699219, + "learning_rate": 1.8323293172690765e-05, + "loss": 0.4007, + "step": 3650 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.8568619740464019, + "eval_auc": 0.9210646630373929, + "eval_f1": 0.8019586507072906, + "eval_loss": 0.3195960521697998, + "eval_precision": 0.7669094693028096, + "eval_recall": 0.8403648802736602, + "eval_runtime": 19.5548, + "eval_samples_per_second": 130.045, + "eval_steps_per_second": 0.665, + "step": 3652 + }, + { + "epoch": 22.018072289156628, + "grad_norm": 2.3164548873901367, + "learning_rate": 1.834839357429719e-05, + "loss": 0.3858, + "step": 3655 + }, + { + "epoch": 22.048192771084338, + "grad_norm": 2.90985107421875, + "learning_rate": 1.8373493975903617e-05, + "loss": 0.36, + "step": 3660 + }, + { + "epoch": 22.07831325301205, + "grad_norm": 1.9936435222625732, + "learning_rate": 1.8398594377510042e-05, + "loss": 0.3651, + "step": 3665 + }, + { + "epoch": 22.10843373493976, + "grad_norm": 2.573457956314087, + "learning_rate": 1.8423694779116465e-05, + "loss": 0.355, + "step": 3670 + }, + { + "epoch": 22.13855421686747, + "grad_norm": 2.2051055431365967, + "learning_rate": 1.844879518072289e-05, + "loss": 0.3728, + "step": 3675 + }, + { + "epoch": 22.16867469879518, + "grad_norm": 3.0916407108306885, + "learning_rate": 1.8473895582329316e-05, + "loss": 0.3307, + "step": 3680 + }, + { + "epoch": 22.198795180722893, + "grad_norm": 1.9302294254302979, + "learning_rate": 1.8498995983935745e-05, + "loss": 0.3339, + "step": 3685 + }, + { + "epoch": 22.228915662650603, + "grad_norm": 2.323669672012329, + "learning_rate": 1.852409638554217e-05, + "loss": 0.3519, + "step": 3690 + }, + { + "epoch": 22.259036144578314, + "grad_norm": 2.630171537399292, + "learning_rate": 1.8549196787148596e-05, + "loss": 0.3389, + "step": 3695 + }, + { + "epoch": 22.289156626506024, + "grad_norm": 3.4154629707336426, + "learning_rate": 1.8574297188755022e-05, + "loss": 0.3859, + "step": 3700 + }, + { + "epoch": 22.319277108433734, + "grad_norm": 2.571350574493408, + "learning_rate": 1.8599397590361448e-05, + "loss": 0.4023, + "step": 3705 + }, + { + "epoch": 22.349397590361445, + "grad_norm": 3.9686989784240723, + "learning_rate": 1.8624497991967873e-05, + "loss": 0.3795, + "step": 3710 + }, + { + "epoch": 22.379518072289155, + "grad_norm": 2.3565986156463623, + "learning_rate": 1.8649598393574296e-05, + "loss": 0.3461, + "step": 3715 + }, + { + "epoch": 22.40963855421687, + "grad_norm": 2.3396501541137695, + "learning_rate": 1.867469879518072e-05, + "loss": 0.3685, + "step": 3720 + }, + { + "epoch": 22.43975903614458, + "grad_norm": 3.2269747257232666, + "learning_rate": 1.869979919678715e-05, + "loss": 0.3571, + "step": 3725 + }, + { + "epoch": 22.46987951807229, + "grad_norm": 2.388921022415161, + "learning_rate": 1.8724899598393576e-05, + "loss": 0.38, + "step": 3730 + }, + { + "epoch": 22.5, + "grad_norm": 2.4904425144195557, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.3453, + "step": 3735 + }, + { + "epoch": 22.53012048192771, + "grad_norm": 3.9464221000671387, + "learning_rate": 1.8775100401606427e-05, + "loss": 0.3572, + "step": 3740 + }, + { + "epoch": 22.56024096385542, + "grad_norm": 2.9925246238708496, + "learning_rate": 1.8800200803212853e-05, + "loss": 0.389, + "step": 3745 + }, + { + "epoch": 22.59036144578313, + "grad_norm": 2.383758544921875, + "learning_rate": 1.882530120481928e-05, + "loss": 0.3399, + "step": 3750 + }, + { + "epoch": 22.620481927710845, + "grad_norm": 2.634162187576294, + "learning_rate": 1.8850401606425704e-05, + "loss": 0.3316, + "step": 3755 + }, + { + "epoch": 22.650602409638555, + "grad_norm": 2.5719075202941895, + "learning_rate": 1.8875502008032127e-05, + "loss": 0.3976, + "step": 3760 + }, + { + "epoch": 22.680722891566266, + "grad_norm": 2.7136175632476807, + "learning_rate": 1.8900602409638556e-05, + "loss": 0.3312, + "step": 3765 + }, + { + "epoch": 22.710843373493976, + "grad_norm": 3.161940097808838, + "learning_rate": 1.892570281124498e-05, + "loss": 0.4142, + "step": 3770 + }, + { + "epoch": 22.740963855421686, + "grad_norm": 2.9586448669433594, + "learning_rate": 1.8950803212851407e-05, + "loss": 0.35, + "step": 3775 + }, + { + "epoch": 22.771084337349397, + "grad_norm": 2.721144437789917, + "learning_rate": 1.8975903614457833e-05, + "loss": 0.3401, + "step": 3780 + }, + { + "epoch": 22.801204819277107, + "grad_norm": 3.0611133575439453, + "learning_rate": 1.900100401606426e-05, + "loss": 0.3541, + "step": 3785 + }, + { + "epoch": 22.83132530120482, + "grad_norm": 2.4524519443511963, + "learning_rate": 1.9026104417670684e-05, + "loss": 0.3899, + "step": 3790 + }, + { + "epoch": 22.86144578313253, + "grad_norm": 2.0838871002197266, + "learning_rate": 1.905120481927711e-05, + "loss": 0.3568, + "step": 3795 + }, + { + "epoch": 22.89156626506024, + "grad_norm": 3.6144070625305176, + "learning_rate": 1.9076305220883535e-05, + "loss": 0.3253, + "step": 3800 + }, + { + "epoch": 22.92168674698795, + "grad_norm": 3.958813190460205, + "learning_rate": 1.910140562248996e-05, + "loss": 0.3907, + "step": 3805 + }, + { + "epoch": 22.951807228915662, + "grad_norm": 2.1493561267852783, + "learning_rate": 1.9126506024096387e-05, + "loss": 0.3422, + "step": 3810 + }, + { + "epoch": 22.981927710843372, + "grad_norm": 2.8188695907592773, + "learning_rate": 1.9151606425702813e-05, + "loss": 0.3662, + "step": 3815 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.8686590640975226, + "eval_auc": 0.9263426008944056, + "eval_f1": 0.7938271604938272, + "eval_loss": 0.3158295452594757, + "eval_precision": 0.8654104979811574, + "eval_recall": 0.7331812998859749, + "eval_runtime": 19.0721, + "eval_samples_per_second": 133.336, + "eval_steps_per_second": 0.682, + "step": 3818 + }, + { + "epoch": 23.012048192771083, + "grad_norm": 2.2430033683776855, + "learning_rate": 1.9176706827309238e-05, + "loss": 0.3296, + "step": 3820 + }, + { + "epoch": 23.042168674698797, + "grad_norm": 3.505044460296631, + "learning_rate": 1.9201807228915664e-05, + "loss": 0.3289, + "step": 3825 + }, + { + "epoch": 23.072289156626507, + "grad_norm": 2.142225980758667, + "learning_rate": 1.922690763052209e-05, + "loss": 0.3551, + "step": 3830 + }, + { + "epoch": 23.102409638554217, + "grad_norm": 2.1978600025177, + "learning_rate": 1.9252008032128515e-05, + "loss": 0.3303, + "step": 3835 + }, + { + "epoch": 23.132530120481928, + "grad_norm": 1.8615117073059082, + "learning_rate": 1.927710843373494e-05, + "loss": 0.3353, + "step": 3840 + }, + { + "epoch": 23.162650602409638, + "grad_norm": 2.1642777919769287, + "learning_rate": 1.9302208835341367e-05, + "loss": 0.318, + "step": 3845 + }, + { + "epoch": 23.19277108433735, + "grad_norm": 2.3634889125823975, + "learning_rate": 1.9327309236947792e-05, + "loss": 0.3373, + "step": 3850 + }, + { + "epoch": 23.22289156626506, + "grad_norm": 2.5314648151397705, + "learning_rate": 1.9352409638554218e-05, + "loss": 0.3287, + "step": 3855 + }, + { + "epoch": 23.253012048192772, + "grad_norm": 2.921504259109497, + "learning_rate": 1.9377510040160644e-05, + "loss": 0.3802, + "step": 3860 + }, + { + "epoch": 23.283132530120483, + "grad_norm": 1.9109606742858887, + "learning_rate": 1.940261044176707e-05, + "loss": 0.3371, + "step": 3865 + }, + { + "epoch": 23.313253012048193, + "grad_norm": 2.454645872116089, + "learning_rate": 1.9427710843373495e-05, + "loss": 0.3005, + "step": 3870 + }, + { + "epoch": 23.343373493975903, + "grad_norm": 2.995619058609009, + "learning_rate": 1.945281124497992e-05, + "loss": 0.3862, + "step": 3875 + }, + { + "epoch": 23.373493975903614, + "grad_norm": 2.480074644088745, + "learning_rate": 1.9477911646586346e-05, + "loss": 0.3488, + "step": 3880 + }, + { + "epoch": 23.403614457831324, + "grad_norm": 3.2270078659057617, + "learning_rate": 1.9503012048192772e-05, + "loss": 0.3759, + "step": 3885 + }, + { + "epoch": 23.433734939759034, + "grad_norm": 2.5098702907562256, + "learning_rate": 1.9528112449799198e-05, + "loss": 0.3453, + "step": 3890 + }, + { + "epoch": 23.46385542168675, + "grad_norm": 3.194838523864746, + "learning_rate": 1.9553212851405623e-05, + "loss": 0.3524, + "step": 3895 + }, + { + "epoch": 23.49397590361446, + "grad_norm": 2.51259446144104, + "learning_rate": 1.957831325301205e-05, + "loss": 0.3899, + "step": 3900 + }, + { + "epoch": 23.52409638554217, + "grad_norm": 2.8412113189697266, + "learning_rate": 1.9603413654618475e-05, + "loss": 0.3071, + "step": 3905 + }, + { + "epoch": 23.55421686746988, + "grad_norm": 2.583353281021118, + "learning_rate": 1.96285140562249e-05, + "loss": 0.377, + "step": 3910 + }, + { + "epoch": 23.58433734939759, + "grad_norm": 2.712259292602539, + "learning_rate": 1.9653614457831326e-05, + "loss": 0.3414, + "step": 3915 + }, + { + "epoch": 23.6144578313253, + "grad_norm": 2.5728349685668945, + "learning_rate": 1.967871485943775e-05, + "loss": 0.3841, + "step": 3920 + }, + { + "epoch": 23.644578313253014, + "grad_norm": 2.7579598426818848, + "learning_rate": 1.9703815261044177e-05, + "loss": 0.3236, + "step": 3925 + }, + { + "epoch": 23.674698795180724, + "grad_norm": 3.745579481124878, + "learning_rate": 1.9728915662650603e-05, + "loss": 0.3788, + "step": 3930 + }, + { + "epoch": 23.704819277108435, + "grad_norm": 3.3026535511016846, + "learning_rate": 1.9754016064257032e-05, + "loss": 0.35, + "step": 3935 + }, + { + "epoch": 23.734939759036145, + "grad_norm": 2.341129779815674, + "learning_rate": 1.9779116465863454e-05, + "loss": 0.3162, + "step": 3940 + }, + { + "epoch": 23.765060240963855, + "grad_norm": 2.3009142875671387, + "learning_rate": 1.980421686746988e-05, + "loss": 0.3607, + "step": 3945 + }, + { + "epoch": 23.795180722891565, + "grad_norm": 2.9585721492767334, + "learning_rate": 1.9829317269076306e-05, + "loss": 0.368, + "step": 3950 + }, + { + "epoch": 23.825301204819276, + "grad_norm": 2.5716402530670166, + "learning_rate": 1.985441767068273e-05, + "loss": 0.3882, + "step": 3955 + }, + { + "epoch": 23.855421686746986, + "grad_norm": 3.239875316619873, + "learning_rate": 1.9879518072289157e-05, + "loss": 0.3542, + "step": 3960 + }, + { + "epoch": 23.8855421686747, + "grad_norm": 3.1310906410217285, + "learning_rate": 1.9904618473895583e-05, + "loss": 0.3364, + "step": 3965 + }, + { + "epoch": 23.91566265060241, + "grad_norm": 2.2465121746063232, + "learning_rate": 1.992971887550201e-05, + "loss": 0.3696, + "step": 3970 + }, + { + "epoch": 23.94578313253012, + "grad_norm": 2.290149450302124, + "learning_rate": 1.9954819277108437e-05, + "loss": 0.4005, + "step": 3975 + }, + { + "epoch": 23.97590361445783, + "grad_norm": 2.4030425548553467, + "learning_rate": 1.997991967871486e-05, + "loss": 0.3689, + "step": 3980 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.8769170271333071, + "eval_auc": 0.938771745870526, + "eval_f1": 0.8187608569774175, + "eval_loss": 0.2837126851081848, + "eval_precision": 0.831764705882353, + "eval_recall": 0.806157354618016, + "eval_runtime": 19.0066, + "eval_samples_per_second": 133.796, + "eval_steps_per_second": 0.684, + "step": 3984 + }, + { + "epoch": 24.00602409638554, + "grad_norm": 2.0611824989318848, + "learning_rate": 2.0005020080321285e-05, + "loss": 0.3252, + "step": 3985 + }, + { + "epoch": 24.03614457831325, + "grad_norm": 2.9021127223968506, + "learning_rate": 2.003012048192771e-05, + "loss": 0.3678, + "step": 3990 + }, + { + "epoch": 24.066265060240966, + "grad_norm": 1.9793673753738403, + "learning_rate": 2.0055220883534137e-05, + "loss": 0.3251, + "step": 3995 + }, + { + "epoch": 24.096385542168676, + "grad_norm": 2.8167641162872314, + "learning_rate": 2.0080321285140562e-05, + "loss": 0.3344, + "step": 4000 + }, + { + "epoch": 24.126506024096386, + "grad_norm": 2.1713151931762695, + "learning_rate": 2.0105421686746988e-05, + "loss": 0.3358, + "step": 4005 + }, + { + "epoch": 24.156626506024097, + "grad_norm": 2.3592758178710938, + "learning_rate": 2.0130522088353414e-05, + "loss": 0.3663, + "step": 4010 + }, + { + "epoch": 24.186746987951807, + "grad_norm": 3.2822067737579346, + "learning_rate": 2.015562248995984e-05, + "loss": 0.367, + "step": 4015 + }, + { + "epoch": 24.216867469879517, + "grad_norm": 3.195669412612915, + "learning_rate": 2.018072289156627e-05, + "loss": 0.3254, + "step": 4020 + }, + { + "epoch": 24.246987951807228, + "grad_norm": 2.3356804847717285, + "learning_rate": 2.020582329317269e-05, + "loss": 0.3005, + "step": 4025 + }, + { + "epoch": 24.27710843373494, + "grad_norm": 2.135450839996338, + "learning_rate": 2.0230923694779116e-05, + "loss": 0.359, + "step": 4030 + }, + { + "epoch": 24.30722891566265, + "grad_norm": 2.1273319721221924, + "learning_rate": 2.0256024096385542e-05, + "loss": 0.3043, + "step": 4035 + }, + { + "epoch": 24.337349397590362, + "grad_norm": 2.6562674045562744, + "learning_rate": 2.0281124497991968e-05, + "loss": 0.2992, + "step": 4040 + }, + { + "epoch": 24.367469879518072, + "grad_norm": 2.4005606174468994, + "learning_rate": 2.0306224899598393e-05, + "loss": 0.3517, + "step": 4045 + }, + { + "epoch": 24.397590361445783, + "grad_norm": 3.0539345741271973, + "learning_rate": 2.033132530120482e-05, + "loss": 0.3625, + "step": 4050 + }, + { + "epoch": 24.427710843373493, + "grad_norm": 2.6296353340148926, + "learning_rate": 2.0356425702811245e-05, + "loss": 0.3369, + "step": 4055 + }, + { + "epoch": 24.457831325301203, + "grad_norm": 2.197061061859131, + "learning_rate": 2.0381526104417674e-05, + "loss": 0.3291, + "step": 4060 + }, + { + "epoch": 24.487951807228917, + "grad_norm": 2.6472768783569336, + "learning_rate": 2.04066265060241e-05, + "loss": 0.3227, + "step": 4065 + }, + { + "epoch": 24.518072289156628, + "grad_norm": 2.7284440994262695, + "learning_rate": 2.0431726907630522e-05, + "loss": 0.3645, + "step": 4070 + }, + { + "epoch": 24.548192771084338, + "grad_norm": 1.8101915121078491, + "learning_rate": 2.0456827309236948e-05, + "loss": 0.3179, + "step": 4075 + }, + { + "epoch": 24.57831325301205, + "grad_norm": 1.9829938411712646, + "learning_rate": 2.0481927710843373e-05, + "loss": 0.3132, + "step": 4080 + }, + { + "epoch": 24.60843373493976, + "grad_norm": 2.770651340484619, + "learning_rate": 2.05070281124498e-05, + "loss": 0.366, + "step": 4085 + }, + { + "epoch": 24.63855421686747, + "grad_norm": 2.8670294284820557, + "learning_rate": 2.0532128514056225e-05, + "loss": 0.3599, + "step": 4090 + }, + { + "epoch": 24.66867469879518, + "grad_norm": 2.399897336959839, + "learning_rate": 2.055722891566265e-05, + "loss": 0.323, + "step": 4095 + }, + { + "epoch": 24.698795180722893, + "grad_norm": 1.969288945198059, + "learning_rate": 2.058232931726908e-05, + "loss": 0.3776, + "step": 4100 + }, + { + "epoch": 24.728915662650603, + "grad_norm": 2.6503965854644775, + "learning_rate": 2.0607429718875505e-05, + "loss": 0.3266, + "step": 4105 + }, + { + "epoch": 24.759036144578314, + "grad_norm": 3.4772753715515137, + "learning_rate": 2.063253012048193e-05, + "loss": 0.361, + "step": 4110 + }, + { + "epoch": 24.789156626506024, + "grad_norm": 2.970876932144165, + "learning_rate": 2.0657630522088353e-05, + "loss": 0.3393, + "step": 4115 + }, + { + "epoch": 24.819277108433734, + "grad_norm": 2.6036980152130127, + "learning_rate": 2.068273092369478e-05, + "loss": 0.381, + "step": 4120 + }, + { + "epoch": 24.849397590361445, + "grad_norm": 2.629304885864258, + "learning_rate": 2.0707831325301204e-05, + "loss": 0.3413, + "step": 4125 + }, + { + "epoch": 24.879518072289155, + "grad_norm": 2.1564583778381348, + "learning_rate": 2.073293172690763e-05, + "loss": 0.3209, + "step": 4130 + }, + { + "epoch": 24.90963855421687, + "grad_norm": 2.885538339614868, + "learning_rate": 2.0758032128514056e-05, + "loss": 0.3361, + "step": 4135 + }, + { + "epoch": 24.93975903614458, + "grad_norm": 2.5669989585876465, + "learning_rate": 2.0783132530120485e-05, + "loss": 0.3291, + "step": 4140 + }, + { + "epoch": 24.96987951807229, + "grad_norm": 2.1680426597595215, + "learning_rate": 2.080823293172691e-05, + "loss": 0.2835, + "step": 4145 + }, + { + "epoch": 25.0, + "grad_norm": 2.454000473022461, + "learning_rate": 2.0833333333333336e-05, + "loss": 0.3682, + "step": 4150 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.8725914274478962, + "eval_auc": 0.934971822252276, + "eval_f1": 0.8038740920096852, + "eval_loss": 0.29524460434913635, + "eval_precision": 0.8567741935483871, + "eval_recall": 0.7571265678449259, + "eval_runtime": 19.0056, + "eval_samples_per_second": 133.803, + "eval_steps_per_second": 0.684, + "step": 4150 + }, + { + "epoch": 25.03012048192771, + "grad_norm": 3.144068479537964, + "learning_rate": 2.0858433734939762e-05, + "loss": 0.3608, + "step": 4155 + }, + { + "epoch": 25.06024096385542, + "grad_norm": 3.0255117416381836, + "learning_rate": 2.0883534136546184e-05, + "loss": 0.2815, + "step": 4160 + }, + { + "epoch": 25.09036144578313, + "grad_norm": 3.002054452896118, + "learning_rate": 2.090863453815261e-05, + "loss": 0.3316, + "step": 4165 + }, + { + "epoch": 25.120481927710845, + "grad_norm": 2.5188658237457275, + "learning_rate": 2.0933734939759035e-05, + "loss": 0.3329, + "step": 4170 + }, + { + "epoch": 25.150602409638555, + "grad_norm": 2.7840096950531006, + "learning_rate": 2.095883534136546e-05, + "loss": 0.3082, + "step": 4175 + }, + { + "epoch": 25.180722891566266, + "grad_norm": 3.077366352081299, + "learning_rate": 2.098393574297189e-05, + "loss": 0.3485, + "step": 4180 + }, + { + "epoch": 25.210843373493976, + "grad_norm": 3.5452566146850586, + "learning_rate": 2.1009036144578316e-05, + "loss": 0.3514, + "step": 4185 + }, + { + "epoch": 25.240963855421686, + "grad_norm": 3.2208802700042725, + "learning_rate": 2.103413654618474e-05, + "loss": 0.3577, + "step": 4190 + }, + { + "epoch": 25.271084337349397, + "grad_norm": 3.3380303382873535, + "learning_rate": 2.1059236947791167e-05, + "loss": 0.3381, + "step": 4195 + }, + { + "epoch": 25.301204819277107, + "grad_norm": 2.096365451812744, + "learning_rate": 2.1084337349397593e-05, + "loss": 0.3047, + "step": 4200 + }, + { + "epoch": 25.33132530120482, + "grad_norm": 6.2428812980651855, + "learning_rate": 2.1109437751004015e-05, + "loss": 0.3539, + "step": 4205 + }, + { + "epoch": 25.36144578313253, + "grad_norm": 1.7556854486465454, + "learning_rate": 2.113453815261044e-05, + "loss": 0.3485, + "step": 4210 + }, + { + "epoch": 25.39156626506024, + "grad_norm": 2.071019172668457, + "learning_rate": 2.1159638554216866e-05, + "loss": 0.3556, + "step": 4215 + }, + { + "epoch": 25.42168674698795, + "grad_norm": 2.454308271408081, + "learning_rate": 2.1184738955823295e-05, + "loss": 0.3335, + "step": 4220 + }, + { + "epoch": 25.451807228915662, + "grad_norm": 2.7519564628601074, + "learning_rate": 2.120983935742972e-05, + "loss": 0.315, + "step": 4225 + }, + { + "epoch": 25.481927710843372, + "grad_norm": 2.2847065925598145, + "learning_rate": 2.1234939759036147e-05, + "loss": 0.2953, + "step": 4230 + }, + { + "epoch": 25.512048192771083, + "grad_norm": 2.7434380054473877, + "learning_rate": 2.1260040160642572e-05, + "loss": 0.3394, + "step": 4235 + }, + { + "epoch": 25.542168674698797, + "grad_norm": 2.0522360801696777, + "learning_rate": 2.1285140562248998e-05, + "loss": 0.3153, + "step": 4240 + }, + { + "epoch": 25.572289156626507, + "grad_norm": 2.233391046524048, + "learning_rate": 2.1310240963855424e-05, + "loss": 0.3331, + "step": 4245 + }, + { + "epoch": 25.602409638554217, + "grad_norm": 2.3065338134765625, + "learning_rate": 2.1335341365461846e-05, + "loss": 0.3479, + "step": 4250 + }, + { + "epoch": 25.632530120481928, + "grad_norm": 2.2913033962249756, + "learning_rate": 2.1360441767068272e-05, + "loss": 0.3246, + "step": 4255 + }, + { + "epoch": 25.662650602409638, + "grad_norm": 2.851597785949707, + "learning_rate": 2.13855421686747e-05, + "loss": 0.3538, + "step": 4260 + }, + { + "epoch": 25.69277108433735, + "grad_norm": 2.6081836223602295, + "learning_rate": 2.1410642570281127e-05, + "loss": 0.3465, + "step": 4265 + }, + { + "epoch": 25.72289156626506, + "grad_norm": 2.6602559089660645, + "learning_rate": 2.1435742971887552e-05, + "loss": 0.3445, + "step": 4270 + }, + { + "epoch": 25.753012048192772, + "grad_norm": 2.375763416290283, + "learning_rate": 2.1460843373493978e-05, + "loss": 0.3392, + "step": 4275 + }, + { + "epoch": 25.783132530120483, + "grad_norm": 2.4636614322662354, + "learning_rate": 2.1485943775100404e-05, + "loss": 0.3553, + "step": 4280 + }, + { + "epoch": 25.813253012048193, + "grad_norm": 2.436706304550171, + "learning_rate": 2.151104417670683e-05, + "loss": 0.2978, + "step": 4285 + }, + { + "epoch": 25.843373493975903, + "grad_norm": 2.432201862335205, + "learning_rate": 2.1536144578313255e-05, + "loss": 0.3598, + "step": 4290 + }, + { + "epoch": 25.873493975903614, + "grad_norm": 2.2942583560943604, + "learning_rate": 2.1561244979919677e-05, + "loss": 0.3526, + "step": 4295 + }, + { + "epoch": 25.903614457831324, + "grad_norm": 2.467564821243286, + "learning_rate": 2.1586345381526106e-05, + "loss": 0.317, + "step": 4300 + }, + { + "epoch": 25.933734939759034, + "grad_norm": 3.1461193561553955, + "learning_rate": 2.1611445783132532e-05, + "loss": 0.337, + "step": 4305 + }, + { + "epoch": 25.96385542168675, + "grad_norm": 3.0762431621551514, + "learning_rate": 2.1636546184738958e-05, + "loss": 0.3591, + "step": 4310 + }, + { + "epoch": 25.99397590361446, + "grad_norm": 2.0135581493377686, + "learning_rate": 2.1661646586345383e-05, + "loss": 0.3498, + "step": 4315 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.8560755013763272, + "eval_auc": 0.9323354199148303, + "eval_f1": 0.805111821086262, + "eval_loss": 0.320932537317276, + "eval_precision": 0.7552447552447552, + "eval_recall": 0.8620296465222349, + "eval_runtime": 20.2403, + "eval_samples_per_second": 125.64, + "eval_steps_per_second": 0.642, + "step": 4316 + }, + { + "epoch": 26.02409638554217, + "grad_norm": 2.15742564201355, + "learning_rate": 2.168674698795181e-05, + "loss": 0.3685, + "step": 4320 + }, + { + "epoch": 26.05421686746988, + "grad_norm": 2.6329147815704346, + "learning_rate": 2.1711847389558235e-05, + "loss": 0.3217, + "step": 4325 + }, + { + "epoch": 26.08433734939759, + "grad_norm": 3.114494562149048, + "learning_rate": 2.173694779116466e-05, + "loss": 0.3356, + "step": 4330 + }, + { + "epoch": 26.1144578313253, + "grad_norm": 2.8055694103240967, + "learning_rate": 2.1762048192771086e-05, + "loss": 0.3055, + "step": 4335 + }, + { + "epoch": 26.14457831325301, + "grad_norm": 2.372647762298584, + "learning_rate": 2.178714859437751e-05, + "loss": 0.317, + "step": 4340 + }, + { + "epoch": 26.174698795180724, + "grad_norm": 2.197999954223633, + "learning_rate": 2.1812248995983937e-05, + "loss": 0.2848, + "step": 4345 + }, + { + "epoch": 26.204819277108435, + "grad_norm": 2.3357388973236084, + "learning_rate": 2.1837349397590363e-05, + "loss": 0.3206, + "step": 4350 + }, + { + "epoch": 26.234939759036145, + "grad_norm": 2.4657440185546875, + "learning_rate": 2.186244979919679e-05, + "loss": 0.3049, + "step": 4355 + }, + { + "epoch": 26.265060240963855, + "grad_norm": 2.735355854034424, + "learning_rate": 2.1887550200803214e-05, + "loss": 0.3481, + "step": 4360 + }, + { + "epoch": 26.295180722891565, + "grad_norm": 2.7895607948303223, + "learning_rate": 2.191265060240964e-05, + "loss": 0.3654, + "step": 4365 + }, + { + "epoch": 26.325301204819276, + "grad_norm": 2.3301947116851807, + "learning_rate": 2.1937751004016066e-05, + "loss": 0.2964, + "step": 4370 + }, + { + "epoch": 26.355421686746986, + "grad_norm": 2.4436914920806885, + "learning_rate": 2.196285140562249e-05, + "loss": 0.3042, + "step": 4375 + }, + { + "epoch": 26.3855421686747, + "grad_norm": 2.6277318000793457, + "learning_rate": 2.1987951807228917e-05, + "loss": 0.3552, + "step": 4380 + }, + { + "epoch": 26.41566265060241, + "grad_norm": 2.0885562896728516, + "learning_rate": 2.2013052208835343e-05, + "loss": 0.2793, + "step": 4385 + }, + { + "epoch": 26.44578313253012, + "grad_norm": 2.6496646404266357, + "learning_rate": 2.203815261044177e-05, + "loss": 0.4158, + "step": 4390 + }, + { + "epoch": 26.47590361445783, + "grad_norm": 2.3065197467803955, + "learning_rate": 2.2063253012048194e-05, + "loss": 0.3017, + "step": 4395 + }, + { + "epoch": 26.50602409638554, + "grad_norm": 2.375241279602051, + "learning_rate": 2.208835341365462e-05, + "loss": 0.3368, + "step": 4400 + }, + { + "epoch": 26.53614457831325, + "grad_norm": 3.1456806659698486, + "learning_rate": 2.2113453815261045e-05, + "loss": 0.3444, + "step": 4405 + }, + { + "epoch": 26.566265060240966, + "grad_norm": 2.517967939376831, + "learning_rate": 2.213855421686747e-05, + "loss": 0.3547, + "step": 4410 + }, + { + "epoch": 26.596385542168676, + "grad_norm": 3.2278215885162354, + "learning_rate": 2.2163654618473897e-05, + "loss": 0.3366, + "step": 4415 + }, + { + "epoch": 26.626506024096386, + "grad_norm": 2.074816942214966, + "learning_rate": 2.2188755020080322e-05, + "loss": 0.3233, + "step": 4420 + }, + { + "epoch": 26.656626506024097, + "grad_norm": 2.988708972930908, + "learning_rate": 2.2213855421686748e-05, + "loss": 0.3688, + "step": 4425 + }, + { + "epoch": 26.686746987951807, + "grad_norm": 2.934281349182129, + "learning_rate": 2.2238955823293174e-05, + "loss": 0.3232, + "step": 4430 + }, + { + "epoch": 26.716867469879517, + "grad_norm": 2.9825849533081055, + "learning_rate": 2.22640562248996e-05, + "loss": 0.3339, + "step": 4435 + }, + { + "epoch": 26.746987951807228, + "grad_norm": 2.0730199813842773, + "learning_rate": 2.2289156626506025e-05, + "loss": 0.3341, + "step": 4440 + }, + { + "epoch": 26.77710843373494, + "grad_norm": 2.3709566593170166, + "learning_rate": 2.231425702811245e-05, + "loss": 0.3125, + "step": 4445 + }, + { + "epoch": 26.80722891566265, + "grad_norm": 2.547858476638794, + "learning_rate": 2.2339357429718876e-05, + "loss": 0.3113, + "step": 4450 + }, + { + "epoch": 26.837349397590362, + "grad_norm": 2.848167896270752, + "learning_rate": 2.2364457831325302e-05, + "loss": 0.3405, + "step": 4455 + }, + { + "epoch": 26.867469879518072, + "grad_norm": 3.4910738468170166, + "learning_rate": 2.2389558232931728e-05, + "loss": 0.3602, + "step": 4460 + }, + { + "epoch": 26.897590361445783, + "grad_norm": 1.9076507091522217, + "learning_rate": 2.2414658634538153e-05, + "loss": 0.3015, + "step": 4465 + }, + { + "epoch": 26.927710843373493, + "grad_norm": 2.182180643081665, + "learning_rate": 2.243975903614458e-05, + "loss": 0.2855, + "step": 4470 + }, + { + "epoch": 26.957831325301203, + "grad_norm": 2.6377947330474854, + "learning_rate": 2.2464859437751005e-05, + "loss": 0.2983, + "step": 4475 + }, + { + "epoch": 26.987951807228917, + "grad_norm": 3.0566041469573975, + "learning_rate": 2.248995983935743e-05, + "loss": 0.3086, + "step": 4480 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.8847817538340542, + "eval_auc": 0.9410505365201953, + "eval_f1": 0.8223165554881746, + "eval_loss": 0.2839481830596924, + "eval_precision": 0.8782383419689119, + "eval_recall": 0.7730900798175598, + "eval_runtime": 20.3148, + "eval_samples_per_second": 125.18, + "eval_steps_per_second": 0.64, + "step": 4482 + }, + { + "epoch": 27.018072289156628, + "grad_norm": 2.496335983276367, + "learning_rate": 2.2515060240963856e-05, + "loss": 0.29, + "step": 4485 + }, + { + "epoch": 27.048192771084338, + "grad_norm": 2.7706382274627686, + "learning_rate": 2.2540160642570282e-05, + "loss": 0.319, + "step": 4490 + }, + { + "epoch": 27.07831325301205, + "grad_norm": 2.7136242389678955, + "learning_rate": 2.2565261044176707e-05, + "loss": 0.3257, + "step": 4495 + }, + { + "epoch": 27.10843373493976, + "grad_norm": 2.489665985107422, + "learning_rate": 2.2590361445783133e-05, + "loss": 0.2822, + "step": 4500 + }, + { + "epoch": 27.13855421686747, + "grad_norm": 2.4213666915893555, + "learning_rate": 2.261546184738956e-05, + "loss": 0.3223, + "step": 4505 + }, + { + "epoch": 27.16867469879518, + "grad_norm": 2.8688440322875977, + "learning_rate": 2.2640562248995988e-05, + "loss": 0.2624, + "step": 4510 + }, + { + "epoch": 27.198795180722893, + "grad_norm": 2.302858829498291, + "learning_rate": 2.266566265060241e-05, + "loss": 0.299, + "step": 4515 + }, + { + "epoch": 27.228915662650603, + "grad_norm": 3.0998318195343018, + "learning_rate": 2.2690763052208836e-05, + "loss": 0.278, + "step": 4520 + }, + { + "epoch": 27.259036144578314, + "grad_norm": 2.804643392562866, + "learning_rate": 2.271586345381526e-05, + "loss": 0.3197, + "step": 4525 + }, + { + "epoch": 27.289156626506024, + "grad_norm": 3.7301785945892334, + "learning_rate": 2.2740963855421687e-05, + "loss": 0.3267, + "step": 4530 + }, + { + "epoch": 27.319277108433734, + "grad_norm": 2.538990020751953, + "learning_rate": 2.2766064257028113e-05, + "loss": 0.3259, + "step": 4535 + }, + { + "epoch": 27.349397590361445, + "grad_norm": 2.8566887378692627, + "learning_rate": 2.279116465863454e-05, + "loss": 0.3106, + "step": 4540 + }, + { + "epoch": 27.379518072289155, + "grad_norm": 2.617983818054199, + "learning_rate": 2.2816265060240964e-05, + "loss": 0.3084, + "step": 4545 + }, + { + "epoch": 27.40963855421687, + "grad_norm": 4.547421455383301, + "learning_rate": 2.284136546184739e-05, + "loss": 0.3253, + "step": 4550 + }, + { + "epoch": 27.43975903614458, + "grad_norm": 2.438436985015869, + "learning_rate": 2.286646586345382e-05, + "loss": 0.3303, + "step": 4555 + }, + { + "epoch": 27.46987951807229, + "grad_norm": 2.6192996501922607, + "learning_rate": 2.289156626506024e-05, + "loss": 0.353, + "step": 4560 + }, + { + "epoch": 27.5, + "grad_norm": 3.8371706008911133, + "learning_rate": 2.2916666666666667e-05, + "loss": 0.3507, + "step": 4565 + }, + { + "epoch": 27.53012048192771, + "grad_norm": 2.9321835041046143, + "learning_rate": 2.2941767068273093e-05, + "loss": 0.3357, + "step": 4570 + }, + { + "epoch": 27.56024096385542, + "grad_norm": 2.8408875465393066, + "learning_rate": 2.2966867469879518e-05, + "loss": 0.388, + "step": 4575 + }, + { + "epoch": 27.59036144578313, + "grad_norm": 2.674762010574341, + "learning_rate": 2.2991967871485944e-05, + "loss": 0.3919, + "step": 4580 + }, + { + "epoch": 27.620481927710845, + "grad_norm": 2.97341251373291, + "learning_rate": 2.301706827309237e-05, + "loss": 0.2904, + "step": 4585 + }, + { + "epoch": 27.650602409638555, + "grad_norm": 3.0861120223999023, + "learning_rate": 2.3042168674698795e-05, + "loss": 0.3829, + "step": 4590 + }, + { + "epoch": 27.680722891566266, + "grad_norm": 2.3060944080352783, + "learning_rate": 2.3067269076305224e-05, + "loss": 0.2962, + "step": 4595 + }, + { + "epoch": 27.710843373493976, + "grad_norm": 2.146397352218628, + "learning_rate": 2.309236947791165e-05, + "loss": 0.3021, + "step": 4600 + }, + { + "epoch": 27.740963855421686, + "grad_norm": 2.135899305343628, + "learning_rate": 2.3117469879518072e-05, + "loss": 0.286, + "step": 4605 + }, + { + "epoch": 27.771084337349397, + "grad_norm": 3.1837735176086426, + "learning_rate": 2.3142570281124498e-05, + "loss": 0.3592, + "step": 4610 + }, + { + "epoch": 27.801204819277107, + "grad_norm": 2.126497507095337, + "learning_rate": 2.3167670682730924e-05, + "loss": 0.2836, + "step": 4615 + }, + { + "epoch": 27.83132530120482, + "grad_norm": 2.43947434425354, + "learning_rate": 2.319277108433735e-05, + "loss": 0.3092, + "step": 4620 + }, + { + "epoch": 27.86144578313253, + "grad_norm": 2.086073875427246, + "learning_rate": 2.3217871485943775e-05, + "loss": 0.2689, + "step": 4625 + }, + { + "epoch": 27.89156626506024, + "grad_norm": 2.582994222640991, + "learning_rate": 2.32429718875502e-05, + "loss": 0.3375, + "step": 4630 + }, + { + "epoch": 27.92168674698795, + "grad_norm": 2.3924102783203125, + "learning_rate": 2.326807228915663e-05, + "loss": 0.3274, + "step": 4635 + }, + { + "epoch": 27.951807228915662, + "grad_norm": 2.193665027618408, + "learning_rate": 2.3293172690763055e-05, + "loss": 0.3148, + "step": 4640 + }, + { + "epoch": 27.981927710843372, + "grad_norm": 2.6703038215637207, + "learning_rate": 2.331827309236948e-05, + "loss": 0.2451, + "step": 4645 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.8761305544632324, + "eval_auc": 0.9346111306552267, + "eval_f1": 0.804953560371517, + "eval_loss": 0.30121734738349915, + "eval_precision": 0.8807588075880759, + "eval_recall": 0.7411630558722919, + "eval_runtime": 19.0477, + "eval_samples_per_second": 133.507, + "eval_steps_per_second": 0.682, + "step": 4648 + }, + { + "epoch": 28.012048192771083, + "grad_norm": 2.621669054031372, + "learning_rate": 2.3343373493975903e-05, + "loss": 0.2893, + "step": 4650 + }, + { + "epoch": 28.042168674698797, + "grad_norm": 2.453294277191162, + "learning_rate": 2.336847389558233e-05, + "loss": 0.3258, + "step": 4655 + }, + { + "epoch": 28.072289156626507, + "grad_norm": 2.217541456222534, + "learning_rate": 2.3393574297188755e-05, + "loss": 0.2883, + "step": 4660 + }, + { + "epoch": 28.102409638554217, + "grad_norm": 2.2983946800231934, + "learning_rate": 2.341867469879518e-05, + "loss": 0.3041, + "step": 4665 + }, + { + "epoch": 28.132530120481928, + "grad_norm": 2.4966137409210205, + "learning_rate": 2.3443775100401606e-05, + "loss": 0.3251, + "step": 4670 + }, + { + "epoch": 28.162650602409638, + "grad_norm": 2.3451552391052246, + "learning_rate": 2.3468875502008035e-05, + "loss": 0.3188, + "step": 4675 + }, + { + "epoch": 28.19277108433735, + "grad_norm": 1.914646029472351, + "learning_rate": 2.349397590361446e-05, + "loss": 0.2955, + "step": 4680 + }, + { + "epoch": 28.22289156626506, + "grad_norm": 2.2887744903564453, + "learning_rate": 2.3519076305220886e-05, + "loss": 0.3102, + "step": 4685 + }, + { + "epoch": 28.253012048192772, + "grad_norm": 2.1860218048095703, + "learning_rate": 2.3544176706827312e-05, + "loss": 0.272, + "step": 4690 + }, + { + "epoch": 28.283132530120483, + "grad_norm": 2.304988145828247, + "learning_rate": 2.3569277108433734e-05, + "loss": 0.3393, + "step": 4695 + }, + { + "epoch": 28.313253012048193, + "grad_norm": 2.7805275917053223, + "learning_rate": 2.359437751004016e-05, + "loss": 0.2752, + "step": 4700 + }, + { + "epoch": 28.343373493975903, + "grad_norm": 2.5115225315093994, + "learning_rate": 2.3619477911646586e-05, + "loss": 0.3172, + "step": 4705 + }, + { + "epoch": 28.373493975903614, + "grad_norm": 1.7648457288742065, + "learning_rate": 2.364457831325301e-05, + "loss": 0.2794, + "step": 4710 + }, + { + "epoch": 28.403614457831324, + "grad_norm": 2.6415631771087646, + "learning_rate": 2.366967871485944e-05, + "loss": 0.2879, + "step": 4715 + }, + { + "epoch": 28.433734939759034, + "grad_norm": 2.885535478591919, + "learning_rate": 2.3694779116465866e-05, + "loss": 0.3089, + "step": 4720 + }, + { + "epoch": 28.46385542168675, + "grad_norm": 2.2063450813293457, + "learning_rate": 2.3719879518072292e-05, + "loss": 0.3147, + "step": 4725 + }, + { + "epoch": 28.49397590361446, + "grad_norm": 2.069905996322632, + "learning_rate": 2.3744979919678718e-05, + "loss": 0.2878, + "step": 4730 + }, + { + "epoch": 28.52409638554217, + "grad_norm": 2.4482076168060303, + "learning_rate": 2.3770080321285143e-05, + "loss": 0.286, + "step": 4735 + }, + { + "epoch": 28.55421686746988, + "grad_norm": 2.077254295349121, + "learning_rate": 2.3795180722891565e-05, + "loss": 0.2983, + "step": 4740 + }, + { + "epoch": 28.58433734939759, + "grad_norm": 2.402545928955078, + "learning_rate": 2.382028112449799e-05, + "loss": 0.278, + "step": 4745 + }, + { + "epoch": 28.6144578313253, + "grad_norm": 2.59379506111145, + "learning_rate": 2.3845381526104417e-05, + "loss": 0.3238, + "step": 4750 + }, + { + "epoch": 28.644578313253014, + "grad_norm": 2.294840097427368, + "learning_rate": 2.3870481927710846e-05, + "loss": 0.26, + "step": 4755 + }, + { + "epoch": 28.674698795180724, + "grad_norm": 2.939584732055664, + "learning_rate": 2.389558232931727e-05, + "loss": 0.2753, + "step": 4760 + }, + { + "epoch": 28.704819277108435, + "grad_norm": 2.7921321392059326, + "learning_rate": 2.3920682730923697e-05, + "loss": 0.3366, + "step": 4765 + }, + { + "epoch": 28.734939759036145, + "grad_norm": 2.302945375442505, + "learning_rate": 2.3945783132530123e-05, + "loss": 0.3063, + "step": 4770 + }, + { + "epoch": 28.765060240963855, + "grad_norm": 3.6218788623809814, + "learning_rate": 2.397088353413655e-05, + "loss": 0.3335, + "step": 4775 + }, + { + "epoch": 28.795180722891565, + "grad_norm": 2.533871650695801, + "learning_rate": 2.399598393574297e-05, + "loss": 0.2699, + "step": 4780 + }, + { + "epoch": 28.825301204819276, + "grad_norm": 2.306363344192505, + "learning_rate": 2.4021084337349397e-05, + "loss": 0.3398, + "step": 4785 + }, + { + "epoch": 28.855421686746986, + "grad_norm": 2.817366600036621, + "learning_rate": 2.4046184738955822e-05, + "loss": 0.3025, + "step": 4790 + }, + { + "epoch": 28.8855421686747, + "grad_norm": 4.681380271911621, + "learning_rate": 2.407128514056225e-05, + "loss": 0.3139, + "step": 4795 + }, + { + "epoch": 28.91566265060241, + "grad_norm": 2.63634991645813, + "learning_rate": 2.4096385542168677e-05, + "loss": 0.3137, + "step": 4800 + }, + { + "epoch": 28.94578313253012, + "grad_norm": 2.8113434314727783, + "learning_rate": 2.4121485943775103e-05, + "loss": 0.3196, + "step": 4805 + }, + { + "epoch": 28.97590361445783, + "grad_norm": 3.485682964324951, + "learning_rate": 2.4146586345381528e-05, + "loss": 0.3329, + "step": 4810 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.8871411718442784, + "eval_auc": 0.942342045141888, + "eval_f1": 0.8366533864541833, + "eval_loss": 0.2750353217124939, + "eval_precision": 0.8352272727272727, + "eval_recall": 0.8380843785632839, + "eval_runtime": 20.3781, + "eval_samples_per_second": 124.791, + "eval_steps_per_second": 0.638, + "step": 4814 + }, + { + "epoch": 29.00602409638554, + "grad_norm": 2.1197657585144043, + "learning_rate": 2.4171686746987954e-05, + "loss": 0.2904, + "step": 4815 + }, + { + "epoch": 29.03614457831325, + "grad_norm": 2.599250555038452, + "learning_rate": 2.419678714859438e-05, + "loss": 0.3039, + "step": 4820 + }, + { + "epoch": 29.066265060240966, + "grad_norm": 2.422440767288208, + "learning_rate": 2.4221887550200802e-05, + "loss": 0.3298, + "step": 4825 + }, + { + "epoch": 29.096385542168676, + "grad_norm": 2.069216012954712, + "learning_rate": 2.4246987951807228e-05, + "loss": 0.2861, + "step": 4830 + }, + { + "epoch": 29.126506024096386, + "grad_norm": 2.8253629207611084, + "learning_rate": 2.4272088353413657e-05, + "loss": 0.3208, + "step": 4835 + }, + { + "epoch": 29.156626506024097, + "grad_norm": 2.8499755859375, + "learning_rate": 2.4297188755020082e-05, + "loss": 0.2923, + "step": 4840 + }, + { + "epoch": 29.186746987951807, + "grad_norm": 2.887667179107666, + "learning_rate": 2.4322289156626508e-05, + "loss": 0.2739, + "step": 4845 + }, + { + "epoch": 29.216867469879517, + "grad_norm": 2.273495674133301, + "learning_rate": 2.4347389558232934e-05, + "loss": 0.2687, + "step": 4850 + }, + { + "epoch": 29.246987951807228, + "grad_norm": 2.1487019062042236, + "learning_rate": 2.437248995983936e-05, + "loss": 0.2888, + "step": 4855 + }, + { + "epoch": 29.27710843373494, + "grad_norm": 1.8572500944137573, + "learning_rate": 2.4397590361445785e-05, + "loss": 0.293, + "step": 4860 + }, + { + "epoch": 29.30722891566265, + "grad_norm": 5.140269756317139, + "learning_rate": 2.442269076305221e-05, + "loss": 0.2901, + "step": 4865 + }, + { + "epoch": 29.337349397590362, + "grad_norm": 2.208930730819702, + "learning_rate": 2.4447791164658633e-05, + "loss": 0.2808, + "step": 4870 + }, + { + "epoch": 29.367469879518072, + "grad_norm": 2.0343732833862305, + "learning_rate": 2.4472891566265062e-05, + "loss": 0.278, + "step": 4875 + }, + { + "epoch": 29.397590361445783, + "grad_norm": 2.5664570331573486, + "learning_rate": 2.4497991967871488e-05, + "loss": 0.3075, + "step": 4880 + }, + { + "epoch": 29.427710843373493, + "grad_norm": 2.6145036220550537, + "learning_rate": 2.4523092369477913e-05, + "loss": 0.3107, + "step": 4885 + }, + { + "epoch": 29.457831325301203, + "grad_norm": 2.0771539211273193, + "learning_rate": 2.454819277108434e-05, + "loss": 0.3047, + "step": 4890 + }, + { + "epoch": 29.487951807228917, + "grad_norm": 2.758699893951416, + "learning_rate": 2.4573293172690765e-05, + "loss": 0.2777, + "step": 4895 + }, + { + "epoch": 29.518072289156628, + "grad_norm": 2.6498754024505615, + "learning_rate": 2.459839357429719e-05, + "loss": 0.2903, + "step": 4900 + }, + { + "epoch": 29.548192771084338, + "grad_norm": 1.9495166540145874, + "learning_rate": 2.4623493975903616e-05, + "loss": 0.2703, + "step": 4905 + }, + { + "epoch": 29.57831325301205, + "grad_norm": 2.5841169357299805, + "learning_rate": 2.4648594377510042e-05, + "loss": 0.3075, + "step": 4910 + }, + { + "epoch": 29.60843373493976, + "grad_norm": 2.615114688873291, + "learning_rate": 2.4673694779116467e-05, + "loss": 0.2809, + "step": 4915 + }, + { + "epoch": 29.63855421686747, + "grad_norm": 2.9745523929595947, + "learning_rate": 2.4698795180722893e-05, + "loss": 0.3286, + "step": 4920 + }, + { + "epoch": 29.66867469879518, + "grad_norm": 2.6587026119232178, + "learning_rate": 2.472389558232932e-05, + "loss": 0.3095, + "step": 4925 + }, + { + "epoch": 29.698795180722893, + "grad_norm": 2.4806618690490723, + "learning_rate": 2.4748995983935744e-05, + "loss": 0.324, + "step": 4930 + }, + { + "epoch": 29.728915662650603, + "grad_norm": 1.9983023405075073, + "learning_rate": 2.477409638554217e-05, + "loss": 0.3014, + "step": 4935 + }, + { + "epoch": 29.759036144578314, + "grad_norm": 2.5685226917266846, + "learning_rate": 2.4799196787148596e-05, + "loss": 0.2951, + "step": 4940 + }, + { + "epoch": 29.789156626506024, + "grad_norm": 2.277895927429199, + "learning_rate": 2.482429718875502e-05, + "loss": 0.2378, + "step": 4945 + }, + { + "epoch": 29.819277108433734, + "grad_norm": 3.36366605758667, + "learning_rate": 2.4849397590361447e-05, + "loss": 0.2895, + "step": 4950 + }, + { + "epoch": 29.849397590361445, + "grad_norm": 2.543626070022583, + "learning_rate": 2.4874497991967873e-05, + "loss": 0.3248, + "step": 4955 + }, + { + "epoch": 29.879518072289155, + "grad_norm": 2.6698496341705322, + "learning_rate": 2.48995983935743e-05, + "loss": 0.3171, + "step": 4960 + }, + { + "epoch": 29.90963855421687, + "grad_norm": 2.6674630641937256, + "learning_rate": 2.4924698795180724e-05, + "loss": 0.323, + "step": 4965 + }, + { + "epoch": 29.93975903614458, + "grad_norm": 1.9268379211425781, + "learning_rate": 2.494979919678715e-05, + "loss": 0.3019, + "step": 4970 + }, + { + "epoch": 29.96987951807229, + "grad_norm": 2.4100546836853027, + "learning_rate": 2.4974899598393576e-05, + "loss": 0.3307, + "step": 4975 + }, + { + "epoch": 30.0, + "grad_norm": 2.917056083679199, + "learning_rate": 2.5e-05, + "loss": 0.3131, + "step": 4980 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.8906802988596146, + "eval_auc": 0.9512488005464445, + "eval_f1": 0.8398617511520737, + "eval_loss": 0.24600017070770264, + "eval_precision": 0.8486612339930152, + "eval_recall": 0.8312428734321551, + "eval_runtime": 17.5103, + "eval_samples_per_second": 145.229, + "eval_steps_per_second": 0.742, + "step": 4980 + }, + { + "epoch": 30.03012048192771, + "grad_norm": 2.3530986309051514, + "learning_rate": 2.5025100401606427e-05, + "loss": 0.2913, + "step": 4985 + }, + { + "epoch": 30.06024096385542, + "grad_norm": 2.8278725147247314, + "learning_rate": 2.5050200803212853e-05, + "loss": 0.2972, + "step": 4990 + }, + { + "epoch": 30.09036144578313, + "grad_norm": 2.1610164642333984, + "learning_rate": 2.5075301204819278e-05, + "loss": 0.2711, + "step": 4995 + }, + { + "epoch": 30.120481927710845, + "grad_norm": 2.423787832260132, + "learning_rate": 2.5100401606425704e-05, + "loss": 0.2504, + "step": 5000 + }, + { + "epoch": 30.150602409638555, + "grad_norm": 3.632481575012207, + "learning_rate": 2.5125502008032133e-05, + "loss": 0.2882, + "step": 5005 + }, + { + "epoch": 30.180722891566266, + "grad_norm": 3.5192201137542725, + "learning_rate": 2.515060240963856e-05, + "loss": 0.3301, + "step": 5010 + }, + { + "epoch": 30.210843373493976, + "grad_norm": 2.7295660972595215, + "learning_rate": 2.5175702811244984e-05, + "loss": 0.272, + "step": 5015 + }, + { + "epoch": 30.240963855421686, + "grad_norm": 2.8712375164031982, + "learning_rate": 2.520080321285141e-05, + "loss": 0.2634, + "step": 5020 + }, + { + "epoch": 30.271084337349397, + "grad_norm": 2.6461923122406006, + "learning_rate": 2.522590361445783e-05, + "loss": 0.277, + "step": 5025 + }, + { + "epoch": 30.301204819277107, + "grad_norm": 2.4615397453308105, + "learning_rate": 2.5251004016064255e-05, + "loss": 0.3056, + "step": 5030 + }, + { + "epoch": 30.33132530120482, + "grad_norm": 2.3607475757598877, + "learning_rate": 2.527610441767068e-05, + "loss": 0.287, + "step": 5035 + }, + { + "epoch": 30.36144578313253, + "grad_norm": 2.6344406604766846, + "learning_rate": 2.530120481927711e-05, + "loss": 0.3256, + "step": 5040 + }, + { + "epoch": 30.39156626506024, + "grad_norm": 2.357358455657959, + "learning_rate": 2.5326305220883535e-05, + "loss": 0.2615, + "step": 5045 + }, + { + "epoch": 30.42168674698795, + "grad_norm": 4.351990699768066, + "learning_rate": 2.535140562248996e-05, + "loss": 0.3105, + "step": 5050 + }, + { + "epoch": 30.451807228915662, + "grad_norm": 2.7113723754882812, + "learning_rate": 2.5376506024096386e-05, + "loss": 0.2849, + "step": 5055 + }, + { + "epoch": 30.481927710843372, + "grad_norm": 1.9872444868087769, + "learning_rate": 2.5401606425702812e-05, + "loss": 0.2922, + "step": 5060 + }, + { + "epoch": 30.512048192771083, + "grad_norm": 2.4583382606506348, + "learning_rate": 2.5426706827309238e-05, + "loss": 0.3054, + "step": 5065 + }, + { + "epoch": 30.542168674698797, + "grad_norm": 2.36474871635437, + "learning_rate": 2.5451807228915663e-05, + "loss": 0.2723, + "step": 5070 + }, + { + "epoch": 30.572289156626507, + "grad_norm": 2.877342700958252, + "learning_rate": 2.547690763052209e-05, + "loss": 0.3518, + "step": 5075 + }, + { + "epoch": 30.602409638554217, + "grad_norm": 2.3828818798065186, + "learning_rate": 2.5502008032128515e-05, + "loss": 0.334, + "step": 5080 + }, + { + "epoch": 30.632530120481928, + "grad_norm": 2.944418430328369, + "learning_rate": 2.552710843373494e-05, + "loss": 0.2791, + "step": 5085 + }, + { + "epoch": 30.662650602409638, + "grad_norm": 2.43225359916687, + "learning_rate": 2.555220883534137e-05, + "loss": 0.3237, + "step": 5090 + }, + { + "epoch": 30.69277108433735, + "grad_norm": 2.221041202545166, + "learning_rate": 2.5577309236947795e-05, + "loss": 0.306, + "step": 5095 + }, + { + "epoch": 30.72289156626506, + "grad_norm": 2.513659715652466, + "learning_rate": 2.560240963855422e-05, + "loss": 0.2948, + "step": 5100 + }, + { + "epoch": 30.753012048192772, + "grad_norm": 1.9354315996170044, + "learning_rate": 2.5627510040160646e-05, + "loss": 0.3008, + "step": 5105 + }, + { + "epoch": 30.783132530120483, + "grad_norm": 3.294189929962158, + "learning_rate": 2.5652610441767072e-05, + "loss": 0.3362, + "step": 5110 + }, + { + "epoch": 30.813253012048193, + "grad_norm": 3.190519094467163, + "learning_rate": 2.567771084337349e-05, + "loss": 0.3287, + "step": 5115 + }, + { + "epoch": 30.843373493975903, + "grad_norm": 2.336742639541626, + "learning_rate": 2.570281124497992e-05, + "loss": 0.3103, + "step": 5120 + }, + { + "epoch": 30.873493975903614, + "grad_norm": 2.363055944442749, + "learning_rate": 2.5727911646586346e-05, + "loss": 0.3117, + "step": 5125 + }, + { + "epoch": 30.903614457831324, + "grad_norm": 2.6356406211853027, + "learning_rate": 2.575301204819277e-05, + "loss": 0.2972, + "step": 5130 + }, + { + "epoch": 30.933734939759034, + "grad_norm": 2.487208366394043, + "learning_rate": 2.5778112449799197e-05, + "loss": 0.2578, + "step": 5135 + }, + { + "epoch": 30.96385542168675, + "grad_norm": 2.5588066577911377, + "learning_rate": 2.5803212851405623e-05, + "loss": 0.276, + "step": 5140 + }, + { + "epoch": 30.99397590361446, + "grad_norm": 2.008023977279663, + "learning_rate": 2.582831325301205e-05, + "loss": 0.2947, + "step": 5145 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.892253244199764, + "eval_auc": 0.9521710622675524, + "eval_f1": 0.8399532710280374, + "eval_loss": 0.25605228543281555, + "eval_precision": 0.8610778443113772, + "eval_recall": 0.8198403648802737, + "eval_runtime": 19.4574, + "eval_samples_per_second": 130.695, + "eval_steps_per_second": 0.668, + "step": 5146 + }, + { + "epoch": 31.02409638554217, + "grad_norm": 2.0197880268096924, + "learning_rate": 2.5853413654618474e-05, + "loss": 0.2584, + "step": 5150 + }, + { + "epoch": 31.05421686746988, + "grad_norm": 3.194411516189575, + "learning_rate": 2.58785140562249e-05, + "loss": 0.2708, + "step": 5155 + }, + { + "epoch": 31.08433734939759, + "grad_norm": 2.815106153488159, + "learning_rate": 2.5903614457831325e-05, + "loss": 0.265, + "step": 5160 + }, + { + "epoch": 31.1144578313253, + "grad_norm": 2.1786208152770996, + "learning_rate": 2.592871485943775e-05, + "loss": 0.2492, + "step": 5165 + }, + { + "epoch": 31.14457831325301, + "grad_norm": 2.171779155731201, + "learning_rate": 2.595381526104418e-05, + "loss": 0.2593, + "step": 5170 + }, + { + "epoch": 31.174698795180724, + "grad_norm": 2.532412052154541, + "learning_rate": 2.5978915662650606e-05, + "loss": 0.2688, + "step": 5175 + }, + { + "epoch": 31.204819277108435, + "grad_norm": 2.324094295501709, + "learning_rate": 2.600401606425703e-05, + "loss": 0.2687, + "step": 5180 + }, + { + "epoch": 31.234939759036145, + "grad_norm": 2.6688435077667236, + "learning_rate": 2.6029116465863457e-05, + "loss": 0.2775, + "step": 5185 + }, + { + "epoch": 31.265060240963855, + "grad_norm": 2.4075028896331787, + "learning_rate": 2.6054216867469883e-05, + "loss": 0.2541, + "step": 5190 + }, + { + "epoch": 31.295180722891565, + "grad_norm": 2.7049026489257812, + "learning_rate": 2.607931726907631e-05, + "loss": 0.2834, + "step": 5195 + }, + { + "epoch": 31.325301204819276, + "grad_norm": 3.187969923019409, + "learning_rate": 2.6104417670682734e-05, + "loss": 0.3352, + "step": 5200 + }, + { + "epoch": 31.355421686746986, + "grad_norm": 3.6192030906677246, + "learning_rate": 2.6129518072289157e-05, + "loss": 0.2842, + "step": 5205 + }, + { + "epoch": 31.3855421686747, + "grad_norm": 2.197908878326416, + "learning_rate": 2.6154618473895582e-05, + "loss": 0.2872, + "step": 5210 + }, + { + "epoch": 31.41566265060241, + "grad_norm": 2.4250454902648926, + "learning_rate": 2.6179718875502008e-05, + "loss": 0.2835, + "step": 5215 + }, + { + "epoch": 31.44578313253012, + "grad_norm": 2.701233148574829, + "learning_rate": 2.6204819277108434e-05, + "loss": 0.2726, + "step": 5220 + }, + { + "epoch": 31.47590361445783, + "grad_norm": 2.1720259189605713, + "learning_rate": 2.622991967871486e-05, + "loss": 0.2698, + "step": 5225 + }, + { + "epoch": 31.50602409638554, + "grad_norm": 2.6421265602111816, + "learning_rate": 2.6255020080321285e-05, + "loss": 0.2931, + "step": 5230 + }, + { + "epoch": 31.53614457831325, + "grad_norm": 3.280252695083618, + "learning_rate": 2.628012048192771e-05, + "loss": 0.299, + "step": 5235 + }, + { + "epoch": 31.566265060240966, + "grad_norm": 2.5766305923461914, + "learning_rate": 2.6305220883534136e-05, + "loss": 0.3077, + "step": 5240 + }, + { + "epoch": 31.596385542168676, + "grad_norm": 2.385478973388672, + "learning_rate": 2.6330321285140562e-05, + "loss": 0.2979, + "step": 5245 + }, + { + "epoch": 31.626506024096386, + "grad_norm": 2.208635091781616, + "learning_rate": 2.635542168674699e-05, + "loss": 0.2661, + "step": 5250 + }, + { + "epoch": 31.656626506024097, + "grad_norm": 3.1388778686523438, + "learning_rate": 2.6380522088353417e-05, + "loss": 0.3259, + "step": 5255 + }, + { + "epoch": 31.686746987951807, + "grad_norm": 2.6264169216156006, + "learning_rate": 2.6405622489959842e-05, + "loss": 0.2976, + "step": 5260 + }, + { + "epoch": 31.716867469879517, + "grad_norm": 2.8914477825164795, + "learning_rate": 2.6430722891566268e-05, + "loss": 0.2797, + "step": 5265 + }, + { + "epoch": 31.746987951807228, + "grad_norm": 1.6656322479248047, + "learning_rate": 2.6455823293172694e-05, + "loss": 0.2702, + "step": 5270 + }, + { + "epoch": 31.77710843373494, + "grad_norm": 2.7814254760742188, + "learning_rate": 2.648092369477912e-05, + "loss": 0.3048, + "step": 5275 + }, + { + "epoch": 31.80722891566265, + "grad_norm": 2.2331786155700684, + "learning_rate": 2.6506024096385545e-05, + "loss": 0.2608, + "step": 5280 + }, + { + "epoch": 31.837349397590362, + "grad_norm": 3.587602138519287, + "learning_rate": 2.653112449799197e-05, + "loss": 0.2824, + "step": 5285 + }, + { + "epoch": 31.867469879518072, + "grad_norm": 2.4856202602386475, + "learning_rate": 2.6556224899598396e-05, + "loss": 0.2636, + "step": 5290 + }, + { + "epoch": 31.897590361445783, + "grad_norm": 1.821752667427063, + "learning_rate": 2.658132530120482e-05, + "loss": 0.2835, + "step": 5295 + }, + { + "epoch": 31.927710843373493, + "grad_norm": 2.444525957107544, + "learning_rate": 2.6606425702811244e-05, + "loss": 0.2744, + "step": 5300 + }, + { + "epoch": 31.957831325301203, + "grad_norm": 3.3772637844085693, + "learning_rate": 2.663152610441767e-05, + "loss": 0.3373, + "step": 5305 + }, + { + "epoch": 31.987951807228917, + "grad_norm": 2.0322577953338623, + "learning_rate": 2.6656626506024096e-05, + "loss": 0.2789, + "step": 5310 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.8930397168698387, + "eval_auc": 0.9527353700887423, + "eval_f1": 0.8434982738780207, + "eval_loss": 0.25190040469169617, + "eval_precision": 0.851335656213705, + "eval_recall": 0.8358038768529077, + "eval_runtime": 18.443, + "eval_samples_per_second": 137.884, + "eval_steps_per_second": 0.705, + "step": 5312 + }, + { + "epoch": 32.01807228915663, + "grad_norm": 3.5874390602111816, + "learning_rate": 2.668172690763052e-05, + "loss": 0.3335, + "step": 5315 + }, + { + "epoch": 32.04819277108434, + "grad_norm": 3.012038469314575, + "learning_rate": 2.6706827309236947e-05, + "loss": 0.2645, + "step": 5320 + }, + { + "epoch": 32.07831325301205, + "grad_norm": 2.6814894676208496, + "learning_rate": 2.6731927710843373e-05, + "loss": 0.2605, + "step": 5325 + }, + { + "epoch": 32.10843373493976, + "grad_norm": 2.470123052597046, + "learning_rate": 2.6757028112449802e-05, + "loss": 0.252, + "step": 5330 + }, + { + "epoch": 32.13855421686747, + "grad_norm": 2.329923629760742, + "learning_rate": 2.6782128514056227e-05, + "loss": 0.2885, + "step": 5335 + }, + { + "epoch": 32.16867469879518, + "grad_norm": 2.523280382156372, + "learning_rate": 2.6807228915662653e-05, + "loss": 0.2695, + "step": 5340 + }, + { + "epoch": 32.19879518072289, + "grad_norm": 2.2069311141967773, + "learning_rate": 2.683232931726908e-05, + "loss": 0.2958, + "step": 5345 + }, + { + "epoch": 32.2289156626506, + "grad_norm": 2.726677417755127, + "learning_rate": 2.6857429718875504e-05, + "loss": 0.3159, + "step": 5350 + }, + { + "epoch": 32.25903614457831, + "grad_norm": 3.455636739730835, + "learning_rate": 2.688253012048193e-05, + "loss": 0.3096, + "step": 5355 + }, + { + "epoch": 32.28915662650602, + "grad_norm": 2.2268497943878174, + "learning_rate": 2.6907630522088356e-05, + "loss": 0.2715, + "step": 5360 + }, + { + "epoch": 32.31927710843374, + "grad_norm": 2.406238317489624, + "learning_rate": 2.693273092369478e-05, + "loss": 0.2928, + "step": 5365 + }, + { + "epoch": 32.34939759036145, + "grad_norm": 2.166278123855591, + "learning_rate": 2.6957831325301207e-05, + "loss": 0.2765, + "step": 5370 + }, + { + "epoch": 32.37951807228916, + "grad_norm": 1.9824190139770508, + "learning_rate": 2.6982931726907633e-05, + "loss": 0.3195, + "step": 5375 + }, + { + "epoch": 32.40963855421687, + "grad_norm": 1.7944291830062866, + "learning_rate": 2.7008032128514062e-05, + "loss": 0.2438, + "step": 5380 + }, + { + "epoch": 32.43975903614458, + "grad_norm": 2.768326759338379, + "learning_rate": 2.703313253012048e-05, + "loss": 0.3254, + "step": 5385 + }, + { + "epoch": 32.46987951807229, + "grad_norm": 2.4229800701141357, + "learning_rate": 2.7058232931726906e-05, + "loss": 0.2791, + "step": 5390 + }, + { + "epoch": 32.5, + "grad_norm": 2.3203210830688477, + "learning_rate": 2.7083333333333332e-05, + "loss": 0.2886, + "step": 5395 + }, + { + "epoch": 32.53012048192771, + "grad_norm": 2.4342074394226074, + "learning_rate": 2.7108433734939758e-05, + "loss": 0.273, + "step": 5400 + }, + { + "epoch": 32.56024096385542, + "grad_norm": 2.3602750301361084, + "learning_rate": 2.7133534136546183e-05, + "loss": 0.248, + "step": 5405 + }, + { + "epoch": 32.59036144578313, + "grad_norm": 2.7906014919281006, + "learning_rate": 2.7158634538152612e-05, + "loss": 0.3255, + "step": 5410 + }, + { + "epoch": 32.62048192771084, + "grad_norm": 2.2469797134399414, + "learning_rate": 2.7183734939759038e-05, + "loss": 0.2498, + "step": 5415 + }, + { + "epoch": 32.65060240963855, + "grad_norm": 2.7367377281188965, + "learning_rate": 2.7208835341365464e-05, + "loss": 0.2596, + "step": 5420 + }, + { + "epoch": 32.68072289156626, + "grad_norm": 2.7294719219207764, + "learning_rate": 2.723393574297189e-05, + "loss": 0.28, + "step": 5425 + }, + { + "epoch": 32.71084337349397, + "grad_norm": 1.9965656995773315, + "learning_rate": 2.7259036144578315e-05, + "loss": 0.2654, + "step": 5430 + }, + { + "epoch": 32.74096385542169, + "grad_norm": 2.9363596439361572, + "learning_rate": 2.728413654618474e-05, + "loss": 0.3083, + "step": 5435 + }, + { + "epoch": 32.7710843373494, + "grad_norm": 2.3121836185455322, + "learning_rate": 2.7309236947791167e-05, + "loss": 0.3089, + "step": 5440 + }, + { + "epoch": 32.80120481927711, + "grad_norm": 2.360670566558838, + "learning_rate": 2.7334337349397592e-05, + "loss": 0.3105, + "step": 5445 + }, + { + "epoch": 32.83132530120482, + "grad_norm": 2.362351417541504, + "learning_rate": 2.7359437751004018e-05, + "loss": 0.2843, + "step": 5450 + }, + { + "epoch": 32.86144578313253, + "grad_norm": 2.308806896209717, + "learning_rate": 2.7384538152610444e-05, + "loss": 0.2634, + "step": 5455 + }, + { + "epoch": 32.89156626506024, + "grad_norm": 2.528810739517212, + "learning_rate": 2.7409638554216873e-05, + "loss": 0.2863, + "step": 5460 + }, + { + "epoch": 32.92168674698795, + "grad_norm": 2.1996474266052246, + "learning_rate": 2.7434738955823298e-05, + "loss": 0.2991, + "step": 5465 + }, + { + "epoch": 32.95180722891566, + "grad_norm": 2.9335763454437256, + "learning_rate": 2.7459839357429717e-05, + "loss": 0.2628, + "step": 5470 + }, + { + "epoch": 32.98192771084337, + "grad_norm": 3.3871850967407227, + "learning_rate": 2.7484939759036143e-05, + "loss": 0.2774, + "step": 5475 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9044435705859222, + "eval_auc": 0.9555521182247131, + "eval_f1": 0.8504615384615385, + "eval_loss": 0.253192663192749, + "eval_precision": 0.9237967914438503, + "eval_recall": 0.7879133409350056, + "eval_runtime": 17.3797, + "eval_samples_per_second": 146.32, + "eval_steps_per_second": 0.748, + "step": 5478 + }, + { + "epoch": 33.01204819277108, + "grad_norm": 2.615572929382324, + "learning_rate": 2.751004016064257e-05, + "loss": 0.2574, + "step": 5480 + }, + { + "epoch": 33.04216867469879, + "grad_norm": 1.9657039642333984, + "learning_rate": 2.7535140562248994e-05, + "loss": 0.2799, + "step": 5485 + }, + { + "epoch": 33.0722891566265, + "grad_norm": 2.3047263622283936, + "learning_rate": 2.756024096385542e-05, + "loss": 0.2787, + "step": 5490 + }, + { + "epoch": 33.102409638554214, + "grad_norm": 1.813161849975586, + "learning_rate": 2.758534136546185e-05, + "loss": 0.234, + "step": 5495 + }, + { + "epoch": 33.13253012048193, + "grad_norm": 2.4800333976745605, + "learning_rate": 2.7610441767068275e-05, + "loss": 0.2717, + "step": 5500 + }, + { + "epoch": 33.16265060240964, + "grad_norm": 1.8988195657730103, + "learning_rate": 2.76355421686747e-05, + "loss": 0.2628, + "step": 5505 + }, + { + "epoch": 33.19277108433735, + "grad_norm": 2.371368885040283, + "learning_rate": 2.7660642570281126e-05, + "loss": 0.2665, + "step": 5510 + }, + { + "epoch": 33.22289156626506, + "grad_norm": 2.7762610912323, + "learning_rate": 2.768574297188755e-05, + "loss": 0.2797, + "step": 5515 + }, + { + "epoch": 33.25301204819277, + "grad_norm": 1.872565746307373, + "learning_rate": 2.7710843373493977e-05, + "loss": 0.2721, + "step": 5520 + }, + { + "epoch": 33.28313253012048, + "grad_norm": 2.8854243755340576, + "learning_rate": 2.7735943775100403e-05, + "loss": 0.306, + "step": 5525 + }, + { + "epoch": 33.31325301204819, + "grad_norm": 2.694687843322754, + "learning_rate": 2.776104417670683e-05, + "loss": 0.2859, + "step": 5530 + }, + { + "epoch": 33.3433734939759, + "grad_norm": 2.2184691429138184, + "learning_rate": 2.7786144578313254e-05, + "loss": 0.2645, + "step": 5535 + }, + { + "epoch": 33.373493975903614, + "grad_norm": 3.2997360229492188, + "learning_rate": 2.781124497991968e-05, + "loss": 0.2841, + "step": 5540 + }, + { + "epoch": 33.403614457831324, + "grad_norm": 2.774646520614624, + "learning_rate": 2.783634538152611e-05, + "loss": 0.2917, + "step": 5545 + }, + { + "epoch": 33.433734939759034, + "grad_norm": 1.8466839790344238, + "learning_rate": 2.7861445783132535e-05, + "loss": 0.2519, + "step": 5550 + }, + { + "epoch": 33.463855421686745, + "grad_norm": 2.7159290313720703, + "learning_rate": 2.788654618473896e-05, + "loss": 0.2991, + "step": 5555 + }, + { + "epoch": 33.493975903614455, + "grad_norm": 2.4454333782196045, + "learning_rate": 2.791164658634538e-05, + "loss": 0.2997, + "step": 5560 + }, + { + "epoch": 33.524096385542165, + "grad_norm": 2.329941987991333, + "learning_rate": 2.7936746987951805e-05, + "loss": 0.2437, + "step": 5565 + }, + { + "epoch": 33.55421686746988, + "grad_norm": 2.7902016639709473, + "learning_rate": 2.796184738955823e-05, + "loss": 0.2614, + "step": 5570 + }, + { + "epoch": 33.58433734939759, + "grad_norm": 2.3303303718566895, + "learning_rate": 2.798694779116466e-05, + "loss": 0.2841, + "step": 5575 + }, + { + "epoch": 33.6144578313253, + "grad_norm": 2.3831629753112793, + "learning_rate": 2.8012048192771085e-05, + "loss": 0.2626, + "step": 5580 + }, + { + "epoch": 33.644578313253014, + "grad_norm": 1.8426225185394287, + "learning_rate": 2.803714859437751e-05, + "loss": 0.2828, + "step": 5585 + }, + { + "epoch": 33.674698795180724, + "grad_norm": 2.2950408458709717, + "learning_rate": 2.8062248995983937e-05, + "loss": 0.2557, + "step": 5590 + }, + { + "epoch": 33.704819277108435, + "grad_norm": 2.125751256942749, + "learning_rate": 2.8087349397590362e-05, + "loss": 0.2585, + "step": 5595 + }, + { + "epoch": 33.734939759036145, + "grad_norm": 2.5507137775421143, + "learning_rate": 2.8112449799196788e-05, + "loss": 0.2856, + "step": 5600 + }, + { + "epoch": 33.765060240963855, + "grad_norm": 3.3059866428375244, + "learning_rate": 2.8137550200803214e-05, + "loss": 0.282, + "step": 5605 + }, + { + "epoch": 33.795180722891565, + "grad_norm": 2.6528871059417725, + "learning_rate": 2.816265060240964e-05, + "loss": 0.3061, + "step": 5610 + }, + { + "epoch": 33.825301204819276, + "grad_norm": 3.684744358062744, + "learning_rate": 2.8187751004016065e-05, + "loss": 0.2988, + "step": 5615 + }, + { + "epoch": 33.855421686746986, + "grad_norm": 2.484884738922119, + "learning_rate": 2.821285140562249e-05, + "loss": 0.2788, + "step": 5620 + }, + { + "epoch": 33.8855421686747, + "grad_norm": 1.8708103895187378, + "learning_rate": 2.823795180722892e-05, + "loss": 0.236, + "step": 5625 + }, + { + "epoch": 33.91566265060241, + "grad_norm": 2.291910171508789, + "learning_rate": 2.8263052208835346e-05, + "loss": 0.2555, + "step": 5630 + }, + { + "epoch": 33.94578313253012, + "grad_norm": 2.124656915664673, + "learning_rate": 2.828815261044177e-05, + "loss": 0.2616, + "step": 5635 + }, + { + "epoch": 33.975903614457835, + "grad_norm": 3.6708576679229736, + "learning_rate": 2.8313253012048197e-05, + "loss": 0.2232, + "step": 5640 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.9040503342508848, + "eval_auc": 0.9544108407331006, + "eval_f1": 0.8586326767091541, + "eval_loss": 0.2504521608352661, + "eval_precision": 0.872791519434629, + "eval_recall": 0.8449258836944128, + "eval_runtime": 17.3902, + "eval_samples_per_second": 146.232, + "eval_steps_per_second": 0.748, + "step": 5644 + }, + { + "epoch": 34.006024096385545, + "grad_norm": 3.1373565196990967, + "learning_rate": 2.8338353413654623e-05, + "loss": 0.331, + "step": 5645 + }, + { + "epoch": 34.036144578313255, + "grad_norm": 2.5919554233551025, + "learning_rate": 2.836345381526104e-05, + "loss": 0.2949, + "step": 5650 + }, + { + "epoch": 34.066265060240966, + "grad_norm": 2.182943344116211, + "learning_rate": 2.838855421686747e-05, + "loss": 0.2116, + "step": 5655 + }, + { + "epoch": 34.096385542168676, + "grad_norm": 2.259359121322632, + "learning_rate": 2.8413654618473896e-05, + "loss": 0.2538, + "step": 5660 + }, + { + "epoch": 34.126506024096386, + "grad_norm": 2.015058755874634, + "learning_rate": 2.8438755020080322e-05, + "loss": 0.2232, + "step": 5665 + }, + { + "epoch": 34.1566265060241, + "grad_norm": 1.9988059997558594, + "learning_rate": 2.8463855421686748e-05, + "loss": 0.2737, + "step": 5670 + }, + { + "epoch": 34.18674698795181, + "grad_norm": 2.2244763374328613, + "learning_rate": 2.8488955823293173e-05, + "loss": 0.2608, + "step": 5675 + }, + { + "epoch": 34.21686746987952, + "grad_norm": 2.8359451293945312, + "learning_rate": 2.85140562248996e-05, + "loss": 0.2806, + "step": 5680 + }, + { + "epoch": 34.24698795180723, + "grad_norm": 2.3713274002075195, + "learning_rate": 2.8539156626506025e-05, + "loss": 0.2939, + "step": 5685 + }, + { + "epoch": 34.27710843373494, + "grad_norm": 2.9533097743988037, + "learning_rate": 2.856425702811245e-05, + "loss": 0.2589, + "step": 5690 + }, + { + "epoch": 34.30722891566265, + "grad_norm": 2.3739044666290283, + "learning_rate": 2.8589357429718876e-05, + "loss": 0.2415, + "step": 5695 + }, + { + "epoch": 34.33734939759036, + "grad_norm": 2.4942517280578613, + "learning_rate": 2.86144578313253e-05, + "loss": 0.2825, + "step": 5700 + }, + { + "epoch": 34.36746987951807, + "grad_norm": 2.3502275943756104, + "learning_rate": 2.863955823293173e-05, + "loss": 0.2321, + "step": 5705 + }, + { + "epoch": 34.397590361445786, + "grad_norm": 2.2536942958831787, + "learning_rate": 2.8664658634538156e-05, + "loss": 0.2823, + "step": 5710 + }, + { + "epoch": 34.4277108433735, + "grad_norm": 2.001239776611328, + "learning_rate": 2.8689759036144582e-05, + "loss": 0.2799, + "step": 5715 + }, + { + "epoch": 34.45783132530121, + "grad_norm": 2.6977627277374268, + "learning_rate": 2.8714859437751008e-05, + "loss": 0.2941, + "step": 5720 + }, + { + "epoch": 34.48795180722892, + "grad_norm": 2.146167516708374, + "learning_rate": 2.8739959839357433e-05, + "loss": 0.2154, + "step": 5725 + }, + { + "epoch": 34.51807228915663, + "grad_norm": 2.3009486198425293, + "learning_rate": 2.876506024096386e-05, + "loss": 0.2217, + "step": 5730 + }, + { + "epoch": 34.54819277108434, + "grad_norm": 6.846885681152344, + "learning_rate": 2.8790160642570285e-05, + "loss": 0.25, + "step": 5735 + }, + { + "epoch": 34.57831325301205, + "grad_norm": 3.1928577423095703, + "learning_rate": 2.8815261044176707e-05, + "loss": 0.3159, + "step": 5740 + }, + { + "epoch": 34.60843373493976, + "grad_norm": 2.9763200283050537, + "learning_rate": 2.8840361445783133e-05, + "loss": 0.2633, + "step": 5745 + }, + { + "epoch": 34.63855421686747, + "grad_norm": 2.920257091522217, + "learning_rate": 2.8865461847389558e-05, + "loss": 0.2693, + "step": 5750 + }, + { + "epoch": 34.66867469879518, + "grad_norm": 2.94160532951355, + "learning_rate": 2.8890562248995984e-05, + "loss": 0.2848, + "step": 5755 + }, + { + "epoch": 34.69879518072289, + "grad_norm": 2.237041473388672, + "learning_rate": 2.891566265060241e-05, + "loss": 0.2573, + "step": 5760 + }, + { + "epoch": 34.7289156626506, + "grad_norm": 2.879662036895752, + "learning_rate": 2.8940763052208835e-05, + "loss": 0.2577, + "step": 5765 + }, + { + "epoch": 34.75903614457831, + "grad_norm": 2.4831583499908447, + "learning_rate": 2.896586345381526e-05, + "loss": 0.2173, + "step": 5770 + }, + { + "epoch": 34.78915662650603, + "grad_norm": 3.967815399169922, + "learning_rate": 2.8990963855421687e-05, + "loss": 0.274, + "step": 5775 + }, + { + "epoch": 34.81927710843374, + "grad_norm": 3.6488428115844727, + "learning_rate": 2.9016064257028112e-05, + "loss": 0.3035, + "step": 5780 + }, + { + "epoch": 34.84939759036145, + "grad_norm": 2.2879462242126465, + "learning_rate": 2.904116465863454e-05, + "loss": 0.2783, + "step": 5785 + }, + { + "epoch": 34.87951807228916, + "grad_norm": 2.7552425861358643, + "learning_rate": 2.9066265060240967e-05, + "loss": 0.2255, + "step": 5790 + }, + { + "epoch": 34.90963855421687, + "grad_norm": 2.1789631843566895, + "learning_rate": 2.9091365461847393e-05, + "loss": 0.2339, + "step": 5795 + }, + { + "epoch": 34.93975903614458, + "grad_norm": 1.7293847799301147, + "learning_rate": 2.911646586345382e-05, + "loss": 0.2709, + "step": 5800 + }, + { + "epoch": 34.96987951807229, + "grad_norm": 2.7016797065734863, + "learning_rate": 2.9141566265060244e-05, + "loss": 0.2735, + "step": 5805 + }, + { + "epoch": 35.0, + "grad_norm": 2.5335707664489746, + "learning_rate": 2.916666666666667e-05, + "loss": 0.279, + "step": 5810 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9028706252457727, + "eval_auc": 0.956116426045903, + "eval_f1": 0.8445563247325362, + "eval_loss": 0.2674693167209625, + "eval_precision": 0.9424157303370787, + "eval_recall": 0.7651083238312428, + "eval_runtime": 17.6831, + "eval_samples_per_second": 143.81, + "eval_steps_per_second": 0.735, + "step": 5810 + }, + { + "epoch": 35.03012048192771, + "grad_norm": 2.46142578125, + "learning_rate": 2.9191767068273095e-05, + "loss": 0.2537, + "step": 5815 + }, + { + "epoch": 35.06024096385542, + "grad_norm": 3.206148862838745, + "learning_rate": 2.921686746987952e-05, + "loss": 0.2767, + "step": 5820 + }, + { + "epoch": 35.09036144578313, + "grad_norm": 2.477686882019043, + "learning_rate": 2.9241967871485943e-05, + "loss": 0.2423, + "step": 5825 + }, + { + "epoch": 35.12048192771084, + "grad_norm": 3.08777117729187, + "learning_rate": 2.926706827309237e-05, + "loss": 0.2657, + "step": 5830 + }, + { + "epoch": 35.15060240963855, + "grad_norm": 2.131882429122925, + "learning_rate": 2.9292168674698795e-05, + "loss": 0.245, + "step": 5835 + }, + { + "epoch": 35.18072289156626, + "grad_norm": 2.3372786045074463, + "learning_rate": 2.931726907630522e-05, + "loss": 0.2793, + "step": 5840 + }, + { + "epoch": 35.21084337349398, + "grad_norm": 2.8479580879211426, + "learning_rate": 2.9342369477911646e-05, + "loss": 0.2144, + "step": 5845 + }, + { + "epoch": 35.24096385542169, + "grad_norm": 3.9904942512512207, + "learning_rate": 2.9367469879518072e-05, + "loss": 0.2547, + "step": 5850 + }, + { + "epoch": 35.2710843373494, + "grad_norm": 2.9604651927948, + "learning_rate": 2.9392570281124497e-05, + "loss": 0.2963, + "step": 5855 + }, + { + "epoch": 35.30120481927711, + "grad_norm": 2.49530029296875, + "learning_rate": 2.9417670682730923e-05, + "loss": 0.2565, + "step": 5860 + }, + { + "epoch": 35.33132530120482, + "grad_norm": 1.704471468925476, + "learning_rate": 2.9442771084337352e-05, + "loss": 0.254, + "step": 5865 + }, + { + "epoch": 35.36144578313253, + "grad_norm": 2.494004964828491, + "learning_rate": 2.9467871485943778e-05, + "loss": 0.2319, + "step": 5870 + }, + { + "epoch": 35.39156626506024, + "grad_norm": 2.478168487548828, + "learning_rate": 2.9492971887550204e-05, + "loss": 0.2676, + "step": 5875 + }, + { + "epoch": 35.42168674698795, + "grad_norm": 2.5450990200042725, + "learning_rate": 2.951807228915663e-05, + "loss": 0.2246, + "step": 5880 + }, + { + "epoch": 35.45180722891566, + "grad_norm": 1.7174454927444458, + "learning_rate": 2.9543172690763055e-05, + "loss": 0.2338, + "step": 5885 + }, + { + "epoch": 35.48192771084337, + "grad_norm": 2.448060989379883, + "learning_rate": 2.956827309236948e-05, + "loss": 0.2705, + "step": 5890 + }, + { + "epoch": 35.51204819277108, + "grad_norm": 2.077549457550049, + "learning_rate": 2.9593373493975906e-05, + "loss": 0.2549, + "step": 5895 + }, + { + "epoch": 35.54216867469879, + "grad_norm": 2.6109633445739746, + "learning_rate": 2.9618473895582332e-05, + "loss": 0.2472, + "step": 5900 + }, + { + "epoch": 35.5722891566265, + "grad_norm": 2.551146984100342, + "learning_rate": 2.9643574297188758e-05, + "loss": 0.268, + "step": 5905 + }, + { + "epoch": 35.602409638554214, + "grad_norm": 2.3147637844085693, + "learning_rate": 2.9668674698795183e-05, + "loss": 0.23, + "step": 5910 + }, + { + "epoch": 35.63253012048193, + "grad_norm": 2.584393262863159, + "learning_rate": 2.9693775100401606e-05, + "loss": 0.2507, + "step": 5915 + }, + { + "epoch": 35.66265060240964, + "grad_norm": 2.71573543548584, + "learning_rate": 2.971887550200803e-05, + "loss": 0.3107, + "step": 5920 + }, + { + "epoch": 35.69277108433735, + "grad_norm": 2.746415376663208, + "learning_rate": 2.9743975903614457e-05, + "loss": 0.2501, + "step": 5925 + }, + { + "epoch": 35.72289156626506, + "grad_norm": 1.9923425912857056, + "learning_rate": 2.9769076305220883e-05, + "loss": 0.2986, + "step": 5930 + }, + { + "epoch": 35.75301204819277, + "grad_norm": 2.963034152984619, + "learning_rate": 2.9794176706827308e-05, + "loss": 0.2979, + "step": 5935 + }, + { + "epoch": 35.78313253012048, + "grad_norm": 2.78971791267395, + "learning_rate": 2.9819277108433734e-05, + "loss": 0.2302, + "step": 5940 + }, + { + "epoch": 35.81325301204819, + "grad_norm": 1.9936814308166504, + "learning_rate": 2.9844377510040163e-05, + "loss": 0.2608, + "step": 5945 + }, + { + "epoch": 35.8433734939759, + "grad_norm": 2.7506966590881348, + "learning_rate": 2.986947791164659e-05, + "loss": 0.2347, + "step": 5950 + }, + { + "epoch": 35.873493975903614, + "grad_norm": 2.516293525695801, + "learning_rate": 2.9894578313253014e-05, + "loss": 0.2546, + "step": 5955 + }, + { + "epoch": 35.903614457831324, + "grad_norm": 3.7010300159454346, + "learning_rate": 2.991967871485944e-05, + "loss": 0.2571, + "step": 5960 + }, + { + "epoch": 35.933734939759034, + "grad_norm": 2.489213466644287, + "learning_rate": 2.9944779116465866e-05, + "loss": 0.2735, + "step": 5965 + }, + { + "epoch": 35.963855421686745, + "grad_norm": 3.4056196212768555, + "learning_rate": 2.996987951807229e-05, + "loss": 0.3093, + "step": 5970 + }, + { + "epoch": 35.993975903614455, + "grad_norm": 2.641334295272827, + "learning_rate": 2.9994979919678717e-05, + "loss": 0.3134, + "step": 5975 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9040503342508848, + "eval_auc": 0.9602633527755456, + "eval_f1": 0.8653421633554084, + "eval_loss": 0.24029910564422607, + "eval_precision": 0.8385026737967914, + "eval_recall": 0.8939566704675028, + "eval_runtime": 17.1657, + "eval_samples_per_second": 148.145, + "eval_steps_per_second": 0.757, + "step": 5976 + }, + { + "epoch": 36.024096385542165, + "grad_norm": 2.1633102893829346, + "learning_rate": 3.0020080321285143e-05, + "loss": 0.2807, + "step": 5980 + }, + { + "epoch": 36.05421686746988, + "grad_norm": 2.105492115020752, + "learning_rate": 3.004518072289157e-05, + "loss": 0.2768, + "step": 5985 + }, + { + "epoch": 36.08433734939759, + "grad_norm": 2.0044474601745605, + "learning_rate": 3.0070281124497994e-05, + "loss": 0.2568, + "step": 5990 + }, + { + "epoch": 36.1144578313253, + "grad_norm": 2.109309434890747, + "learning_rate": 3.0095381526104423e-05, + "loss": 0.2165, + "step": 5995 + }, + { + "epoch": 36.144578313253014, + "grad_norm": 3.5626933574676514, + "learning_rate": 3.012048192771085e-05, + "loss": 0.2817, + "step": 6000 + }, + { + "epoch": 36.174698795180724, + "grad_norm": 3.385136127471924, + "learning_rate": 3.0145582329317268e-05, + "loss": 0.2461, + "step": 6005 + }, + { + "epoch": 36.204819277108435, + "grad_norm": 2.151554822921753, + "learning_rate": 3.0170682730923693e-05, + "loss": 0.2306, + "step": 6010 + }, + { + "epoch": 36.234939759036145, + "grad_norm": 2.27700138092041, + "learning_rate": 3.019578313253012e-05, + "loss": 0.2384, + "step": 6015 + }, + { + "epoch": 36.265060240963855, + "grad_norm": 2.4433555603027344, + "learning_rate": 3.0220883534136545e-05, + "loss": 0.2264, + "step": 6020 + }, + { + "epoch": 36.295180722891565, + "grad_norm": 2.5779662132263184, + "learning_rate": 3.024598393574297e-05, + "loss": 0.2486, + "step": 6025 + }, + { + "epoch": 36.325301204819276, + "grad_norm": 2.5328152179718018, + "learning_rate": 3.02710843373494e-05, + "loss": 0.2668, + "step": 6030 + }, + { + "epoch": 36.355421686746986, + "grad_norm": 2.2948355674743652, + "learning_rate": 3.0296184738955825e-05, + "loss": 0.2898, + "step": 6035 + }, + { + "epoch": 36.3855421686747, + "grad_norm": 2.2671010494232178, + "learning_rate": 3.032128514056225e-05, + "loss": 0.2695, + "step": 6040 + }, + { + "epoch": 36.41566265060241, + "grad_norm": 1.7064307928085327, + "learning_rate": 3.0346385542168676e-05, + "loss": 0.2441, + "step": 6045 + }, + { + "epoch": 36.44578313253012, + "grad_norm": 2.15734601020813, + "learning_rate": 3.0371485943775102e-05, + "loss": 0.26, + "step": 6050 + }, + { + "epoch": 36.475903614457835, + "grad_norm": 2.113530158996582, + "learning_rate": 3.0396586345381528e-05, + "loss": 0.2525, + "step": 6055 + }, + { + "epoch": 36.506024096385545, + "grad_norm": 1.943936824798584, + "learning_rate": 3.0421686746987953e-05, + "loss": 0.2653, + "step": 6060 + }, + { + "epoch": 36.536144578313255, + "grad_norm": 1.9741214513778687, + "learning_rate": 3.044678714859438e-05, + "loss": 0.2628, + "step": 6065 + }, + { + "epoch": 36.566265060240966, + "grad_norm": 2.583420991897583, + "learning_rate": 3.0471887550200805e-05, + "loss": 0.21, + "step": 6070 + }, + { + "epoch": 36.596385542168676, + "grad_norm": 2.4453647136688232, + "learning_rate": 3.049698795180723e-05, + "loss": 0.2651, + "step": 6075 + }, + { + "epoch": 36.626506024096386, + "grad_norm": 2.5714426040649414, + "learning_rate": 3.052208835341366e-05, + "loss": 0.2722, + "step": 6080 + }, + { + "epoch": 36.6566265060241, + "grad_norm": 3.1495213508605957, + "learning_rate": 3.054718875502008e-05, + "loss": 0.2545, + "step": 6085 + }, + { + "epoch": 36.68674698795181, + "grad_norm": 3.1175990104675293, + "learning_rate": 3.057228915662651e-05, + "loss": 0.2512, + "step": 6090 + }, + { + "epoch": 36.71686746987952, + "grad_norm": 3.22166109085083, + "learning_rate": 3.059738955823293e-05, + "loss": 0.2541, + "step": 6095 + }, + { + "epoch": 36.74698795180723, + "grad_norm": 2.9877281188964844, + "learning_rate": 3.0622489959839355e-05, + "loss": 0.2832, + "step": 6100 + }, + { + "epoch": 36.77710843373494, + "grad_norm": 2.9391582012176514, + "learning_rate": 3.0647590361445784e-05, + "loss": 0.3119, + "step": 6105 + }, + { + "epoch": 36.80722891566265, + "grad_norm": 2.741736650466919, + "learning_rate": 3.067269076305221e-05, + "loss": 0.3377, + "step": 6110 + }, + { + "epoch": 36.83734939759036, + "grad_norm": 1.8150005340576172, + "learning_rate": 3.0697791164658636e-05, + "loss": 0.2782, + "step": 6115 + }, + { + "epoch": 36.86746987951807, + "grad_norm": 2.3358445167541504, + "learning_rate": 3.072289156626506e-05, + "loss": 0.2753, + "step": 6120 + }, + { + "epoch": 36.897590361445786, + "grad_norm": 2.2918527126312256, + "learning_rate": 3.074799196787149e-05, + "loss": 0.2636, + "step": 6125 + }, + { + "epoch": 36.9277108433735, + "grad_norm": 2.7234408855438232, + "learning_rate": 3.077309236947791e-05, + "loss": 0.2631, + "step": 6130 + }, + { + "epoch": 36.95783132530121, + "grad_norm": 2.4186816215515137, + "learning_rate": 3.079819277108434e-05, + "loss": 0.2569, + "step": 6135 + }, + { + "epoch": 36.98795180722892, + "grad_norm": 2.6633265018463135, + "learning_rate": 3.082329317269077e-05, + "loss": 0.2653, + "step": 6140 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9213527329925285, + "eval_auc": 0.9617944098962276, + "eval_f1": 0.8835855646100116, + "eval_loss": 0.21790018677711487, + "eval_precision": 0.9024970273483948, + "eval_recall": 0.8654503990877993, + "eval_runtime": 17.1708, + "eval_samples_per_second": 148.1, + "eval_steps_per_second": 0.757, + "step": 6142 + }, + { + "epoch": 37.01807228915663, + "grad_norm": 2.8472111225128174, + "learning_rate": 3.084839357429719e-05, + "loss": 0.2264, + "step": 6145 + }, + { + "epoch": 37.04819277108434, + "grad_norm": 2.0315515995025635, + "learning_rate": 3.087349397590362e-05, + "loss": 0.2352, + "step": 6150 + }, + { + "epoch": 37.07831325301205, + "grad_norm": 2.0432772636413574, + "learning_rate": 3.089859437751004e-05, + "loss": 0.2107, + "step": 6155 + }, + { + "epoch": 37.10843373493976, + "grad_norm": 2.9990720748901367, + "learning_rate": 3.092369477911647e-05, + "loss": 0.2566, + "step": 6160 + }, + { + "epoch": 37.13855421686747, + "grad_norm": 2.3924663066864014, + "learning_rate": 3.094879518072289e-05, + "loss": 0.241, + "step": 6165 + }, + { + "epoch": 37.16867469879518, + "grad_norm": 2.013744592666626, + "learning_rate": 3.097389558232932e-05, + "loss": 0.2483, + "step": 6170 + }, + { + "epoch": 37.19879518072289, + "grad_norm": 1.6174145936965942, + "learning_rate": 3.0998995983935744e-05, + "loss": 0.2591, + "step": 6175 + }, + { + "epoch": 37.2289156626506, + "grad_norm": 2.5364930629730225, + "learning_rate": 3.102409638554217e-05, + "loss": 0.2525, + "step": 6180 + }, + { + "epoch": 37.25903614457831, + "grad_norm": 2.143467664718628, + "learning_rate": 3.1049196787148595e-05, + "loss": 0.2601, + "step": 6185 + }, + { + "epoch": 37.28915662650602, + "grad_norm": 2.03132963180542, + "learning_rate": 3.107429718875502e-05, + "loss": 0.2342, + "step": 6190 + }, + { + "epoch": 37.31927710843374, + "grad_norm": 2.004775285720825, + "learning_rate": 3.1099397590361447e-05, + "loss": 0.2839, + "step": 6195 + }, + { + "epoch": 37.34939759036145, + "grad_norm": 2.106445074081421, + "learning_rate": 3.112449799196787e-05, + "loss": 0.241, + "step": 6200 + }, + { + "epoch": 37.37951807228916, + "grad_norm": 2.17498779296875, + "learning_rate": 3.11495983935743e-05, + "loss": 0.2339, + "step": 6205 + }, + { + "epoch": 37.40963855421687, + "grad_norm": 2.7526564598083496, + "learning_rate": 3.117469879518072e-05, + "loss": 0.2675, + "step": 6210 + }, + { + "epoch": 37.43975903614458, + "grad_norm": 2.4853410720825195, + "learning_rate": 3.119979919678715e-05, + "loss": 0.2863, + "step": 6215 + }, + { + "epoch": 37.46987951807229, + "grad_norm": 2.1971094608306885, + "learning_rate": 3.122489959839358e-05, + "loss": 0.2694, + "step": 6220 + }, + { + "epoch": 37.5, + "grad_norm": 2.1958532333374023, + "learning_rate": 3.125e-05, + "loss": 0.2826, + "step": 6225 + }, + { + "epoch": 37.53012048192771, + "grad_norm": 1.6764627695083618, + "learning_rate": 3.127510040160643e-05, + "loss": 0.2419, + "step": 6230 + }, + { + "epoch": 37.56024096385542, + "grad_norm": 1.8171449899673462, + "learning_rate": 3.130020080321285e-05, + "loss": 0.2418, + "step": 6235 + }, + { + "epoch": 37.59036144578313, + "grad_norm": 1.5661555528640747, + "learning_rate": 3.132530120481928e-05, + "loss": 0.2646, + "step": 6240 + }, + { + "epoch": 37.62048192771084, + "grad_norm": 1.699576497077942, + "learning_rate": 3.13504016064257e-05, + "loss": 0.2558, + "step": 6245 + }, + { + "epoch": 37.65060240963855, + "grad_norm": 2.271219253540039, + "learning_rate": 3.137550200803213e-05, + "loss": 0.2487, + "step": 6250 + }, + { + "epoch": 37.68072289156626, + "grad_norm": 2.469144105911255, + "learning_rate": 3.1400602409638555e-05, + "loss": 0.2311, + "step": 6255 + }, + { + "epoch": 37.71084337349397, + "grad_norm": 1.8686410188674927, + "learning_rate": 3.1425702811244984e-05, + "loss": 0.2417, + "step": 6260 + }, + { + "epoch": 37.74096385542169, + "grad_norm": 2.3052306175231934, + "learning_rate": 3.145080321285141e-05, + "loss": 0.2551, + "step": 6265 + }, + { + "epoch": 37.7710843373494, + "grad_norm": 2.1152069568634033, + "learning_rate": 3.147590361445783e-05, + "loss": 0.2644, + "step": 6270 + }, + { + "epoch": 37.80120481927711, + "grad_norm": 3.332885265350342, + "learning_rate": 3.150100401606426e-05, + "loss": 0.2368, + "step": 6275 + }, + { + "epoch": 37.83132530120482, + "grad_norm": 3.286970615386963, + "learning_rate": 3.152610441767068e-05, + "loss": 0.2925, + "step": 6280 + }, + { + "epoch": 37.86144578313253, + "grad_norm": 2.1198184490203857, + "learning_rate": 3.155120481927711e-05, + "loss": 0.2001, + "step": 6285 + }, + { + "epoch": 37.89156626506024, + "grad_norm": 2.578740119934082, + "learning_rate": 3.157630522088353e-05, + "loss": 0.2602, + "step": 6290 + }, + { + "epoch": 37.92168674698795, + "grad_norm": 2.738973617553711, + "learning_rate": 3.160140562248996e-05, + "loss": 0.2542, + "step": 6295 + }, + { + "epoch": 37.95180722891566, + "grad_norm": 2.4956698417663574, + "learning_rate": 3.162650602409639e-05, + "loss": 0.2792, + "step": 6300 + }, + { + "epoch": 37.98192771084337, + "grad_norm": 2.3061397075653076, + "learning_rate": 3.165160642570281e-05, + "loss": 0.2746, + "step": 6305 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.9182068423122296, + "eval_auc": 0.9663975054103738, + "eval_f1": 0.8737864077669902, + "eval_loss": 0.2299535572528839, + "eval_precision": 0.933852140077821, + "eval_recall": 0.8209806157354618, + "eval_runtime": 17.2374, + "eval_samples_per_second": 147.528, + "eval_steps_per_second": 0.754, + "step": 6308 + }, + { + "epoch": 38.01204819277108, + "grad_norm": 2.216925859451294, + "learning_rate": 3.167670682730924e-05, + "loss": 0.228, + "step": 6310 + }, + { + "epoch": 38.04216867469879, + "grad_norm": 1.9454985857009888, + "learning_rate": 3.170180722891566e-05, + "loss": 0.2047, + "step": 6315 + }, + { + "epoch": 38.0722891566265, + "grad_norm": 2.274045467376709, + "learning_rate": 3.172690763052209e-05, + "loss": 0.2281, + "step": 6320 + }, + { + "epoch": 38.102409638554214, + "grad_norm": 2.2698090076446533, + "learning_rate": 3.1752008032128514e-05, + "loss": 0.2617, + "step": 6325 + }, + { + "epoch": 38.13253012048193, + "grad_norm": 2.3226888179779053, + "learning_rate": 3.177710843373494e-05, + "loss": 0.2442, + "step": 6330 + }, + { + "epoch": 38.16265060240964, + "grad_norm": 2.456148147583008, + "learning_rate": 3.1802208835341365e-05, + "loss": 0.2716, + "step": 6335 + }, + { + "epoch": 38.19277108433735, + "grad_norm": 2.4330947399139404, + "learning_rate": 3.1827309236947795e-05, + "loss": 0.2403, + "step": 6340 + }, + { + "epoch": 38.22289156626506, + "grad_norm": 3.385854959487915, + "learning_rate": 3.1852409638554224e-05, + "loss": 0.2505, + "step": 6345 + }, + { + "epoch": 38.25301204819277, + "grad_norm": 2.735837936401367, + "learning_rate": 3.1877510040160646e-05, + "loss": 0.2294, + "step": 6350 + }, + { + "epoch": 38.28313253012048, + "grad_norm": 3.903822660446167, + "learning_rate": 3.1902610441767075e-05, + "loss": 0.225, + "step": 6355 + }, + { + "epoch": 38.31325301204819, + "grad_norm": 2.4196770191192627, + "learning_rate": 3.192771084337349e-05, + "loss": 0.2276, + "step": 6360 + }, + { + "epoch": 38.3433734939759, + "grad_norm": 2.6809000968933105, + "learning_rate": 3.195281124497992e-05, + "loss": 0.2765, + "step": 6365 + }, + { + "epoch": 38.373493975903614, + "grad_norm": 2.375967025756836, + "learning_rate": 3.197791164658634e-05, + "loss": 0.2218, + "step": 6370 + }, + { + "epoch": 38.403614457831324, + "grad_norm": 2.497779130935669, + "learning_rate": 3.200301204819277e-05, + "loss": 0.2772, + "step": 6375 + }, + { + "epoch": 38.433734939759034, + "grad_norm": 3.284944534301758, + "learning_rate": 3.20281124497992e-05, + "loss": 0.2785, + "step": 6380 + }, + { + "epoch": 38.463855421686745, + "grad_norm": 2.3392996788024902, + "learning_rate": 3.205321285140562e-05, + "loss": 0.2187, + "step": 6385 + }, + { + "epoch": 38.493975903614455, + "grad_norm": 2.683300495147705, + "learning_rate": 3.207831325301205e-05, + "loss": 0.2323, + "step": 6390 + }, + { + "epoch": 38.524096385542165, + "grad_norm": 2.324141502380371, + "learning_rate": 3.2103413654618474e-05, + "loss": 0.2644, + "step": 6395 + }, + { + "epoch": 38.55421686746988, + "grad_norm": 1.9178905487060547, + "learning_rate": 3.21285140562249e-05, + "loss": 0.2284, + "step": 6400 + }, + { + "epoch": 38.58433734939759, + "grad_norm": 2.290726661682129, + "learning_rate": 3.2153614457831325e-05, + "loss": 0.2214, + "step": 6405 + }, + { + "epoch": 38.6144578313253, + "grad_norm": 2.622251510620117, + "learning_rate": 3.2178714859437754e-05, + "loss": 0.2897, + "step": 6410 + }, + { + "epoch": 38.644578313253014, + "grad_norm": 2.4100887775421143, + "learning_rate": 3.2203815261044176e-05, + "loss": 0.2199, + "step": 6415 + }, + { + "epoch": 38.674698795180724, + "grad_norm": 2.782029151916504, + "learning_rate": 3.2228915662650605e-05, + "loss": 0.2315, + "step": 6420 + }, + { + "epoch": 38.704819277108435, + "grad_norm": 2.4892172813415527, + "learning_rate": 3.2254016064257034e-05, + "loss": 0.2177, + "step": 6425 + }, + { + "epoch": 38.734939759036145, + "grad_norm": 3.1388111114501953, + "learning_rate": 3.227911646586346e-05, + "loss": 0.2011, + "step": 6430 + }, + { + "epoch": 38.765060240963855, + "grad_norm": 2.3266518115997314, + "learning_rate": 3.2304216867469886e-05, + "loss": 0.2693, + "step": 6435 + }, + { + "epoch": 38.795180722891565, + "grad_norm": 2.3568663597106934, + "learning_rate": 3.232931726907631e-05, + "loss": 0.2237, + "step": 6440 + }, + { + "epoch": 38.825301204819276, + "grad_norm": 2.961787462234497, + "learning_rate": 3.235441767068274e-05, + "loss": 0.2548, + "step": 6445 + }, + { + "epoch": 38.855421686746986, + "grad_norm": 3.2425358295440674, + "learning_rate": 3.237951807228915e-05, + "loss": 0.2585, + "step": 6450 + }, + { + "epoch": 38.8855421686747, + "grad_norm": 2.586047410964966, + "learning_rate": 3.240461847389558e-05, + "loss": 0.2849, + "step": 6455 + }, + { + "epoch": 38.91566265060241, + "grad_norm": 1.9064887762069702, + "learning_rate": 3.242971887550201e-05, + "loss": 0.2631, + "step": 6460 + }, + { + "epoch": 38.94578313253012, + "grad_norm": 2.6929991245269775, + "learning_rate": 3.245481927710843e-05, + "loss": 0.285, + "step": 6465 + }, + { + "epoch": 38.975903614457835, + "grad_norm": 2.097921133041382, + "learning_rate": 3.247991967871486e-05, + "loss": 0.2464, + "step": 6470 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.913094769956744, + "eval_auc": 0.9603937356014242, + "eval_f1": 0.8709865732632808, + "eval_loss": 0.23605474829673767, + "eval_precision": 0.8923444976076556, + "eval_recall": 0.8506271379703535, + "eval_runtime": 18.2756, + "eval_samples_per_second": 139.147, + "eval_steps_per_second": 0.711, + "step": 6474 + }, + { + "epoch": 39.006024096385545, + "grad_norm": 2.050915002822876, + "learning_rate": 3.2505020080321284e-05, + "loss": 0.2261, + "step": 6475 + }, + { + "epoch": 39.036144578313255, + "grad_norm": 2.389162302017212, + "learning_rate": 3.253012048192771e-05, + "loss": 0.2349, + "step": 6480 + }, + { + "epoch": 39.066265060240966, + "grad_norm": 2.4876036643981934, + "learning_rate": 3.2555220883534136e-05, + "loss": 0.222, + "step": 6485 + }, + { + "epoch": 39.096385542168676, + "grad_norm": 2.4081146717071533, + "learning_rate": 3.2580321285140565e-05, + "loss": 0.2289, + "step": 6490 + }, + { + "epoch": 39.126506024096386, + "grad_norm": 2.9568965435028076, + "learning_rate": 3.260542168674699e-05, + "loss": 0.2343, + "step": 6495 + }, + { + "epoch": 39.1566265060241, + "grad_norm": 2.8140311241149902, + "learning_rate": 3.2630522088353416e-05, + "loss": 0.2473, + "step": 6500 + }, + { + "epoch": 39.18674698795181, + "grad_norm": 3.025899648666382, + "learning_rate": 3.265562248995984e-05, + "loss": 0.2317, + "step": 6505 + }, + { + "epoch": 39.21686746987952, + "grad_norm": 3.627990245819092, + "learning_rate": 3.268072289156627e-05, + "loss": 0.2362, + "step": 6510 + }, + { + "epoch": 39.24698795180723, + "grad_norm": 1.3940659761428833, + "learning_rate": 3.2705823293172696e-05, + "loss": 0.2056, + "step": 6515 + }, + { + "epoch": 39.27710843373494, + "grad_norm": 2.9245965480804443, + "learning_rate": 3.273092369477912e-05, + "loss": 0.2311, + "step": 6520 + }, + { + "epoch": 39.30722891566265, + "grad_norm": 2.4287238121032715, + "learning_rate": 3.275602409638555e-05, + "loss": 0.2385, + "step": 6525 + }, + { + "epoch": 39.33734939759036, + "grad_norm": 2.023843765258789, + "learning_rate": 3.278112449799197e-05, + "loss": 0.2409, + "step": 6530 + }, + { + "epoch": 39.36746987951807, + "grad_norm": 1.6753655672073364, + "learning_rate": 3.28062248995984e-05, + "loss": 0.2207, + "step": 6535 + }, + { + "epoch": 39.397590361445786, + "grad_norm": 2.5869171619415283, + "learning_rate": 3.283132530120482e-05, + "loss": 0.2518, + "step": 6540 + }, + { + "epoch": 39.4277108433735, + "grad_norm": 1.6591705083847046, + "learning_rate": 3.2856425702811244e-05, + "loss": 0.2157, + "step": 6545 + }, + { + "epoch": 39.45783132530121, + "grad_norm": 1.6560969352722168, + "learning_rate": 3.288152610441767e-05, + "loss": 0.2458, + "step": 6550 + }, + { + "epoch": 39.48795180722892, + "grad_norm": 2.0736494064331055, + "learning_rate": 3.2906626506024095e-05, + "loss": 0.2072, + "step": 6555 + }, + { + "epoch": 39.51807228915663, + "grad_norm": 2.9606733322143555, + "learning_rate": 3.2931726907630524e-05, + "loss": 0.2252, + "step": 6560 + }, + { + "epoch": 39.54819277108434, + "grad_norm": 2.837667226791382, + "learning_rate": 3.2956827309236946e-05, + "loss": 0.2588, + "step": 6565 + }, + { + "epoch": 39.57831325301205, + "grad_norm": 2.7487363815307617, + "learning_rate": 3.2981927710843376e-05, + "loss": 0.269, + "step": 6570 + }, + { + "epoch": 39.60843373493976, + "grad_norm": 2.1929149627685547, + "learning_rate": 3.30070281124498e-05, + "loss": 0.2282, + "step": 6575 + }, + { + "epoch": 39.63855421686747, + "grad_norm": 3.401123046875, + "learning_rate": 3.303212851405623e-05, + "loss": 0.2388, + "step": 6580 + }, + { + "epoch": 39.66867469879518, + "grad_norm": 2.7502694129943848, + "learning_rate": 3.305722891566265e-05, + "loss": 0.2215, + "step": 6585 + }, + { + "epoch": 39.69879518072289, + "grad_norm": 2.4091484546661377, + "learning_rate": 3.308232931726908e-05, + "loss": 0.2476, + "step": 6590 + }, + { + "epoch": 39.7289156626506, + "grad_norm": 2.429182767868042, + "learning_rate": 3.310742971887551e-05, + "loss": 0.2619, + "step": 6595 + }, + { + "epoch": 39.75903614457831, + "grad_norm": 2.5643534660339355, + "learning_rate": 3.313253012048193e-05, + "loss": 0.2515, + "step": 6600 + }, + { + "epoch": 39.78915662650603, + "grad_norm": 1.7576724290847778, + "learning_rate": 3.315763052208836e-05, + "loss": 0.2011, + "step": 6605 + }, + { + "epoch": 39.81927710843374, + "grad_norm": 2.449651002883911, + "learning_rate": 3.318273092369478e-05, + "loss": 0.2431, + "step": 6610 + }, + { + "epoch": 39.84939759036145, + "grad_norm": 2.387871265411377, + "learning_rate": 3.320783132530121e-05, + "loss": 0.2842, + "step": 6615 + }, + { + "epoch": 39.87951807228916, + "grad_norm": 1.8376022577285767, + "learning_rate": 3.323293172690763e-05, + "loss": 0.2144, + "step": 6620 + }, + { + "epoch": 39.90963855421687, + "grad_norm": 3.066810131072998, + "learning_rate": 3.325803212851406e-05, + "loss": 0.2541, + "step": 6625 + }, + { + "epoch": 39.93975903614458, + "grad_norm": 2.554441213607788, + "learning_rate": 3.3283132530120484e-05, + "loss": 0.2243, + "step": 6630 + }, + { + "epoch": 39.96987951807229, + "grad_norm": 2.362246036529541, + "learning_rate": 3.3308232931726906e-05, + "loss": 0.2236, + "step": 6635 + }, + { + "epoch": 40.0, + "grad_norm": 2.932560682296753, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.3024, + "step": 6640 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9142744789618561, + "eval_auc": 0.9637652096186251, + "eval_f1": 0.874133949191686, + "eval_loss": 0.22493180632591248, + "eval_precision": 0.8853801169590644, + "eval_recall": 0.863169897377423, + "eval_runtime": 20.6328, + "eval_samples_per_second": 123.25, + "eval_steps_per_second": 0.63, + "step": 6640 + }, + { + "epoch": 40.03012048192771, + "grad_norm": 2.6266329288482666, + "learning_rate": 3.335843373493976e-05, + "loss": 0.2429, + "step": 6645 + }, + { + "epoch": 40.06024096385542, + "grad_norm": 1.4775539636611938, + "learning_rate": 3.3383534136546186e-05, + "loss": 0.2261, + "step": 6650 + }, + { + "epoch": 40.09036144578313, + "grad_norm": 2.8822290897369385, + "learning_rate": 3.340863453815261e-05, + "loss": 0.2474, + "step": 6655 + }, + { + "epoch": 40.12048192771084, + "grad_norm": 2.0075509548187256, + "learning_rate": 3.343373493975904e-05, + "loss": 0.2164, + "step": 6660 + }, + { + "epoch": 40.15060240963855, + "grad_norm": 1.9930087327957153, + "learning_rate": 3.345883534136546e-05, + "loss": 0.2185, + "step": 6665 + }, + { + "epoch": 40.18072289156626, + "grad_norm": 1.7394765615463257, + "learning_rate": 3.348393574297189e-05, + "loss": 0.2029, + "step": 6670 + }, + { + "epoch": 40.21084337349398, + "grad_norm": 2.443556785583496, + "learning_rate": 3.350903614457832e-05, + "loss": 0.2051, + "step": 6675 + }, + { + "epoch": 40.24096385542169, + "grad_norm": 2.26416015625, + "learning_rate": 3.353413654618474e-05, + "loss": 0.2313, + "step": 6680 + }, + { + "epoch": 40.2710843373494, + "grad_norm": 2.6214842796325684, + "learning_rate": 3.355923694779117e-05, + "loss": 0.2137, + "step": 6685 + }, + { + "epoch": 40.30120481927711, + "grad_norm": 3.211836814880371, + "learning_rate": 3.358433734939759e-05, + "loss": 0.2527, + "step": 6690 + }, + { + "epoch": 40.33132530120482, + "grad_norm": 2.4180943965911865, + "learning_rate": 3.360943775100402e-05, + "loss": 0.2209, + "step": 6695 + }, + { + "epoch": 40.36144578313253, + "grad_norm": 3.3650221824645996, + "learning_rate": 3.363453815261044e-05, + "loss": 0.2426, + "step": 6700 + }, + { + "epoch": 40.39156626506024, + "grad_norm": 2.210585832595825, + "learning_rate": 3.365963855421687e-05, + "loss": 0.216, + "step": 6705 + }, + { + "epoch": 40.42168674698795, + "grad_norm": 2.640681743621826, + "learning_rate": 3.3684738955823294e-05, + "loss": 0.2473, + "step": 6710 + }, + { + "epoch": 40.45180722891566, + "grad_norm": 2.018688917160034, + "learning_rate": 3.370983935742972e-05, + "loss": 0.2067, + "step": 6715 + }, + { + "epoch": 40.48192771084337, + "grad_norm": 2.212965965270996, + "learning_rate": 3.3734939759036146e-05, + "loss": 0.2308, + "step": 6720 + }, + { + "epoch": 40.51204819277108, + "grad_norm": 1.9449925422668457, + "learning_rate": 3.376004016064257e-05, + "loss": 0.2535, + "step": 6725 + }, + { + "epoch": 40.54216867469879, + "grad_norm": 2.9005157947540283, + "learning_rate": 3.3785140562249e-05, + "loss": 0.231, + "step": 6730 + }, + { + "epoch": 40.5722891566265, + "grad_norm": 2.57045578956604, + "learning_rate": 3.381024096385542e-05, + "loss": 0.2938, + "step": 6735 + }, + { + "epoch": 40.602409638554214, + "grad_norm": 2.8470585346221924, + "learning_rate": 3.383534136546185e-05, + "loss": 0.2453, + "step": 6740 + }, + { + "epoch": 40.63253012048193, + "grad_norm": 2.2170114517211914, + "learning_rate": 3.386044176706827e-05, + "loss": 0.2318, + "step": 6745 + }, + { + "epoch": 40.66265060240964, + "grad_norm": 2.3872134685516357, + "learning_rate": 3.38855421686747e-05, + "loss": 0.2196, + "step": 6750 + }, + { + "epoch": 40.69277108433735, + "grad_norm": 1.892056941986084, + "learning_rate": 3.391064257028113e-05, + "loss": 0.2559, + "step": 6755 + }, + { + "epoch": 40.72289156626506, + "grad_norm": 1.9791001081466675, + "learning_rate": 3.393574297188755e-05, + "loss": 0.2077, + "step": 6760 + }, + { + "epoch": 40.75301204819277, + "grad_norm": 1.5290873050689697, + "learning_rate": 3.396084337349398e-05, + "loss": 0.2508, + "step": 6765 + }, + { + "epoch": 40.78313253012048, + "grad_norm": 3.184123992919922, + "learning_rate": 3.39859437751004e-05, + "loss": 0.2223, + "step": 6770 + }, + { + "epoch": 40.81325301204819, + "grad_norm": 3.206636428833008, + "learning_rate": 3.401104417670683e-05, + "loss": 0.2824, + "step": 6775 + }, + { + "epoch": 40.8433734939759, + "grad_norm": 1.9404165744781494, + "learning_rate": 3.4036144578313254e-05, + "loss": 0.2262, + "step": 6780 + }, + { + "epoch": 40.873493975903614, + "grad_norm": 2.239152431488037, + "learning_rate": 3.406124497991968e-05, + "loss": 0.2436, + "step": 6785 + }, + { + "epoch": 40.903614457831324, + "grad_norm": 3.0163211822509766, + "learning_rate": 3.4086345381526105e-05, + "loss": 0.2324, + "step": 6790 + }, + { + "epoch": 40.933734939759034, + "grad_norm": 2.691326141357422, + "learning_rate": 3.4111445783132534e-05, + "loss": 0.2229, + "step": 6795 + }, + { + "epoch": 40.963855421686745, + "grad_norm": 2.370250940322876, + "learning_rate": 3.413654618473896e-05, + "loss": 0.2414, + "step": 6800 + }, + { + "epoch": 40.993975903614455, + "grad_norm": 1.2468866109848022, + "learning_rate": 3.416164658634538e-05, + "loss": 0.2063, + "step": 6805 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.906409752261109, + "eval_auc": 0.9662151063390008, + "eval_f1": 0.8491761723700887, + "eval_loss": 0.2846982181072235, + "eval_precision": 0.9557774607703281, + "eval_recall": 0.7639680729760547, + "eval_runtime": 16.9377, + "eval_samples_per_second": 150.138, + "eval_steps_per_second": 0.768, + "step": 6806 + }, + { + "epoch": 41.024096385542165, + "grad_norm": 3.1601436138153076, + "learning_rate": 3.418674698795181e-05, + "loss": 0.2414, + "step": 6810 + }, + { + "epoch": 41.05421686746988, + "grad_norm": 2.834940195083618, + "learning_rate": 3.421184738955823e-05, + "loss": 0.1988, + "step": 6815 + }, + { + "epoch": 41.08433734939759, + "grad_norm": 2.317030668258667, + "learning_rate": 3.423694779116466e-05, + "loss": 0.2078, + "step": 6820 + }, + { + "epoch": 41.1144578313253, + "grad_norm": 2.0118911266326904, + "learning_rate": 3.426204819277108e-05, + "loss": 0.2099, + "step": 6825 + }, + { + "epoch": 41.144578313253014, + "grad_norm": 1.7952815294265747, + "learning_rate": 3.428714859437751e-05, + "loss": 0.2267, + "step": 6830 + }, + { + "epoch": 41.174698795180724, + "grad_norm": 3.234902858734131, + "learning_rate": 3.431224899598394e-05, + "loss": 0.2371, + "step": 6835 + }, + { + "epoch": 41.204819277108435, + "grad_norm": 2.2863574028015137, + "learning_rate": 3.433734939759036e-05, + "loss": 0.2259, + "step": 6840 + }, + { + "epoch": 41.234939759036145, + "grad_norm": 2.8692567348480225, + "learning_rate": 3.436244979919679e-05, + "loss": 0.2237, + "step": 6845 + }, + { + "epoch": 41.265060240963855, + "grad_norm": 2.949395179748535, + "learning_rate": 3.438755020080321e-05, + "loss": 0.2644, + "step": 6850 + }, + { + "epoch": 41.295180722891565, + "grad_norm": 2.0119643211364746, + "learning_rate": 3.441265060240964e-05, + "loss": 0.2103, + "step": 6855 + }, + { + "epoch": 41.325301204819276, + "grad_norm": 2.5554463863372803, + "learning_rate": 3.4437751004016065e-05, + "loss": 0.2422, + "step": 6860 + }, + { + "epoch": 41.355421686746986, + "grad_norm": 2.153459310531616, + "learning_rate": 3.4462851405622494e-05, + "loss": 0.251, + "step": 6865 + }, + { + "epoch": 41.3855421686747, + "grad_norm": 3.219743013381958, + "learning_rate": 3.4487951807228916e-05, + "loss": 0.2173, + "step": 6870 + }, + { + "epoch": 41.41566265060241, + "grad_norm": 2.026254892349243, + "learning_rate": 3.4513052208835345e-05, + "loss": 0.236, + "step": 6875 + }, + { + "epoch": 41.44578313253012, + "grad_norm": 3.212785482406616, + "learning_rate": 3.4538152610441774e-05, + "loss": 0.2829, + "step": 6880 + }, + { + "epoch": 41.475903614457835, + "grad_norm": 2.520616292953491, + "learning_rate": 3.4563253012048196e-05, + "loss": 0.2315, + "step": 6885 + }, + { + "epoch": 41.506024096385545, + "grad_norm": 2.249114513397217, + "learning_rate": 3.4588353413654625e-05, + "loss": 0.254, + "step": 6890 + }, + { + "epoch": 41.536144578313255, + "grad_norm": 2.5036075115203857, + "learning_rate": 3.461345381526104e-05, + "loss": 0.2379, + "step": 6895 + }, + { + "epoch": 41.566265060240966, + "grad_norm": 2.1759984493255615, + "learning_rate": 3.463855421686747e-05, + "loss": 0.2192, + "step": 6900 + }, + { + "epoch": 41.596385542168676, + "grad_norm": 3.1835083961486816, + "learning_rate": 3.466365461847389e-05, + "loss": 0.2439, + "step": 6905 + }, + { + "epoch": 41.626506024096386, + "grad_norm": 1.9755939245224, + "learning_rate": 3.468875502008032e-05, + "loss": 0.218, + "step": 6910 + }, + { + "epoch": 41.6566265060241, + "grad_norm": 2.0728402137756348, + "learning_rate": 3.471385542168675e-05, + "loss": 0.2295, + "step": 6915 + }, + { + "epoch": 41.68674698795181, + "grad_norm": 2.078345775604248, + "learning_rate": 3.473895582329317e-05, + "loss": 0.2214, + "step": 6920 + }, + { + "epoch": 41.71686746987952, + "grad_norm": 2.592635154724121, + "learning_rate": 3.47640562248996e-05, + "loss": 0.2701, + "step": 6925 + }, + { + "epoch": 41.74698795180723, + "grad_norm": 2.844268321990967, + "learning_rate": 3.4789156626506024e-05, + "loss": 0.2432, + "step": 6930 + }, + { + "epoch": 41.77710843373494, + "grad_norm": 2.249371290206909, + "learning_rate": 3.481425702811245e-05, + "loss": 0.2246, + "step": 6935 + }, + { + "epoch": 41.80722891566265, + "grad_norm": 2.175766944885254, + "learning_rate": 3.4839357429718875e-05, + "loss": 0.2312, + "step": 6940 + }, + { + "epoch": 41.83734939759036, + "grad_norm": 2.340944766998291, + "learning_rate": 3.4864457831325304e-05, + "loss": 0.2527, + "step": 6945 + }, + { + "epoch": 41.86746987951807, + "grad_norm": 2.598799705505371, + "learning_rate": 3.488955823293173e-05, + "loss": 0.2205, + "step": 6950 + }, + { + "epoch": 41.897590361445786, + "grad_norm": 2.0303046703338623, + "learning_rate": 3.4914658634538156e-05, + "loss": 0.2323, + "step": 6955 + }, + { + "epoch": 41.9277108433735, + "grad_norm": 2.12449049949646, + "learning_rate": 3.4939759036144585e-05, + "loss": 0.2037, + "step": 6960 + }, + { + "epoch": 41.95783132530121, + "grad_norm": 2.0875773429870605, + "learning_rate": 3.496485943775101e-05, + "loss": 0.1861, + "step": 6965 + }, + { + "epoch": 41.98795180722892, + "grad_norm": 1.7724308967590332, + "learning_rate": 3.4989959839357436e-05, + "loss": 0.2293, + "step": 6970 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.9182068423122296, + "eval_auc": 0.9686663719079422, + "eval_f1": 0.8751500600240096, + "eval_loss": 0.22627486288547516, + "eval_precision": 0.9239543726235742, + "eval_recall": 0.8312428734321551, + "eval_runtime": 17.6652, + "eval_samples_per_second": 143.956, + "eval_steps_per_second": 0.736, + "step": 6972 + }, + { + "epoch": 42.01807228915663, + "grad_norm": 3.5082550048828125, + "learning_rate": 3.501506024096386e-05, + "loss": 0.2477, + "step": 6975 + }, + { + "epoch": 42.04819277108434, + "grad_norm": 2.38348388671875, + "learning_rate": 3.504016064257029e-05, + "loss": 0.1945, + "step": 6980 + }, + { + "epoch": 42.07831325301205, + "grad_norm": 2.430093765258789, + "learning_rate": 3.50652610441767e-05, + "loss": 0.2482, + "step": 6985 + }, + { + "epoch": 42.10843373493976, + "grad_norm": 2.023977279663086, + "learning_rate": 3.509036144578313e-05, + "loss": 0.2388, + "step": 6990 + }, + { + "epoch": 42.13855421686747, + "grad_norm": 2.185081720352173, + "learning_rate": 3.511546184738956e-05, + "loss": 0.2147, + "step": 6995 + }, + { + "epoch": 42.16867469879518, + "grad_norm": 2.2125322818756104, + "learning_rate": 3.5140562248995983e-05, + "loss": 0.2178, + "step": 7000 + }, + { + "epoch": 42.19879518072289, + "grad_norm": 1.4559276103973389, + "learning_rate": 3.516566265060241e-05, + "loss": 0.2033, + "step": 7005 + }, + { + "epoch": 42.2289156626506, + "grad_norm": 1.966293454170227, + "learning_rate": 3.5190763052208835e-05, + "loss": 0.2295, + "step": 7010 + }, + { + "epoch": 42.25903614457831, + "grad_norm": 2.518218994140625, + "learning_rate": 3.5215863453815264e-05, + "loss": 0.2385, + "step": 7015 + }, + { + "epoch": 42.28915662650602, + "grad_norm": 3.4173643589019775, + "learning_rate": 3.5240963855421686e-05, + "loss": 0.2373, + "step": 7020 + }, + { + "epoch": 42.31927710843374, + "grad_norm": 1.9910823106765747, + "learning_rate": 3.5266064257028115e-05, + "loss": 0.2021, + "step": 7025 + }, + { + "epoch": 42.34939759036145, + "grad_norm": 2.9118754863739014, + "learning_rate": 3.529116465863454e-05, + "loss": 0.2254, + "step": 7030 + }, + { + "epoch": 42.37951807228916, + "grad_norm": 1.894925832748413, + "learning_rate": 3.5316265060240967e-05, + "loss": 0.1968, + "step": 7035 + }, + { + "epoch": 42.40963855421687, + "grad_norm": 2.5969414710998535, + "learning_rate": 3.534136546184739e-05, + "loss": 0.2411, + "step": 7040 + }, + { + "epoch": 42.43975903614458, + "grad_norm": 3.139529228210449, + "learning_rate": 3.536646586345382e-05, + "loss": 0.2214, + "step": 7045 + }, + { + "epoch": 42.46987951807229, + "grad_norm": 2.6836605072021484, + "learning_rate": 3.539156626506025e-05, + "loss": 0.2248, + "step": 7050 + }, + { + "epoch": 42.5, + "grad_norm": 1.436519742012024, + "learning_rate": 3.541666666666667e-05, + "loss": 0.2297, + "step": 7055 + }, + { + "epoch": 42.53012048192771, + "grad_norm": 1.9998631477355957, + "learning_rate": 3.54417670682731e-05, + "loss": 0.2392, + "step": 7060 + }, + { + "epoch": 42.56024096385542, + "grad_norm": 1.3359869718551636, + "learning_rate": 3.546686746987952e-05, + "loss": 0.2042, + "step": 7065 + }, + { + "epoch": 42.59036144578313, + "grad_norm": 2.925895929336548, + "learning_rate": 3.549196787148594e-05, + "loss": 0.2243, + "step": 7070 + }, + { + "epoch": 42.62048192771084, + "grad_norm": 2.230924606323242, + "learning_rate": 3.551706827309237e-05, + "loss": 0.2574, + "step": 7075 + }, + { + "epoch": 42.65060240963855, + "grad_norm": 1.888053059577942, + "learning_rate": 3.5542168674698794e-05, + "loss": 0.255, + "step": 7080 + }, + { + "epoch": 42.68072289156626, + "grad_norm": 3.2984719276428223, + "learning_rate": 3.556726907630522e-05, + "loss": 0.2314, + "step": 7085 + }, + { + "epoch": 42.71084337349397, + "grad_norm": 2.741931200027466, + "learning_rate": 3.5592369477911646e-05, + "loss": 0.2333, + "step": 7090 + }, + { + "epoch": 42.74096385542169, + "grad_norm": 2.12164044380188, + "learning_rate": 3.5617469879518075e-05, + "loss": 0.2533, + "step": 7095 + }, + { + "epoch": 42.7710843373494, + "grad_norm": 2.296083688735962, + "learning_rate": 3.56425702811245e-05, + "loss": 0.2438, + "step": 7100 + }, + { + "epoch": 42.80120481927711, + "grad_norm": 2.5614187717437744, + "learning_rate": 3.5667670682730926e-05, + "loss": 0.2952, + "step": 7105 + }, + { + "epoch": 42.83132530120482, + "grad_norm": 3.353217363357544, + "learning_rate": 3.569277108433735e-05, + "loss": 0.2304, + "step": 7110 + }, + { + "epoch": 42.86144578313253, + "grad_norm": 1.8721473217010498, + "learning_rate": 3.571787148594378e-05, + "loss": 0.2587, + "step": 7115 + }, + { + "epoch": 42.89156626506024, + "grad_norm": 2.8771235942840576, + "learning_rate": 3.57429718875502e-05, + "loss": 0.2924, + "step": 7120 + }, + { + "epoch": 42.92168674698795, + "grad_norm": 2.3226511478424072, + "learning_rate": 3.576807228915663e-05, + "loss": 0.2435, + "step": 7125 + }, + { + "epoch": 42.95180722891566, + "grad_norm": 2.316760540008545, + "learning_rate": 3.579317269076306e-05, + "loss": 0.2272, + "step": 7130 + }, + { + "epoch": 42.98192771084337, + "grad_norm": 2.159874439239502, + "learning_rate": 3.581827309236948e-05, + "loss": 0.2165, + "step": 7135 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.9071962249311837, + "eval_auc": 0.959778780383305, + "eval_f1": 0.8567961165048543, + "eval_loss": 0.26574328541755676, + "eval_precision": 0.9156939040207522, + "eval_recall": 0.8050171037628279, + "eval_runtime": 17.2547, + "eval_samples_per_second": 147.38, + "eval_steps_per_second": 0.753, + "step": 7138 + }, + { + "epoch": 43.01204819277108, + "grad_norm": 1.6516984701156616, + "learning_rate": 3.584337349397591e-05, + "loss": 0.2088, + "step": 7140 + }, + { + "epoch": 43.04216867469879, + "grad_norm": 2.384612798690796, + "learning_rate": 3.586847389558233e-05, + "loss": 0.2276, + "step": 7145 + }, + { + "epoch": 43.0722891566265, + "grad_norm": 1.8337193727493286, + "learning_rate": 3.589357429718876e-05, + "loss": 0.1962, + "step": 7150 + }, + { + "epoch": 43.102409638554214, + "grad_norm": 1.7397912740707397, + "learning_rate": 3.591867469879518e-05, + "loss": 0.2244, + "step": 7155 + }, + { + "epoch": 43.13253012048193, + "grad_norm": 2.2589216232299805, + "learning_rate": 3.5943775100401605e-05, + "loss": 0.2308, + "step": 7160 + }, + { + "epoch": 43.16265060240964, + "grad_norm": 1.7657392024993896, + "learning_rate": 3.5968875502008034e-05, + "loss": 0.2065, + "step": 7165 + }, + { + "epoch": 43.19277108433735, + "grad_norm": 2.2116403579711914, + "learning_rate": 3.5993975903614456e-05, + "loss": 0.1994, + "step": 7170 + }, + { + "epoch": 43.22289156626506, + "grad_norm": 2.4515957832336426, + "learning_rate": 3.6019076305220885e-05, + "loss": 0.2301, + "step": 7175 + }, + { + "epoch": 43.25301204819277, + "grad_norm": 2.409895181655884, + "learning_rate": 3.604417670682731e-05, + "loss": 0.2628, + "step": 7180 + }, + { + "epoch": 43.28313253012048, + "grad_norm": 1.354292869567871, + "learning_rate": 3.606927710843374e-05, + "loss": 0.1978, + "step": 7185 + }, + { + "epoch": 43.31325301204819, + "grad_norm": 2.4475462436676025, + "learning_rate": 3.609437751004016e-05, + "loss": 0.234, + "step": 7190 + }, + { + "epoch": 43.3433734939759, + "grad_norm": 1.8721057176589966, + "learning_rate": 3.611947791164659e-05, + "loss": 0.1964, + "step": 7195 + }, + { + "epoch": 43.373493975903614, + "grad_norm": 1.691802740097046, + "learning_rate": 3.614457831325301e-05, + "loss": 0.2253, + "step": 7200 + }, + { + "epoch": 43.403614457831324, + "grad_norm": 2.6589553356170654, + "learning_rate": 3.616967871485944e-05, + "loss": 0.2063, + "step": 7205 + }, + { + "epoch": 43.433734939759034, + "grad_norm": 3.7984652519226074, + "learning_rate": 3.619477911646587e-05, + "loss": 0.2669, + "step": 7210 + }, + { + "epoch": 43.463855421686745, + "grad_norm": 2.075361728668213, + "learning_rate": 3.621987951807229e-05, + "loss": 0.2199, + "step": 7215 + }, + { + "epoch": 43.493975903614455, + "grad_norm": 2.824955940246582, + "learning_rate": 3.624497991967872e-05, + "loss": 0.2655, + "step": 7220 + }, + { + "epoch": 43.524096385542165, + "grad_norm": 2.5480446815490723, + "learning_rate": 3.627008032128514e-05, + "loss": 0.2485, + "step": 7225 + }, + { + "epoch": 43.55421686746988, + "grad_norm": 1.8091715574264526, + "learning_rate": 3.629518072289157e-05, + "loss": 0.2128, + "step": 7230 + }, + { + "epoch": 43.58433734939759, + "grad_norm": 1.4671165943145752, + "learning_rate": 3.6320281124497993e-05, + "loss": 0.2471, + "step": 7235 + }, + { + "epoch": 43.6144578313253, + "grad_norm": 2.3526570796966553, + "learning_rate": 3.634538152610442e-05, + "loss": 0.2348, + "step": 7240 + }, + { + "epoch": 43.644578313253014, + "grad_norm": 2.2521297931671143, + "learning_rate": 3.6370481927710845e-05, + "loss": 0.1954, + "step": 7245 + }, + { + "epoch": 43.674698795180724, + "grad_norm": 2.4531917572021484, + "learning_rate": 3.639558232931727e-05, + "loss": 0.268, + "step": 7250 + }, + { + "epoch": 43.704819277108435, + "grad_norm": 1.406149983406067, + "learning_rate": 3.6420682730923696e-05, + "loss": 0.2, + "step": 7255 + }, + { + "epoch": 43.734939759036145, + "grad_norm": 1.7887499332427979, + "learning_rate": 3.644578313253012e-05, + "loss": 0.183, + "step": 7260 + }, + { + "epoch": 43.765060240963855, + "grad_norm": 3.3391366004943848, + "learning_rate": 3.647088353413655e-05, + "loss": 0.2299, + "step": 7265 + }, + { + "epoch": 43.795180722891565, + "grad_norm": 2.490065574645996, + "learning_rate": 3.649598393574297e-05, + "loss": 0.2554, + "step": 7270 + }, + { + "epoch": 43.825301204819276, + "grad_norm": 2.691765069961548, + "learning_rate": 3.65210843373494e-05, + "loss": 0.2079, + "step": 7275 + }, + { + "epoch": 43.855421686746986, + "grad_norm": 1.9577827453613281, + "learning_rate": 3.654618473895582e-05, + "loss": 0.2438, + "step": 7280 + }, + { + "epoch": 43.8855421686747, + "grad_norm": 2.4634768962860107, + "learning_rate": 3.657128514056225e-05, + "loss": 0.2022, + "step": 7285 + }, + { + "epoch": 43.91566265060241, + "grad_norm": 1.8126288652420044, + "learning_rate": 3.659638554216868e-05, + "loss": 0.2265, + "step": 7290 + }, + { + "epoch": 43.94578313253012, + "grad_norm": 3.2564098834991455, + "learning_rate": 3.66214859437751e-05, + "loss": 0.2232, + "step": 7295 + }, + { + "epoch": 43.975903614457835, + "grad_norm": 1.973140835762024, + "learning_rate": 3.664658634538153e-05, + "loss": 0.262, + "step": 7300 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.9134880062917814, + "eval_auc": 0.965799318587184, + "eval_f1": 0.8784530386740331, + "eval_loss": 0.23760782182216644, + "eval_precision": 0.8520900321543409, + "eval_recall": 0.9064994298745724, + "eval_runtime": 18.138, + "eval_samples_per_second": 140.203, + "eval_steps_per_second": 0.717, + "step": 7304 + }, + { + "epoch": 44.006024096385545, + "grad_norm": 3.11803936958313, + "learning_rate": 3.667168674698795e-05, + "loss": 0.2606, + "step": 7305 + }, + { + "epoch": 44.036144578313255, + "grad_norm": 1.4260988235473633, + "learning_rate": 3.669678714859438e-05, + "loss": 0.1956, + "step": 7310 + }, + { + "epoch": 44.066265060240966, + "grad_norm": 2.298755168914795, + "learning_rate": 3.6721887550200804e-05, + "loss": 0.221, + "step": 7315 + }, + { + "epoch": 44.096385542168676, + "grad_norm": 2.2086496353149414, + "learning_rate": 3.674698795180723e-05, + "loss": 0.2416, + "step": 7320 + }, + { + "epoch": 44.126506024096386, + "grad_norm": 2.1699838638305664, + "learning_rate": 3.6772088353413656e-05, + "loss": 0.2039, + "step": 7325 + }, + { + "epoch": 44.1566265060241, + "grad_norm": 2.87320613861084, + "learning_rate": 3.6797188755020085e-05, + "loss": 0.2373, + "step": 7330 + }, + { + "epoch": 44.18674698795181, + "grad_norm": 2.5652034282684326, + "learning_rate": 3.6822289156626514e-05, + "loss": 0.2004, + "step": 7335 + }, + { + "epoch": 44.21686746987952, + "grad_norm": 3.0507736206054688, + "learning_rate": 3.684738955823293e-05, + "loss": 0.2416, + "step": 7340 + }, + { + "epoch": 44.24698795180723, + "grad_norm": 2.3488194942474365, + "learning_rate": 3.687248995983936e-05, + "loss": 0.2158, + "step": 7345 + }, + { + "epoch": 44.27710843373494, + "grad_norm": 1.9353269338607788, + "learning_rate": 3.689759036144578e-05, + "loss": 0.2115, + "step": 7350 + }, + { + "epoch": 44.30722891566265, + "grad_norm": 2.5070390701293945, + "learning_rate": 3.692269076305221e-05, + "loss": 0.2172, + "step": 7355 + }, + { + "epoch": 44.33734939759036, + "grad_norm": 2.123479127883911, + "learning_rate": 3.694779116465863e-05, + "loss": 0.213, + "step": 7360 + }, + { + "epoch": 44.36746987951807, + "grad_norm": 1.4485774040222168, + "learning_rate": 3.697289156626506e-05, + "loss": 0.1941, + "step": 7365 + }, + { + "epoch": 44.397590361445786, + "grad_norm": 1.6342570781707764, + "learning_rate": 3.699799196787149e-05, + "loss": 0.2088, + "step": 7370 + }, + { + "epoch": 44.4277108433735, + "grad_norm": 1.9521421194076538, + "learning_rate": 3.702309236947791e-05, + "loss": 0.2017, + "step": 7375 + }, + { + "epoch": 44.45783132530121, + "grad_norm": 2.1190085411071777, + "learning_rate": 3.704819277108434e-05, + "loss": 0.2221, + "step": 7380 + }, + { + "epoch": 44.48795180722892, + "grad_norm": 2.233778953552246, + "learning_rate": 3.7073293172690764e-05, + "loss": 0.1708, + "step": 7385 + }, + { + "epoch": 44.51807228915663, + "grad_norm": 1.870052456855774, + "learning_rate": 3.709839357429719e-05, + "loss": 0.2155, + "step": 7390 + }, + { + "epoch": 44.54819277108434, + "grad_norm": 2.6740262508392334, + "learning_rate": 3.7123493975903615e-05, + "loss": 0.2393, + "step": 7395 + }, + { + "epoch": 44.57831325301205, + "grad_norm": 1.830087423324585, + "learning_rate": 3.7148594377510044e-05, + "loss": 0.2365, + "step": 7400 + }, + { + "epoch": 44.60843373493976, + "grad_norm": 3.084540367126465, + "learning_rate": 3.7173694779116466e-05, + "loss": 0.2712, + "step": 7405 + }, + { + "epoch": 44.63855421686747, + "grad_norm": 1.879089593887329, + "learning_rate": 3.7198795180722895e-05, + "loss": 0.228, + "step": 7410 + }, + { + "epoch": 44.66867469879518, + "grad_norm": 2.8133385181427, + "learning_rate": 3.7223895582329324e-05, + "loss": 0.2436, + "step": 7415 + }, + { + "epoch": 44.69879518072289, + "grad_norm": 2.9523673057556152, + "learning_rate": 3.724899598393575e-05, + "loss": 0.2195, + "step": 7420 + }, + { + "epoch": 44.7289156626506, + "grad_norm": 2.7336349487304688, + "learning_rate": 3.7274096385542176e-05, + "loss": 0.2222, + "step": 7425 + }, + { + "epoch": 44.75903614457831, + "grad_norm": 1.8540711402893066, + "learning_rate": 3.729919678714859e-05, + "loss": 0.2195, + "step": 7430 + }, + { + "epoch": 44.78915662650603, + "grad_norm": 1.6824413537979126, + "learning_rate": 3.732429718875502e-05, + "loss": 0.2417, + "step": 7435 + }, + { + "epoch": 44.81927710843374, + "grad_norm": 2.8414008617401123, + "learning_rate": 3.734939759036144e-05, + "loss": 0.2428, + "step": 7440 + }, + { + "epoch": 44.84939759036145, + "grad_norm": 2.3256123065948486, + "learning_rate": 3.737449799196787e-05, + "loss": 0.2146, + "step": 7445 + }, + { + "epoch": 44.87951807228916, + "grad_norm": 2.0897772312164307, + "learning_rate": 3.73995983935743e-05, + "loss": 0.258, + "step": 7450 + }, + { + "epoch": 44.90963855421687, + "grad_norm": 1.9921648502349854, + "learning_rate": 3.742469879518072e-05, + "loss": 0.1894, + "step": 7455 + }, + { + "epoch": 44.93975903614458, + "grad_norm": 1.5796568393707275, + "learning_rate": 3.744979919678715e-05, + "loss": 0.2267, + "step": 7460 + }, + { + "epoch": 44.96987951807229, + "grad_norm": 2.159181594848633, + "learning_rate": 3.7474899598393574e-05, + "loss": 0.2554, + "step": 7465 + }, + { + "epoch": 45.0, + "grad_norm": 2.91159725189209, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.2056, + "step": 7470 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.9154541879669682, + "eval_auc": 0.9655703786645787, + "eval_f1": 0.8743424897720631, + "eval_loss": 0.23049841821193695, + "eval_precision": 0.8968824940047961, + "eval_recall": 0.8529076396807298, + "eval_runtime": 17.0235, + "eval_samples_per_second": 149.381, + "eval_steps_per_second": 0.764, + "step": 7470 + }, + { + "epoch": 45.03012048192771, + "grad_norm": 2.273345708847046, + "learning_rate": 3.7525100401606426e-05, + "loss": 0.2279, + "step": 7475 + }, + { + "epoch": 45.06024096385542, + "grad_norm": 1.93620765209198, + "learning_rate": 3.7550200803212855e-05, + "loss": 0.2348, + "step": 7480 + }, + { + "epoch": 45.09036144578313, + "grad_norm": 2.0208992958068848, + "learning_rate": 3.757530120481928e-05, + "loss": 0.2378, + "step": 7485 + }, + { + "epoch": 45.12048192771084, + "grad_norm": 1.7389707565307617, + "learning_rate": 3.7600401606425706e-05, + "loss": 0.2, + "step": 7490 + }, + { + "epoch": 45.15060240963855, + "grad_norm": 2.7836644649505615, + "learning_rate": 3.7625502008032135e-05, + "loss": 0.2091, + "step": 7495 + }, + { + "epoch": 45.18072289156626, + "grad_norm": 1.9033209085464478, + "learning_rate": 3.765060240963856e-05, + "loss": 0.1727, + "step": 7500 + }, + { + "epoch": 45.21084337349398, + "grad_norm": 2.405941963195801, + "learning_rate": 3.7675702811244987e-05, + "loss": 0.2401, + "step": 7505 + }, + { + "epoch": 45.24096385542169, + "grad_norm": 1.9806922674179077, + "learning_rate": 3.770080321285141e-05, + "loss": 0.2247, + "step": 7510 + }, + { + "epoch": 45.2710843373494, + "grad_norm": 2.2501771450042725, + "learning_rate": 3.772590361445783e-05, + "loss": 0.211, + "step": 7515 + }, + { + "epoch": 45.30120481927711, + "grad_norm": 2.0017693042755127, + "learning_rate": 3.7751004016064253e-05, + "loss": 0.2372, + "step": 7520 + }, + { + "epoch": 45.33132530120482, + "grad_norm": 1.948451280593872, + "learning_rate": 3.777610441767068e-05, + "loss": 0.208, + "step": 7525 + }, + { + "epoch": 45.36144578313253, + "grad_norm": 2.418311357498169, + "learning_rate": 3.780120481927711e-05, + "loss": 0.1952, + "step": 7530 + }, + { + "epoch": 45.39156626506024, + "grad_norm": 2.2427890300750732, + "learning_rate": 3.7826305220883534e-05, + "loss": 0.2271, + "step": 7535 + }, + { + "epoch": 45.42168674698795, + "grad_norm": 2.134998083114624, + "learning_rate": 3.785140562248996e-05, + "loss": 0.2197, + "step": 7540 + }, + { + "epoch": 45.45180722891566, + "grad_norm": 1.6235309839248657, + "learning_rate": 3.7876506024096385e-05, + "loss": 0.2216, + "step": 7545 + }, + { + "epoch": 45.48192771084337, + "grad_norm": 2.081052780151367, + "learning_rate": 3.7901606425702814e-05, + "loss": 0.2205, + "step": 7550 + }, + { + "epoch": 45.51204819277108, + "grad_norm": 2.2003960609436035, + "learning_rate": 3.7926706827309237e-05, + "loss": 0.1913, + "step": 7555 + }, + { + "epoch": 45.54216867469879, + "grad_norm": 2.2834722995758057, + "learning_rate": 3.7951807228915666e-05, + "loss": 0.2239, + "step": 7560 + }, + { + "epoch": 45.5722891566265, + "grad_norm": 2.1172125339508057, + "learning_rate": 3.797690763052209e-05, + "loss": 0.1776, + "step": 7565 + }, + { + "epoch": 45.602409638554214, + "grad_norm": 2.7145802974700928, + "learning_rate": 3.800200803212852e-05, + "loss": 0.228, + "step": 7570 + }, + { + "epoch": 45.63253012048193, + "grad_norm": 2.9677984714508057, + "learning_rate": 3.802710843373494e-05, + "loss": 0.1937, + "step": 7575 + }, + { + "epoch": 45.66265060240964, + "grad_norm": 2.65963077545166, + "learning_rate": 3.805220883534137e-05, + "loss": 0.192, + "step": 7580 + }, + { + "epoch": 45.69277108433735, + "grad_norm": 2.723327398300171, + "learning_rate": 3.80773092369478e-05, + "loss": 0.2364, + "step": 7585 + }, + { + "epoch": 45.72289156626506, + "grad_norm": 2.2919390201568604, + "learning_rate": 3.810240963855422e-05, + "loss": 0.2316, + "step": 7590 + }, + { + "epoch": 45.75301204819277, + "grad_norm": 2.0183589458465576, + "learning_rate": 3.812751004016065e-05, + "loss": 0.1891, + "step": 7595 + }, + { + "epoch": 45.78313253012048, + "grad_norm": 2.712099552154541, + "learning_rate": 3.815261044176707e-05, + "loss": 0.2174, + "step": 7600 + }, + { + "epoch": 45.81325301204819, + "grad_norm": 3.526670217514038, + "learning_rate": 3.817771084337349e-05, + "loss": 0.2729, + "step": 7605 + }, + { + "epoch": 45.8433734939759, + "grad_norm": 2.002732515335083, + "learning_rate": 3.820281124497992e-05, + "loss": 0.2192, + "step": 7610 + }, + { + "epoch": 45.873493975903614, + "grad_norm": 3.051781177520752, + "learning_rate": 3.8227911646586345e-05, + "loss": 0.2627, + "step": 7615 + }, + { + "epoch": 45.903614457831324, + "grad_norm": 2.3048646450042725, + "learning_rate": 3.8253012048192774e-05, + "loss": 0.223, + "step": 7620 + }, + { + "epoch": 45.933734939759034, + "grad_norm": 2.2568962574005127, + "learning_rate": 3.8278112449799196e-05, + "loss": 0.2222, + "step": 7625 + }, + { + "epoch": 45.963855421686745, + "grad_norm": 2.023124933242798, + "learning_rate": 3.8303212851405625e-05, + "loss": 0.2063, + "step": 7630 + }, + { + "epoch": 45.993975903614455, + "grad_norm": 1.6241674423217773, + "learning_rate": 3.832831325301205e-05, + "loss": 0.1744, + "step": 7635 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.918600078647267, + "eval_auc": 0.9651871010661962, + "eval_f1": 0.8743169398907104, + "eval_loss": 0.25582775473594666, + "eval_precision": 0.935064935064935, + "eval_recall": 0.8209806157354618, + "eval_runtime": 17.1337, + "eval_samples_per_second": 148.421, + "eval_steps_per_second": 0.759, + "step": 7636 + }, + { + "epoch": 46.024096385542165, + "grad_norm": 2.8567662239074707, + "learning_rate": 3.8353413654618476e-05, + "loss": 0.2164, + "step": 7640 + }, + { + "epoch": 46.05421686746988, + "grad_norm": 2.854685068130493, + "learning_rate": 3.83785140562249e-05, + "loss": 0.2242, + "step": 7645 + }, + { + "epoch": 46.08433734939759, + "grad_norm": 2.8304920196533203, + "learning_rate": 3.840361445783133e-05, + "loss": 0.1927, + "step": 7650 + }, + { + "epoch": 46.1144578313253, + "grad_norm": 1.38346529006958, + "learning_rate": 3.842871485943775e-05, + "loss": 0.1966, + "step": 7655 + }, + { + "epoch": 46.144578313253014, + "grad_norm": 2.5765368938446045, + "learning_rate": 3.845381526104418e-05, + "loss": 0.1855, + "step": 7660 + }, + { + "epoch": 46.174698795180724, + "grad_norm": 1.645381212234497, + "learning_rate": 3.847891566265061e-05, + "loss": 0.1507, + "step": 7665 + }, + { + "epoch": 46.204819277108435, + "grad_norm": 2.0395102500915527, + "learning_rate": 3.850401606425703e-05, + "loss": 0.2106, + "step": 7670 + }, + { + "epoch": 46.234939759036145, + "grad_norm": 2.4283220767974854, + "learning_rate": 3.852911646586346e-05, + "loss": 0.2174, + "step": 7675 + }, + { + "epoch": 46.265060240963855, + "grad_norm": 2.396634340286255, + "learning_rate": 3.855421686746988e-05, + "loss": 0.2345, + "step": 7680 + }, + { + "epoch": 46.295180722891565, + "grad_norm": 3.298989772796631, + "learning_rate": 3.857931726907631e-05, + "loss": 0.227, + "step": 7685 + }, + { + "epoch": 46.325301204819276, + "grad_norm": 2.302367687225342, + "learning_rate": 3.860441767068273e-05, + "loss": 0.2717, + "step": 7690 + }, + { + "epoch": 46.355421686746986, + "grad_norm": 1.4514416456222534, + "learning_rate": 3.8629518072289155e-05, + "loss": 0.2327, + "step": 7695 + }, + { + "epoch": 46.3855421686747, + "grad_norm": 1.3491817712783813, + "learning_rate": 3.8654618473895584e-05, + "loss": 0.213, + "step": 7700 + }, + { + "epoch": 46.41566265060241, + "grad_norm": 2.660956382751465, + "learning_rate": 3.867971887550201e-05, + "loss": 0.2314, + "step": 7705 + }, + { + "epoch": 46.44578313253012, + "grad_norm": 2.8108885288238525, + "learning_rate": 3.8704819277108436e-05, + "loss": 0.2305, + "step": 7710 + }, + { + "epoch": 46.475903614457835, + "grad_norm": 2.0226879119873047, + "learning_rate": 3.872991967871486e-05, + "loss": 0.2256, + "step": 7715 + }, + { + "epoch": 46.506024096385545, + "grad_norm": 2.303884506225586, + "learning_rate": 3.875502008032129e-05, + "loss": 0.2363, + "step": 7720 + }, + { + "epoch": 46.536144578313255, + "grad_norm": 1.635791540145874, + "learning_rate": 3.878012048192771e-05, + "loss": 0.2129, + "step": 7725 + }, + { + "epoch": 46.566265060240966, + "grad_norm": 1.936125636100769, + "learning_rate": 3.880522088353414e-05, + "loss": 0.2077, + "step": 7730 + }, + { + "epoch": 46.596385542168676, + "grad_norm": 2.263965368270874, + "learning_rate": 3.883032128514056e-05, + "loss": 0.2505, + "step": 7735 + }, + { + "epoch": 46.626506024096386, + "grad_norm": 2.2494423389434814, + "learning_rate": 3.885542168674699e-05, + "loss": 0.229, + "step": 7740 + }, + { + "epoch": 46.6566265060241, + "grad_norm": 2.2650575637817383, + "learning_rate": 3.888052208835342e-05, + "loss": 0.1995, + "step": 7745 + }, + { + "epoch": 46.68674698795181, + "grad_norm": 2.2266595363616943, + "learning_rate": 3.890562248995984e-05, + "loss": 0.2237, + "step": 7750 + }, + { + "epoch": 46.71686746987952, + "grad_norm": 1.8683754205703735, + "learning_rate": 3.893072289156627e-05, + "loss": 0.222, + "step": 7755 + }, + { + "epoch": 46.74698795180723, + "grad_norm": 1.9006563425064087, + "learning_rate": 3.895582329317269e-05, + "loss": 0.1921, + "step": 7760 + }, + { + "epoch": 46.77710843373494, + "grad_norm": 3.2155590057373047, + "learning_rate": 3.898092369477912e-05, + "loss": 0.2461, + "step": 7765 + }, + { + "epoch": 46.80722891566265, + "grad_norm": 1.617855191230774, + "learning_rate": 3.9006024096385544e-05, + "loss": 0.2145, + "step": 7770 + }, + { + "epoch": 46.83734939759036, + "grad_norm": 3.959197759628296, + "learning_rate": 3.903112449799197e-05, + "loss": 0.2727, + "step": 7775 + }, + { + "epoch": 46.86746987951807, + "grad_norm": 3.9538347721099854, + "learning_rate": 3.9056224899598395e-05, + "loss": 0.2684, + "step": 7780 + }, + { + "epoch": 46.897590361445786, + "grad_norm": 2.1486432552337646, + "learning_rate": 3.908132530120482e-05, + "loss": 0.2024, + "step": 7785 + }, + { + "epoch": 46.9277108433735, + "grad_norm": 2.487645387649536, + "learning_rate": 3.9106425702811247e-05, + "loss": 0.2405, + "step": 7790 + }, + { + "epoch": 46.95783132530121, + "grad_norm": 2.512543201446533, + "learning_rate": 3.913152610441767e-05, + "loss": 0.2371, + "step": 7795 + }, + { + "epoch": 46.98795180722892, + "grad_norm": 1.5896553993225098, + "learning_rate": 3.91566265060241e-05, + "loss": 0.2009, + "step": 7800 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.9174203696421549, + "eval_auc": 0.9671657716678461, + "eval_f1": 0.8770491803278688, + "eval_loss": 0.2341395765542984, + "eval_precision": 0.901323706377858, + "eval_recall": 0.8540478905359179, + "eval_runtime": 17.092, + "eval_samples_per_second": 148.783, + "eval_steps_per_second": 0.761, + "step": 7802 + }, + { + "epoch": 47.01807228915663, + "grad_norm": 2.3858580589294434, + "learning_rate": 3.918172690763052e-05, + "loss": 0.182, + "step": 7805 + }, + { + "epoch": 47.04819277108434, + "grad_norm": 1.9192538261413574, + "learning_rate": 3.920682730923695e-05, + "loss": 0.2144, + "step": 7810 + }, + { + "epoch": 47.07831325301205, + "grad_norm": 3.998718023300171, + "learning_rate": 3.923192771084337e-05, + "loss": 0.22, + "step": 7815 + }, + { + "epoch": 47.10843373493976, + "grad_norm": 2.8246681690216064, + "learning_rate": 3.92570281124498e-05, + "loss": 0.2237, + "step": 7820 + }, + { + "epoch": 47.13855421686747, + "grad_norm": 1.7214534282684326, + "learning_rate": 3.928212851405623e-05, + "loss": 0.2247, + "step": 7825 + }, + { + "epoch": 47.16867469879518, + "grad_norm": 1.522997260093689, + "learning_rate": 3.930722891566265e-05, + "loss": 0.2075, + "step": 7830 + }, + { + "epoch": 47.19879518072289, + "grad_norm": 1.877416729927063, + "learning_rate": 3.933232931726908e-05, + "loss": 0.1689, + "step": 7835 + }, + { + "epoch": 47.2289156626506, + "grad_norm": 2.4283390045166016, + "learning_rate": 3.93574297188755e-05, + "loss": 0.2184, + "step": 7840 + }, + { + "epoch": 47.25903614457831, + "grad_norm": 2.131427526473999, + "learning_rate": 3.938253012048193e-05, + "loss": 0.1668, + "step": 7845 + }, + { + "epoch": 47.28915662650602, + "grad_norm": 2.9161033630371094, + "learning_rate": 3.9407630522088355e-05, + "loss": 0.2252, + "step": 7850 + }, + { + "epoch": 47.31927710843374, + "grad_norm": 3.200554609298706, + "learning_rate": 3.9432730923694784e-05, + "loss": 0.2335, + "step": 7855 + }, + { + "epoch": 47.34939759036145, + "grad_norm": 2.120020627975464, + "learning_rate": 3.9457831325301206e-05, + "loss": 0.2366, + "step": 7860 + }, + { + "epoch": 47.37951807228916, + "grad_norm": 1.453628659248352, + "learning_rate": 3.9482931726907635e-05, + "loss": 0.1861, + "step": 7865 + }, + { + "epoch": 47.40963855421687, + "grad_norm": 1.8164753913879395, + "learning_rate": 3.9508032128514064e-05, + "loss": 0.1764, + "step": 7870 + }, + { + "epoch": 47.43975903614458, + "grad_norm": 2.0672473907470703, + "learning_rate": 3.953313253012048e-05, + "loss": 0.2708, + "step": 7875 + }, + { + "epoch": 47.46987951807229, + "grad_norm": 3.7126882076263428, + "learning_rate": 3.955823293172691e-05, + "loss": 0.2495, + "step": 7880 + }, + { + "epoch": 47.5, + "grad_norm": 3.2293787002563477, + "learning_rate": 3.958333333333333e-05, + "loss": 0.2388, + "step": 7885 + }, + { + "epoch": 47.53012048192771, + "grad_norm": 2.1597959995269775, + "learning_rate": 3.960843373493976e-05, + "loss": 0.2125, + "step": 7890 + }, + { + "epoch": 47.56024096385542, + "grad_norm": 2.026249408721924, + "learning_rate": 3.963353413654618e-05, + "loss": 0.191, + "step": 7895 + }, + { + "epoch": 47.59036144578313, + "grad_norm": 2.365387439727783, + "learning_rate": 3.965863453815261e-05, + "loss": 0.2275, + "step": 7900 + }, + { + "epoch": 47.62048192771084, + "grad_norm": 1.6983262300491333, + "learning_rate": 3.968373493975904e-05, + "loss": 0.1965, + "step": 7905 + }, + { + "epoch": 47.65060240963855, + "grad_norm": 2.45988392829895, + "learning_rate": 3.970883534136546e-05, + "loss": 0.266, + "step": 7910 + }, + { + "epoch": 47.68072289156626, + "grad_norm": 2.1438827514648438, + "learning_rate": 3.973393574297189e-05, + "loss": 0.2275, + "step": 7915 + }, + { + "epoch": 47.71084337349397, + "grad_norm": 1.9596291780471802, + "learning_rate": 3.9759036144578314e-05, + "loss": 0.2521, + "step": 7920 + }, + { + "epoch": 47.74096385542169, + "grad_norm": 2.0082342624664307, + "learning_rate": 3.978413654618474e-05, + "loss": 0.2345, + "step": 7925 + }, + { + "epoch": 47.7710843373494, + "grad_norm": 2.714073419570923, + "learning_rate": 3.9809236947791165e-05, + "loss": 0.2403, + "step": 7930 + }, + { + "epoch": 47.80120481927711, + "grad_norm": 1.8705098628997803, + "learning_rate": 3.9834337349397595e-05, + "loss": 0.2063, + "step": 7935 + }, + { + "epoch": 47.83132530120482, + "grad_norm": 1.9358417987823486, + "learning_rate": 3.985943775100402e-05, + "loss": 0.2141, + "step": 7940 + }, + { + "epoch": 47.86144578313253, + "grad_norm": 2.227191686630249, + "learning_rate": 3.9884538152610446e-05, + "loss": 0.2179, + "step": 7945 + }, + { + "epoch": 47.89156626506024, + "grad_norm": 2.8488807678222656, + "learning_rate": 3.9909638554216875e-05, + "loss": 0.2186, + "step": 7950 + }, + { + "epoch": 47.92168674698795, + "grad_norm": 1.6021779775619507, + "learning_rate": 3.99347389558233e-05, + "loss": 0.2329, + "step": 7955 + }, + { + "epoch": 47.95180722891566, + "grad_norm": 2.1023037433624268, + "learning_rate": 3.995983935742972e-05, + "loss": 0.2615, + "step": 7960 + }, + { + "epoch": 47.98192771084337, + "grad_norm": 2.3818461894989014, + "learning_rate": 3.998493975903614e-05, + "loss": 0.2356, + "step": 7965 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.9213527329925285, + "eval_auc": 0.968722494699134, + "eval_f1": 0.882491186839013, + "eval_loss": 0.21619462966918945, + "eval_precision": 0.9103030303030303, + "eval_recall": 0.8563283922462942, + "eval_runtime": 17.2277, + "eval_samples_per_second": 147.611, + "eval_steps_per_second": 0.755, + "step": 7968 + }, + { + "epoch": 48.01204819277108, + "grad_norm": 1.3949185609817505, + "learning_rate": 4.001004016064257e-05, + "loss": 0.2073, + "step": 7970 + }, + { + "epoch": 48.04216867469879, + "grad_norm": 1.8420413732528687, + "learning_rate": 4.003514056224899e-05, + "loss": 0.2128, + "step": 7975 + }, + { + "epoch": 48.0722891566265, + "grad_norm": 1.9469246864318848, + "learning_rate": 4.006024096385542e-05, + "loss": 0.1887, + "step": 7980 + }, + { + "epoch": 48.102409638554214, + "grad_norm": 2.362492084503174, + "learning_rate": 4.008534136546185e-05, + "loss": 0.2148, + "step": 7985 + }, + { + "epoch": 48.13253012048193, + "grad_norm": 1.5186575651168823, + "learning_rate": 4.0110441767068274e-05, + "loss": 0.2052, + "step": 7990 + }, + { + "epoch": 48.16265060240964, + "grad_norm": 2.3660972118377686, + "learning_rate": 4.01355421686747e-05, + "loss": 0.2274, + "step": 7995 + }, + { + "epoch": 48.19277108433735, + "grad_norm": 1.7655103206634521, + "learning_rate": 4.0160642570281125e-05, + "loss": 0.1772, + "step": 8000 + }, + { + "epoch": 48.22289156626506, + "grad_norm": 2.0450825691223145, + "learning_rate": 4.0185742971887554e-05, + "loss": 0.2016, + "step": 8005 + }, + { + "epoch": 48.25301204819277, + "grad_norm": 2.284454107284546, + "learning_rate": 4.0210843373493976e-05, + "loss": 0.2097, + "step": 8010 + }, + { + "epoch": 48.28313253012048, + "grad_norm": 2.565540075302124, + "learning_rate": 4.0235943775100405e-05, + "loss": 0.2301, + "step": 8015 + }, + { + "epoch": 48.31325301204819, + "grad_norm": 2.9623656272888184, + "learning_rate": 4.026104417670683e-05, + "loss": 0.1995, + "step": 8020 + }, + { + "epoch": 48.3433734939759, + "grad_norm": 2.499049663543701, + "learning_rate": 4.028614457831326e-05, + "loss": 0.2371, + "step": 8025 + }, + { + "epoch": 48.373493975903614, + "grad_norm": 2.517566680908203, + "learning_rate": 4.031124497991968e-05, + "loss": 0.2569, + "step": 8030 + }, + { + "epoch": 48.403614457831324, + "grad_norm": 1.6054986715316772, + "learning_rate": 4.033634538152611e-05, + "loss": 0.2037, + "step": 8035 + }, + { + "epoch": 48.433734939759034, + "grad_norm": 2.0692198276519775, + "learning_rate": 4.036144578313254e-05, + "loss": 0.2151, + "step": 8040 + }, + { + "epoch": 48.463855421686745, + "grad_norm": 1.7841137647628784, + "learning_rate": 4.038654618473896e-05, + "loss": 0.2078, + "step": 8045 + }, + { + "epoch": 48.493975903614455, + "grad_norm": 1.7544052600860596, + "learning_rate": 4.041164658634538e-05, + "loss": 0.1812, + "step": 8050 + }, + { + "epoch": 48.524096385542165, + "grad_norm": 2.5118513107299805, + "learning_rate": 4.0436746987951804e-05, + "loss": 0.2402, + "step": 8055 + }, + { + "epoch": 48.55421686746988, + "grad_norm": 2.230863094329834, + "learning_rate": 4.046184738955823e-05, + "loss": 0.183, + "step": 8060 + }, + { + "epoch": 48.58433734939759, + "grad_norm": 2.237218141555786, + "learning_rate": 4.048694779116466e-05, + "loss": 0.2277, + "step": 8065 + }, + { + "epoch": 48.6144578313253, + "grad_norm": 3.2372775077819824, + "learning_rate": 4.0512048192771084e-05, + "loss": 0.2322, + "step": 8070 + }, + { + "epoch": 48.644578313253014, + "grad_norm": 3.1102917194366455, + "learning_rate": 4.053714859437751e-05, + "loss": 0.2296, + "step": 8075 + }, + { + "epoch": 48.674698795180724, + "grad_norm": 2.165656566619873, + "learning_rate": 4.0562248995983936e-05, + "loss": 0.239, + "step": 8080 + }, + { + "epoch": 48.704819277108435, + "grad_norm": 1.6019127368927002, + "learning_rate": 4.0587349397590365e-05, + "loss": 0.2006, + "step": 8085 + }, + { + "epoch": 48.734939759036145, + "grad_norm": 2.381197929382324, + "learning_rate": 4.061244979919679e-05, + "loss": 0.2113, + "step": 8090 + }, + { + "epoch": 48.765060240963855, + "grad_norm": 2.4255783557891846, + "learning_rate": 4.0637550200803216e-05, + "loss": 0.2358, + "step": 8095 + }, + { + "epoch": 48.795180722891565, + "grad_norm": 1.3936980962753296, + "learning_rate": 4.066265060240964e-05, + "loss": 0.2086, + "step": 8100 + }, + { + "epoch": 48.825301204819276, + "grad_norm": 2.304527997970581, + "learning_rate": 4.068775100401607e-05, + "loss": 0.2188, + "step": 8105 + }, + { + "epoch": 48.855421686746986, + "grad_norm": 2.2357938289642334, + "learning_rate": 4.071285140562249e-05, + "loss": 0.1996, + "step": 8110 + }, + { + "epoch": 48.8855421686747, + "grad_norm": 2.678866386413574, + "learning_rate": 4.073795180722892e-05, + "loss": 0.2467, + "step": 8115 + }, + { + "epoch": 48.91566265060241, + "grad_norm": 2.4957497119903564, + "learning_rate": 4.076305220883535e-05, + "loss": 0.2704, + "step": 8120 + }, + { + "epoch": 48.94578313253012, + "grad_norm": 2.6663835048675537, + "learning_rate": 4.078815261044177e-05, + "loss": 0.2316, + "step": 8125 + }, + { + "epoch": 48.975903614457835, + "grad_norm": 2.898577928543091, + "learning_rate": 4.08132530120482e-05, + "loss": 0.2556, + "step": 8130 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.9237121510027526, + "eval_auc": 0.9678888659226519, + "eval_f1": 0.8864168618266979, + "eval_loss": 0.2191127985715866, + "eval_precision": 0.910950661853189, + "eval_recall": 0.863169897377423, + "eval_runtime": 17.0353, + "eval_samples_per_second": 149.279, + "eval_steps_per_second": 0.763, + "step": 8134 + }, + { + "epoch": 49.006024096385545, + "grad_norm": 2.100454092025757, + "learning_rate": 4.083835341365462e-05, + "loss": 0.1929, + "step": 8135 + }, + { + "epoch": 49.036144578313255, + "grad_norm": 1.9053326845169067, + "learning_rate": 4.0863453815261044e-05, + "loss": 0.1936, + "step": 8140 + }, + { + "epoch": 49.066265060240966, + "grad_norm": 1.8544409275054932, + "learning_rate": 4.088855421686747e-05, + "loss": 0.1797, + "step": 8145 + }, + { + "epoch": 49.096385542168676, + "grad_norm": 2.9682719707489014, + "learning_rate": 4.0913654618473895e-05, + "loss": 0.2269, + "step": 8150 + }, + { + "epoch": 49.126506024096386, + "grad_norm": 2.228851795196533, + "learning_rate": 4.0938755020080324e-05, + "loss": 0.2095, + "step": 8155 + }, + { + "epoch": 49.1566265060241, + "grad_norm": 2.6955111026763916, + "learning_rate": 4.0963855421686746e-05, + "loss": 0.1792, + "step": 8160 + }, + { + "epoch": 49.18674698795181, + "grad_norm": 1.7635419368743896, + "learning_rate": 4.0988955823293175e-05, + "loss": 0.1999, + "step": 8165 + }, + { + "epoch": 49.21686746987952, + "grad_norm": 2.2137768268585205, + "learning_rate": 4.10140562248996e-05, + "loss": 0.2154, + "step": 8170 + }, + { + "epoch": 49.24698795180723, + "grad_norm": 2.5191023349761963, + "learning_rate": 4.103915662650603e-05, + "loss": 0.2557, + "step": 8175 + }, + { + "epoch": 49.27710843373494, + "grad_norm": 2.286573886871338, + "learning_rate": 4.106425702811245e-05, + "loss": 0.2602, + "step": 8180 + }, + { + "epoch": 49.30722891566265, + "grad_norm": 2.6866259574890137, + "learning_rate": 4.108935742971888e-05, + "loss": 0.21, + "step": 8185 + }, + { + "epoch": 49.33734939759036, + "grad_norm": 2.017263412475586, + "learning_rate": 4.11144578313253e-05, + "loss": 0.1535, + "step": 8190 + }, + { + "epoch": 49.36746987951807, + "grad_norm": 2.0354788303375244, + "learning_rate": 4.113955823293173e-05, + "loss": 0.2021, + "step": 8195 + }, + { + "epoch": 49.397590361445786, + "grad_norm": 1.6226084232330322, + "learning_rate": 4.116465863453816e-05, + "loss": 0.258, + "step": 8200 + }, + { + "epoch": 49.4277108433735, + "grad_norm": 2.9974417686462402, + "learning_rate": 4.118975903614458e-05, + "loss": 0.2214, + "step": 8205 + }, + { + "epoch": 49.45783132530121, + "grad_norm": 2.164848566055298, + "learning_rate": 4.121485943775101e-05, + "loss": 0.2227, + "step": 8210 + }, + { + "epoch": 49.48795180722892, + "grad_norm": 1.9364224672317505, + "learning_rate": 4.123995983935743e-05, + "loss": 0.2389, + "step": 8215 + }, + { + "epoch": 49.51807228915663, + "grad_norm": 1.5729063749313354, + "learning_rate": 4.126506024096386e-05, + "loss": 0.2055, + "step": 8220 + }, + { + "epoch": 49.54819277108434, + "grad_norm": 2.0302486419677734, + "learning_rate": 4.1290160642570284e-05, + "loss": 0.2079, + "step": 8225 + }, + { + "epoch": 49.57831325301205, + "grad_norm": 2.086446523666382, + "learning_rate": 4.1315261044176706e-05, + "loss": 0.1869, + "step": 8230 + }, + { + "epoch": 49.60843373493976, + "grad_norm": 2.3346996307373047, + "learning_rate": 4.1340361445783135e-05, + "loss": 0.2259, + "step": 8235 + }, + { + "epoch": 49.63855421686747, + "grad_norm": 1.7249573469161987, + "learning_rate": 4.136546184738956e-05, + "loss": 0.1928, + "step": 8240 + }, + { + "epoch": 49.66867469879518, + "grad_norm": 2.7298545837402344, + "learning_rate": 4.1390562248995986e-05, + "loss": 0.2336, + "step": 8245 + }, + { + "epoch": 49.69879518072289, + "grad_norm": 3.0118720531463623, + "learning_rate": 4.141566265060241e-05, + "loss": 0.2221, + "step": 8250 + }, + { + "epoch": 49.7289156626506, + "grad_norm": 2.4571619033813477, + "learning_rate": 4.144076305220884e-05, + "loss": 0.1981, + "step": 8255 + }, + { + "epoch": 49.75903614457831, + "grad_norm": 2.2176711559295654, + "learning_rate": 4.146586345381526e-05, + "loss": 0.2094, + "step": 8260 + }, + { + "epoch": 49.78915662650603, + "grad_norm": 2.423710584640503, + "learning_rate": 4.149096385542169e-05, + "loss": 0.2158, + "step": 8265 + }, + { + "epoch": 49.81927710843374, + "grad_norm": 2.3995325565338135, + "learning_rate": 4.151606425702811e-05, + "loss": 0.2123, + "step": 8270 + }, + { + "epoch": 49.84939759036145, + "grad_norm": 1.716963529586792, + "learning_rate": 4.154116465863454e-05, + "loss": 0.2062, + "step": 8275 + }, + { + "epoch": 49.87951807228916, + "grad_norm": 2.483274459838867, + "learning_rate": 4.156626506024097e-05, + "loss": 0.1824, + "step": 8280 + }, + { + "epoch": 49.90963855421687, + "grad_norm": 1.5357404947280884, + "learning_rate": 4.159136546184739e-05, + "loss": 0.1938, + "step": 8285 + }, + { + "epoch": 49.93975903614458, + "grad_norm": 2.8210079669952393, + "learning_rate": 4.161646586345382e-05, + "loss": 0.2232, + "step": 8290 + }, + { + "epoch": 49.96987951807229, + "grad_norm": 2.644474744796753, + "learning_rate": 4.164156626506024e-05, + "loss": 0.2048, + "step": 8295 + }, + { + "epoch": 50.0, + "grad_norm": 2.154975175857544, + "learning_rate": 4.166666666666667e-05, + "loss": 0.1875, + "step": 8300 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.9197797876523791, + "eval_auc": 0.9642733946486234, + "eval_f1": 0.8802816901408451, + "eval_loss": 0.25391778349876404, + "eval_precision": 0.9068923821039904, + "eval_recall": 0.855188141391106, + "eval_runtime": 17.7018, + "eval_samples_per_second": 143.658, + "eval_steps_per_second": 0.734, + "step": 8300 + }, + { + "epoch": 50.03012048192771, + "grad_norm": 2.3240296840667725, + "learning_rate": 4.1691767068273094e-05, + "loss": 0.1923, + "step": 8305 + }, + { + "epoch": 50.06024096385542, + "grad_norm": 2.1551833152770996, + "learning_rate": 4.1716867469879523e-05, + "loss": 0.2121, + "step": 8310 + }, + { + "epoch": 50.09036144578313, + "grad_norm": 2.293663501739502, + "learning_rate": 4.1741967871485946e-05, + "loss": 0.1768, + "step": 8315 + }, + { + "epoch": 50.12048192771084, + "grad_norm": 2.1318576335906982, + "learning_rate": 4.176706827309237e-05, + "loss": 0.1981, + "step": 8320 + }, + { + "epoch": 50.15060240963855, + "grad_norm": 2.407623052597046, + "learning_rate": 4.17921686746988e-05, + "loss": 0.2072, + "step": 8325 + }, + { + "epoch": 50.18072289156626, + "grad_norm": 2.1320385932922363, + "learning_rate": 4.181726907630522e-05, + "loss": 0.2063, + "step": 8330 + }, + { + "epoch": 50.21084337349398, + "grad_norm": 2.106816530227661, + "learning_rate": 4.184236947791165e-05, + "loss": 0.1472, + "step": 8335 + }, + { + "epoch": 50.24096385542169, + "grad_norm": 2.103788137435913, + "learning_rate": 4.186746987951807e-05, + "loss": 0.2001, + "step": 8340 + }, + { + "epoch": 50.2710843373494, + "grad_norm": 2.0746145248413086, + "learning_rate": 4.18925702811245e-05, + "loss": 0.206, + "step": 8345 + }, + { + "epoch": 50.30120481927711, + "grad_norm": 2.4276602268218994, + "learning_rate": 4.191767068273092e-05, + "loss": 0.261, + "step": 8350 + }, + { + "epoch": 50.33132530120482, + "grad_norm": 2.2360825538635254, + "learning_rate": 4.194277108433735e-05, + "loss": 0.2205, + "step": 8355 + }, + { + "epoch": 50.36144578313253, + "grad_norm": 2.1541240215301514, + "learning_rate": 4.196787148594378e-05, + "loss": 0.2181, + "step": 8360 + }, + { + "epoch": 50.39156626506024, + "grad_norm": 3.0024476051330566, + "learning_rate": 4.19929718875502e-05, + "loss": 0.2141, + "step": 8365 + }, + { + "epoch": 50.42168674698795, + "grad_norm": 2.068899631500244, + "learning_rate": 4.201807228915663e-05, + "loss": 0.1967, + "step": 8370 + }, + { + "epoch": 50.45180722891566, + "grad_norm": 1.6858114004135132, + "learning_rate": 4.2043172690763054e-05, + "loss": 0.1823, + "step": 8375 + }, + { + "epoch": 50.48192771084337, + "grad_norm": 2.16697359085083, + "learning_rate": 4.206827309236948e-05, + "loss": 0.1633, + "step": 8380 + }, + { + "epoch": 50.51204819277108, + "grad_norm": 2.2981698513031006, + "learning_rate": 4.2093373493975905e-05, + "loss": 0.2043, + "step": 8385 + }, + { + "epoch": 50.54216867469879, + "grad_norm": 1.885445475578308, + "learning_rate": 4.2118473895582334e-05, + "loss": 0.1958, + "step": 8390 + }, + { + "epoch": 50.5722891566265, + "grad_norm": 2.8808133602142334, + "learning_rate": 4.2143574297188756e-05, + "loss": 0.2118, + "step": 8395 + }, + { + "epoch": 50.602409638554214, + "grad_norm": 1.648272156715393, + "learning_rate": 4.2168674698795186e-05, + "loss": 0.2369, + "step": 8400 + }, + { + "epoch": 50.63253012048193, + "grad_norm": 2.7153096199035645, + "learning_rate": 4.219377510040161e-05, + "loss": 0.1965, + "step": 8405 + }, + { + "epoch": 50.66265060240964, + "grad_norm": 2.3628122806549072, + "learning_rate": 4.221887550200803e-05, + "loss": 0.2291, + "step": 8410 + }, + { + "epoch": 50.69277108433735, + "grad_norm": 2.9337639808654785, + "learning_rate": 4.224397590361446e-05, + "loss": 0.2315, + "step": 8415 + }, + { + "epoch": 50.72289156626506, + "grad_norm": 1.9613662958145142, + "learning_rate": 4.226907630522088e-05, + "loss": 0.2519, + "step": 8420 + }, + { + "epoch": 50.75301204819277, + "grad_norm": 1.7669310569763184, + "learning_rate": 4.229417670682731e-05, + "loss": 0.2419, + "step": 8425 + }, + { + "epoch": 50.78313253012048, + "grad_norm": 1.8924133777618408, + "learning_rate": 4.231927710843373e-05, + "loss": 0.2282, + "step": 8430 + }, + { + "epoch": 50.81325301204819, + "grad_norm": 2.1653175354003906, + "learning_rate": 4.234437751004016e-05, + "loss": 0.1903, + "step": 8435 + }, + { + "epoch": 50.8433734939759, + "grad_norm": 2.1018800735473633, + "learning_rate": 4.236947791164659e-05, + "loss": 0.2456, + "step": 8440 + }, + { + "epoch": 50.873493975903614, + "grad_norm": 1.8597257137298584, + "learning_rate": 4.239457831325301e-05, + "loss": 0.1953, + "step": 8445 + }, + { + "epoch": 50.903614457831324, + "grad_norm": 1.8078064918518066, + "learning_rate": 4.241967871485944e-05, + "loss": 0.1778, + "step": 8450 + }, + { + "epoch": 50.933734939759034, + "grad_norm": 2.412869691848755, + "learning_rate": 4.2444779116465865e-05, + "loss": 0.2315, + "step": 8455 + }, + { + "epoch": 50.963855421686745, + "grad_norm": 1.3655896186828613, + "learning_rate": 4.2469879518072294e-05, + "loss": 0.2115, + "step": 8460 + }, + { + "epoch": 50.993975903614455, + "grad_norm": 2.445127487182617, + "learning_rate": 4.2494979919678716e-05, + "loss": 0.2252, + "step": 8465 + }, + { + "epoch": 51.0, + "eval_accuracy": 0.9280377506881636, + "eval_auc": 0.9689456170153352, + "eval_f1": 0.8940359004053272, + "eval_loss": 0.22296485304832458, + "eval_precision": 0.908235294117647, + "eval_recall": 0.8802736602052451, + "eval_runtime": 17.0589, + "eval_samples_per_second": 149.072, + "eval_steps_per_second": 0.762, + "step": 8466 + }, + { + "epoch": 51.024096385542165, + "grad_norm": 1.491262435913086, + "learning_rate": 4.2520080321285145e-05, + "loss": 0.1959, + "step": 8470 + }, + { + "epoch": 51.05421686746988, + "grad_norm": 2.2632458209991455, + "learning_rate": 4.254518072289157e-05, + "loss": 0.2049, + "step": 8475 + }, + { + "epoch": 51.08433734939759, + "grad_norm": 2.91318416595459, + "learning_rate": 4.2570281124497996e-05, + "loss": 0.1958, + "step": 8480 + }, + { + "epoch": 51.1144578313253, + "grad_norm": 2.273866891860962, + "learning_rate": 4.2595381526104425e-05, + "loss": 0.2061, + "step": 8485 + }, + { + "epoch": 51.144578313253014, + "grad_norm": 2.750541925430298, + "learning_rate": 4.262048192771085e-05, + "loss": 0.1655, + "step": 8490 + }, + { + "epoch": 51.174698795180724, + "grad_norm": 3.153205633163452, + "learning_rate": 4.264558232931727e-05, + "loss": 0.1921, + "step": 8495 + }, + { + "epoch": 51.204819277108435, + "grad_norm": 3.316248893737793, + "learning_rate": 4.267068273092369e-05, + "loss": 0.2232, + "step": 8500 + }, + { + "epoch": 51.234939759036145, + "grad_norm": 2.0477054119110107, + "learning_rate": 4.269578313253012e-05, + "loss": 0.1949, + "step": 8505 + }, + { + "epoch": 51.265060240963855, + "grad_norm": 1.9106863737106323, + "learning_rate": 4.2720883534136544e-05, + "loss": 0.1902, + "step": 8510 + }, + { + "epoch": 51.295180722891565, + "grad_norm": 2.0968832969665527, + "learning_rate": 4.274598393574297e-05, + "loss": 0.1758, + "step": 8515 + }, + { + "epoch": 51.325301204819276, + "grad_norm": 1.4065873622894287, + "learning_rate": 4.27710843373494e-05, + "loss": 0.2099, + "step": 8520 + }, + { + "epoch": 51.355421686746986, + "grad_norm": 2.735079765319824, + "learning_rate": 4.2796184738955824e-05, + "loss": 0.1967, + "step": 8525 + }, + { + "epoch": 51.3855421686747, + "grad_norm": 1.4214318990707397, + "learning_rate": 4.282128514056225e-05, + "loss": 0.1991, + "step": 8530 + }, + { + "epoch": 51.41566265060241, + "grad_norm": 1.6565520763397217, + "learning_rate": 4.2846385542168675e-05, + "loss": 0.2003, + "step": 8535 + }, + { + "epoch": 51.44578313253012, + "grad_norm": 1.5868991613388062, + "learning_rate": 4.2871485943775104e-05, + "loss": 0.1788, + "step": 8540 + }, + { + "epoch": 51.475903614457835, + "grad_norm": 2.1586837768554688, + "learning_rate": 4.289658634538153e-05, + "loss": 0.219, + "step": 8545 + }, + { + "epoch": 51.506024096385545, + "grad_norm": 2.678581953048706, + "learning_rate": 4.2921686746987956e-05, + "loss": 0.1606, + "step": 8550 + }, + { + "epoch": 51.536144578313255, + "grad_norm": 2.364051580429077, + "learning_rate": 4.294678714859438e-05, + "loss": 0.2155, + "step": 8555 + }, + { + "epoch": 51.566265060240966, + "grad_norm": 1.729448676109314, + "learning_rate": 4.297188755020081e-05, + "loss": 0.1741, + "step": 8560 + }, + { + "epoch": 51.596385542168676, + "grad_norm": 2.4447762966156006, + "learning_rate": 4.299698795180723e-05, + "loss": 0.2083, + "step": 8565 + }, + { + "epoch": 51.626506024096386, + "grad_norm": 1.8021255731582642, + "learning_rate": 4.302208835341366e-05, + "loss": 0.2093, + "step": 8570 + }, + { + "epoch": 51.6566265060241, + "grad_norm": 3.061856508255005, + "learning_rate": 4.304718875502009e-05, + "loss": 0.1992, + "step": 8575 + }, + { + "epoch": 51.68674698795181, + "grad_norm": 1.822127342224121, + "learning_rate": 4.307228915662651e-05, + "loss": 0.2208, + "step": 8580 + }, + { + "epoch": 51.71686746987952, + "grad_norm": 2.0909178256988525, + "learning_rate": 4.309738955823293e-05, + "loss": 0.2104, + "step": 8585 + }, + { + "epoch": 51.74698795180723, + "grad_norm": 1.498009204864502, + "learning_rate": 4.3122489959839354e-05, + "loss": 0.2083, + "step": 8590 + }, + { + "epoch": 51.77710843373494, + "grad_norm": 2.5373308658599854, + "learning_rate": 4.3147590361445783e-05, + "loss": 0.2074, + "step": 8595 + }, + { + "epoch": 51.80722891566265, + "grad_norm": 2.005295991897583, + "learning_rate": 4.317269076305221e-05, + "loss": 0.2242, + "step": 8600 + }, + { + "epoch": 51.83734939759036, + "grad_norm": 2.867748737335205, + "learning_rate": 4.3197791164658635e-05, + "loss": 0.2493, + "step": 8605 + }, + { + "epoch": 51.86746987951807, + "grad_norm": 3.365629196166992, + "learning_rate": 4.3222891566265064e-05, + "loss": 0.1988, + "step": 8610 + }, + { + "epoch": 51.897590361445786, + "grad_norm": 1.7968188524246216, + "learning_rate": 4.3247991967871486e-05, + "loss": 0.2058, + "step": 8615 + }, + { + "epoch": 51.9277108433735, + "grad_norm": 1.8287718296051025, + "learning_rate": 4.3273092369477915e-05, + "loss": 0.1895, + "step": 8620 + }, + { + "epoch": 51.95783132530121, + "grad_norm": 2.291860342025757, + "learning_rate": 4.329819277108434e-05, + "loss": 0.2157, + "step": 8625 + }, + { + "epoch": 51.98795180722892, + "grad_norm": 2.7147622108459473, + "learning_rate": 4.3323293172690766e-05, + "loss": 0.1993, + "step": 8630 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.9303971686983877, + "eval_auc": 0.9749829236141434, + "eval_f1": 0.8908081431215299, + "eval_loss": 0.22926868498325348, + "eval_precision": 0.9704301075268817, + "eval_recall": 0.8232611174458381, + "eval_runtime": 19.2801, + "eval_samples_per_second": 131.897, + "eval_steps_per_second": 0.674, + "step": 8632 + }, + { + "epoch": 52.01807228915663, + "grad_norm": 1.3713630437850952, + "learning_rate": 4.334839357429719e-05, + "loss": 0.1696, + "step": 8635 + }, + { + "epoch": 52.04819277108434, + "grad_norm": 2.2953884601593018, + "learning_rate": 4.337349397590362e-05, + "loss": 0.2015, + "step": 8640 + }, + { + "epoch": 52.07831325301205, + "grad_norm": 1.6782729625701904, + "learning_rate": 4.339859437751004e-05, + "loss": 0.2073, + "step": 8645 + }, + { + "epoch": 52.10843373493976, + "grad_norm": 2.056797504425049, + "learning_rate": 4.342369477911647e-05, + "loss": 0.1909, + "step": 8650 + }, + { + "epoch": 52.13855421686747, + "grad_norm": 1.9719815254211426, + "learning_rate": 4.34487951807229e-05, + "loss": 0.2102, + "step": 8655 + }, + { + "epoch": 52.16867469879518, + "grad_norm": 1.7055913209915161, + "learning_rate": 4.347389558232932e-05, + "loss": 0.1694, + "step": 8660 + }, + { + "epoch": 52.19879518072289, + "grad_norm": 1.97348952293396, + "learning_rate": 4.349899598393575e-05, + "loss": 0.188, + "step": 8665 + }, + { + "epoch": 52.2289156626506, + "grad_norm": 2.388824701309204, + "learning_rate": 4.352409638554217e-05, + "loss": 0.1471, + "step": 8670 + }, + { + "epoch": 52.25903614457831, + "grad_norm": 2.0131945610046387, + "learning_rate": 4.3549196787148594e-05, + "loss": 0.1753, + "step": 8675 + }, + { + "epoch": 52.28915662650602, + "grad_norm": 2.1807432174682617, + "learning_rate": 4.357429718875502e-05, + "loss": 0.1792, + "step": 8680 + }, + { + "epoch": 52.31927710843374, + "grad_norm": 2.6073501110076904, + "learning_rate": 4.3599397590361446e-05, + "loss": 0.27, + "step": 8685 + }, + { + "epoch": 52.34939759036145, + "grad_norm": 2.618926525115967, + "learning_rate": 4.3624497991967875e-05, + "loss": 0.193, + "step": 8690 + }, + { + "epoch": 52.37951807228916, + "grad_norm": 1.5784326791763306, + "learning_rate": 4.36495983935743e-05, + "loss": 0.2148, + "step": 8695 + }, + { + "epoch": 52.40963855421687, + "grad_norm": 1.9290697574615479, + "learning_rate": 4.3674698795180726e-05, + "loss": 0.2036, + "step": 8700 + }, + { + "epoch": 52.43975903614458, + "grad_norm": 2.2212750911712646, + "learning_rate": 4.369979919678715e-05, + "loss": 0.1845, + "step": 8705 + }, + { + "epoch": 52.46987951807229, + "grad_norm": 1.0794906616210938, + "learning_rate": 4.372489959839358e-05, + "loss": 0.1834, + "step": 8710 + }, + { + "epoch": 52.5, + "grad_norm": 1.8574261665344238, + "learning_rate": 4.375e-05, + "loss": 0.196, + "step": 8715 + }, + { + "epoch": 52.53012048192771, + "grad_norm": 1.8506181240081787, + "learning_rate": 4.377510040160643e-05, + "loss": 0.1983, + "step": 8720 + }, + { + "epoch": 52.56024096385542, + "grad_norm": 1.2905638217926025, + "learning_rate": 4.380020080321285e-05, + "loss": 0.2053, + "step": 8725 + }, + { + "epoch": 52.59036144578313, + "grad_norm": 2.2692477703094482, + "learning_rate": 4.382530120481928e-05, + "loss": 0.2398, + "step": 8730 + }, + { + "epoch": 52.62048192771084, + "grad_norm": 1.1828069686889648, + "learning_rate": 4.385040160642571e-05, + "loss": 0.189, + "step": 8735 + }, + { + "epoch": 52.65060240963855, + "grad_norm": 2.9759180545806885, + "learning_rate": 4.387550200803213e-05, + "loss": 0.2304, + "step": 8740 + }, + { + "epoch": 52.68072289156626, + "grad_norm": 1.6620732545852661, + "learning_rate": 4.390060240963856e-05, + "loss": 0.2039, + "step": 8745 + }, + { + "epoch": 52.71084337349397, + "grad_norm": 2.220284938812256, + "learning_rate": 4.392570281124498e-05, + "loss": 0.1902, + "step": 8750 + }, + { + "epoch": 52.74096385542169, + "grad_norm": 2.7123513221740723, + "learning_rate": 4.395080321285141e-05, + "loss": 0.187, + "step": 8755 + }, + { + "epoch": 52.7710843373494, + "grad_norm": 1.9920449256896973, + "learning_rate": 4.3975903614457834e-05, + "loss": 0.1733, + "step": 8760 + }, + { + "epoch": 52.80120481927711, + "grad_norm": 1.8670777082443237, + "learning_rate": 4.4001004016064256e-05, + "loss": 0.2132, + "step": 8765 + }, + { + "epoch": 52.83132530120482, + "grad_norm": 2.346907138824463, + "learning_rate": 4.4026104417670685e-05, + "loss": 0.2032, + "step": 8770 + }, + { + "epoch": 52.86144578313253, + "grad_norm": 2.630713939666748, + "learning_rate": 4.405120481927711e-05, + "loss": 0.2217, + "step": 8775 + }, + { + "epoch": 52.89156626506024, + "grad_norm": 1.692564845085144, + "learning_rate": 4.407630522088354e-05, + "loss": 0.2216, + "step": 8780 + }, + { + "epoch": 52.92168674698795, + "grad_norm": 2.8092164993286133, + "learning_rate": 4.410140562248996e-05, + "loss": 0.1729, + "step": 8785 + }, + { + "epoch": 52.95180722891566, + "grad_norm": 2.36733341217041, + "learning_rate": 4.412650602409639e-05, + "loss": 0.1809, + "step": 8790 + }, + { + "epoch": 52.98192771084337, + "grad_norm": 2.001338005065918, + "learning_rate": 4.415160642570281e-05, + "loss": 0.2081, + "step": 8795 + }, + { + "epoch": 53.0, + "eval_accuracy": 0.9307904050334251, + "eval_auc": 0.9701998245136139, + "eval_f1": 0.8935912938331319, + "eval_loss": 0.2352074831724167, + "eval_precision": 0.9510939510939511, + "eval_recall": 0.8426453819840365, + "eval_runtime": 19.9178, + "eval_samples_per_second": 127.675, + "eval_steps_per_second": 0.653, + "step": 8798 + }, + { + "epoch": 53.01204819277108, + "grad_norm": 1.9767760038375854, + "learning_rate": 4.417670682730924e-05, + "loss": 0.193, + "step": 8800 + }, + { + "epoch": 53.04216867469879, + "grad_norm": 2.481797695159912, + "learning_rate": 4.420180722891566e-05, + "loss": 0.1708, + "step": 8805 + }, + { + "epoch": 53.0722891566265, + "grad_norm": 2.7323923110961914, + "learning_rate": 4.422690763052209e-05, + "loss": 0.1834, + "step": 8810 + }, + { + "epoch": 53.102409638554214, + "grad_norm": 2.4297308921813965, + "learning_rate": 4.425200803212852e-05, + "loss": 0.2178, + "step": 8815 + }, + { + "epoch": 53.13253012048193, + "grad_norm": 3.1860032081604004, + "learning_rate": 4.427710843373494e-05, + "loss": 0.2285, + "step": 8820 + }, + { + "epoch": 53.16265060240964, + "grad_norm": 1.8464399576187134, + "learning_rate": 4.430220883534137e-05, + "loss": 0.2045, + "step": 8825 + }, + { + "epoch": 53.19277108433735, + "grad_norm": 1.474560022354126, + "learning_rate": 4.4327309236947793e-05, + "loss": 0.1837, + "step": 8830 + }, + { + "epoch": 53.22289156626506, + "grad_norm": 2.9821767807006836, + "learning_rate": 4.435240963855422e-05, + "loss": 0.2869, + "step": 8835 + }, + { + "epoch": 53.25301204819277, + "grad_norm": 2.518110752105713, + "learning_rate": 4.4377510040160645e-05, + "loss": 0.1892, + "step": 8840 + }, + { + "epoch": 53.28313253012048, + "grad_norm": 2.1917898654937744, + "learning_rate": 4.4402610441767074e-05, + "loss": 0.1843, + "step": 8845 + }, + { + "epoch": 53.31325301204819, + "grad_norm": 2.0914201736450195, + "learning_rate": 4.4427710843373496e-05, + "loss": 0.1934, + "step": 8850 + }, + { + "epoch": 53.3433734939759, + "grad_norm": 2.946645498275757, + "learning_rate": 4.445281124497992e-05, + "loss": 0.2093, + "step": 8855 + }, + { + "epoch": 53.373493975903614, + "grad_norm": 2.390066385269165, + "learning_rate": 4.447791164658635e-05, + "loss": 0.1918, + "step": 8860 + }, + { + "epoch": 53.403614457831324, + "grad_norm": 1.206371784210205, + "learning_rate": 4.450301204819277e-05, + "loss": 0.1915, + "step": 8865 + }, + { + "epoch": 53.433734939759034, + "grad_norm": 2.3323042392730713, + "learning_rate": 4.45281124497992e-05, + "loss": 0.1999, + "step": 8870 + }, + { + "epoch": 53.463855421686745, + "grad_norm": 1.4416083097457886, + "learning_rate": 4.455321285140562e-05, + "loss": 0.2099, + "step": 8875 + }, + { + "epoch": 53.493975903614455, + "grad_norm": 1.983001470565796, + "learning_rate": 4.457831325301205e-05, + "loss": 0.203, + "step": 8880 + }, + { + "epoch": 53.524096385542165, + "grad_norm": 1.6679518222808838, + "learning_rate": 4.460341365461847e-05, + "loss": 0.1928, + "step": 8885 + }, + { + "epoch": 53.55421686746988, + "grad_norm": 2.298336982727051, + "learning_rate": 4.46285140562249e-05, + "loss": 0.2264, + "step": 8890 + }, + { + "epoch": 53.58433734939759, + "grad_norm": 1.883910894393921, + "learning_rate": 4.465361445783133e-05, + "loss": 0.1952, + "step": 8895 + }, + { + "epoch": 53.6144578313253, + "grad_norm": 2.8218042850494385, + "learning_rate": 4.467871485943775e-05, + "loss": 0.2028, + "step": 8900 + }, + { + "epoch": 53.644578313253014, + "grad_norm": 2.3828554153442383, + "learning_rate": 4.470381526104418e-05, + "loss": 0.1979, + "step": 8905 + }, + { + "epoch": 53.674698795180724, + "grad_norm": 3.0916848182678223, + "learning_rate": 4.4728915662650604e-05, + "loss": 0.1986, + "step": 8910 + }, + { + "epoch": 53.704819277108435, + "grad_norm": 3.2898638248443604, + "learning_rate": 4.475401606425703e-05, + "loss": 0.2263, + "step": 8915 + }, + { + "epoch": 53.734939759036145, + "grad_norm": 1.38592529296875, + "learning_rate": 4.4779116465863456e-05, + "loss": 0.2121, + "step": 8920 + }, + { + "epoch": 53.765060240963855, + "grad_norm": 2.301682710647583, + "learning_rate": 4.4804216867469885e-05, + "loss": 0.2023, + "step": 8925 + }, + { + "epoch": 53.795180722891565, + "grad_norm": 1.671717882156372, + "learning_rate": 4.482931726907631e-05, + "loss": 0.1967, + "step": 8930 + }, + { + "epoch": 53.825301204819276, + "grad_norm": 1.9744724035263062, + "learning_rate": 4.4854417670682736e-05, + "loss": 0.1752, + "step": 8935 + }, + { + "epoch": 53.855421686746986, + "grad_norm": 1.1811193227767944, + "learning_rate": 4.487951807228916e-05, + "loss": 0.1688, + "step": 8940 + }, + { + "epoch": 53.8855421686747, + "grad_norm": 2.8911030292510986, + "learning_rate": 4.490461847389558e-05, + "loss": 0.2126, + "step": 8945 + }, + { + "epoch": 53.91566265060241, + "grad_norm": 2.2526655197143555, + "learning_rate": 4.492971887550201e-05, + "loss": 0.233, + "step": 8950 + }, + { + "epoch": 53.94578313253012, + "grad_norm": 2.8007659912109375, + "learning_rate": 4.495481927710843e-05, + "loss": 0.2219, + "step": 8955 + }, + { + "epoch": 53.975903614457835, + "grad_norm": 3.165022373199463, + "learning_rate": 4.497991967871486e-05, + "loss": 0.2496, + "step": 8960 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.935116004718836, + "eval_auc": 0.9742937083613377, + "eval_f1": 0.9021932424422051, + "eval_loss": 0.1958591490983963, + "eval_precision": 0.9395061728395062, + "eval_recall": 0.8677309007981756, + "eval_runtime": 19.5495, + "eval_samples_per_second": 130.08, + "eval_steps_per_second": 0.665, + "step": 8964 + }, + { + "epoch": 54.006024096385545, + "grad_norm": 1.5866248607635498, + "learning_rate": 4.500502008032128e-05, + "loss": 0.1758, + "step": 8965 + }, + { + "epoch": 54.036144578313255, + "grad_norm": 2.1163887977600098, + "learning_rate": 4.503012048192771e-05, + "loss": 0.2267, + "step": 8970 + }, + { + "epoch": 54.066265060240966, + "grad_norm": 1.9508090019226074, + "learning_rate": 4.505522088353414e-05, + "loss": 0.1804, + "step": 8975 + }, + { + "epoch": 54.096385542168676, + "grad_norm": 1.2839763164520264, + "learning_rate": 4.5080321285140564e-05, + "loss": 0.1979, + "step": 8980 + }, + { + "epoch": 54.126506024096386, + "grad_norm": 1.4487197399139404, + "learning_rate": 4.510542168674699e-05, + "loss": 0.1666, + "step": 8985 + }, + { + "epoch": 54.1566265060241, + "grad_norm": 1.8158890008926392, + "learning_rate": 4.5130522088353415e-05, + "loss": 0.1926, + "step": 8990 + }, + { + "epoch": 54.18674698795181, + "grad_norm": 1.6030244827270508, + "learning_rate": 4.5155622489959844e-05, + "loss": 0.1505, + "step": 8995 + }, + { + "epoch": 54.21686746987952, + "grad_norm": 1.469895362854004, + "learning_rate": 4.5180722891566266e-05, + "loss": 0.1798, + "step": 9000 + }, + { + "epoch": 54.24698795180723, + "grad_norm": 2.093759536743164, + "learning_rate": 4.5205823293172695e-05, + "loss": 0.2196, + "step": 9005 + }, + { + "epoch": 54.27710843373494, + "grad_norm": 1.9970499277114868, + "learning_rate": 4.523092369477912e-05, + "loss": 0.1971, + "step": 9010 + }, + { + "epoch": 54.30722891566265, + "grad_norm": 1.7573943138122559, + "learning_rate": 4.525602409638555e-05, + "loss": 0.1899, + "step": 9015 + }, + { + "epoch": 54.33734939759036, + "grad_norm": 1.4472109079360962, + "learning_rate": 4.5281124497991976e-05, + "loss": 0.1951, + "step": 9020 + }, + { + "epoch": 54.36746987951807, + "grad_norm": 2.5054283142089844, + "learning_rate": 4.53062248995984e-05, + "loss": 0.2062, + "step": 9025 + }, + { + "epoch": 54.397590361445786, + "grad_norm": 2.5859766006469727, + "learning_rate": 4.533132530120482e-05, + "loss": 0.1788, + "step": 9030 + }, + { + "epoch": 54.4277108433735, + "grad_norm": 2.444463014602661, + "learning_rate": 4.535642570281124e-05, + "loss": 0.1626, + "step": 9035 + }, + { + "epoch": 54.45783132530121, + "grad_norm": 1.884308099746704, + "learning_rate": 4.538152610441767e-05, + "loss": 0.1794, + "step": 9040 + }, + { + "epoch": 54.48795180722892, + "grad_norm": 2.135911703109741, + "learning_rate": 4.5406626506024094e-05, + "loss": 0.1992, + "step": 9045 + }, + { + "epoch": 54.51807228915663, + "grad_norm": 2.4031646251678467, + "learning_rate": 4.543172690763052e-05, + "loss": 0.2086, + "step": 9050 + }, + { + "epoch": 54.54819277108434, + "grad_norm": 1.498276948928833, + "learning_rate": 4.545682730923695e-05, + "loss": 0.1702, + "step": 9055 + }, + { + "epoch": 54.57831325301205, + "grad_norm": 2.10369873046875, + "learning_rate": 4.5481927710843374e-05, + "loss": 0.1828, + "step": 9060 + }, + { + "epoch": 54.60843373493976, + "grad_norm": 1.6820237636566162, + "learning_rate": 4.5507028112449803e-05, + "loss": 0.198, + "step": 9065 + }, + { + "epoch": 54.63855421686747, + "grad_norm": 1.806857943534851, + "learning_rate": 4.5532128514056226e-05, + "loss": 0.1937, + "step": 9070 + }, + { + "epoch": 54.66867469879518, + "grad_norm": 2.2759451866149902, + "learning_rate": 4.5557228915662655e-05, + "loss": 0.1971, + "step": 9075 + }, + { + "epoch": 54.69879518072289, + "grad_norm": 1.972905158996582, + "learning_rate": 4.558232931726908e-05, + "loss": 0.1738, + "step": 9080 + }, + { + "epoch": 54.7289156626506, + "grad_norm": 2.6859445571899414, + "learning_rate": 4.5607429718875506e-05, + "loss": 0.1905, + "step": 9085 + }, + { + "epoch": 54.75903614457831, + "grad_norm": 1.83362877368927, + "learning_rate": 4.563253012048193e-05, + "loss": 0.2235, + "step": 9090 + }, + { + "epoch": 54.78915662650603, + "grad_norm": 1.7969437837600708, + "learning_rate": 4.565763052208836e-05, + "loss": 0.1856, + "step": 9095 + }, + { + "epoch": 54.81927710843374, + "grad_norm": 2.4315059185028076, + "learning_rate": 4.568273092369478e-05, + "loss": 0.2234, + "step": 9100 + }, + { + "epoch": 54.84939759036145, + "grad_norm": 2.577702045440674, + "learning_rate": 4.570783132530121e-05, + "loss": 0.235, + "step": 9105 + }, + { + "epoch": 54.87951807228916, + "grad_norm": 1.6569136381149292, + "learning_rate": 4.573293172690764e-05, + "loss": 0.1639, + "step": 9110 + }, + { + "epoch": 54.90963855421687, + "grad_norm": 1.192121148109436, + "learning_rate": 4.575803212851406e-05, + "loss": 0.1791, + "step": 9115 + }, + { + "epoch": 54.93975903614458, + "grad_norm": 2.4576196670532227, + "learning_rate": 4.578313253012048e-05, + "loss": 0.1732, + "step": 9120 + }, + { + "epoch": 54.96987951807229, + "grad_norm": 2.1103174686431885, + "learning_rate": 4.5808232931726905e-05, + "loss": 0.1979, + "step": 9125 + }, + { + "epoch": 55.0, + "grad_norm": 2.6083672046661377, + "learning_rate": 4.5833333333333334e-05, + "loss": 0.2174, + "step": 9130 + }, + { + "epoch": 55.0, + "eval_accuracy": 0.9178136059771923, + "eval_auc": 0.9665703225417875, + "eval_f1": 0.8707482993197279, + "eval_loss": 0.30000755190849304, + "eval_precision": 0.9513513513513514, + "eval_recall": 0.8027366020524516, + "eval_runtime": 19.6456, + "eval_samples_per_second": 129.443, + "eval_steps_per_second": 0.662, + "step": 9130 + }, + { + "epoch": 55.03012048192771, + "grad_norm": 1.9286142587661743, + "learning_rate": 4.585843373493976e-05, + "loss": 0.2294, + "step": 9135 + }, + { + "epoch": 55.06024096385542, + "grad_norm": 2.960637092590332, + "learning_rate": 4.5883534136546185e-05, + "loss": 0.1984, + "step": 9140 + }, + { + "epoch": 55.09036144578313, + "grad_norm": 2.789501905441284, + "learning_rate": 4.5908634538152614e-05, + "loss": 0.2446, + "step": 9145 + }, + { + "epoch": 55.12048192771084, + "grad_norm": 2.2632017135620117, + "learning_rate": 4.5933734939759037e-05, + "loss": 0.1907, + "step": 9150 + }, + { + "epoch": 55.15060240963855, + "grad_norm": 2.261414051055908, + "learning_rate": 4.5958835341365466e-05, + "loss": 0.2248, + "step": 9155 + }, + { + "epoch": 55.18072289156626, + "grad_norm": 1.362753987312317, + "learning_rate": 4.598393574297189e-05, + "loss": 0.2021, + "step": 9160 + }, + { + "epoch": 55.21084337349398, + "grad_norm": 1.8857378959655762, + "learning_rate": 4.600903614457832e-05, + "loss": 0.1757, + "step": 9165 + }, + { + "epoch": 55.24096385542169, + "grad_norm": 1.6326899528503418, + "learning_rate": 4.603413654618474e-05, + "loss": 0.1731, + "step": 9170 + }, + { + "epoch": 55.2710843373494, + "grad_norm": 2.264676809310913, + "learning_rate": 4.605923694779117e-05, + "loss": 0.1987, + "step": 9175 + }, + { + "epoch": 55.30120481927711, + "grad_norm": 1.5106638669967651, + "learning_rate": 4.608433734939759e-05, + "loss": 0.1774, + "step": 9180 + }, + { + "epoch": 55.33132530120482, + "grad_norm": 1.6665120124816895, + "learning_rate": 4.610943775100402e-05, + "loss": 0.189, + "step": 9185 + }, + { + "epoch": 55.36144578313253, + "grad_norm": 1.8084211349487305, + "learning_rate": 4.613453815261045e-05, + "loss": 0.2032, + "step": 9190 + }, + { + "epoch": 55.39156626506024, + "grad_norm": 1.4120748043060303, + "learning_rate": 4.615963855421687e-05, + "loss": 0.1611, + "step": 9195 + }, + { + "epoch": 55.42168674698795, + "grad_norm": 1.8941086530685425, + "learning_rate": 4.61847389558233e-05, + "loss": 0.1983, + "step": 9200 + }, + { + "epoch": 55.45180722891566, + "grad_norm": 2.4490437507629395, + "learning_rate": 4.6209839357429716e-05, + "loss": 0.1884, + "step": 9205 + }, + { + "epoch": 55.48192771084337, + "grad_norm": 1.140552282333374, + "learning_rate": 4.6234939759036145e-05, + "loss": 0.2301, + "step": 9210 + }, + { + "epoch": 55.51204819277108, + "grad_norm": 2.700495958328247, + "learning_rate": 4.6260040160642574e-05, + "loss": 0.1932, + "step": 9215 + }, + { + "epoch": 55.54216867469879, + "grad_norm": 1.8277531862258911, + "learning_rate": 4.6285140562248996e-05, + "loss": 0.2021, + "step": 9220 + }, + { + "epoch": 55.5722891566265, + "grad_norm": 1.8265845775604248, + "learning_rate": 4.6310240963855425e-05, + "loss": 0.1984, + "step": 9225 + }, + { + "epoch": 55.602409638554214, + "grad_norm": 2.271312952041626, + "learning_rate": 4.633534136546185e-05, + "loss": 0.2141, + "step": 9230 + }, + { + "epoch": 55.63253012048193, + "grad_norm": 1.8416059017181396, + "learning_rate": 4.6360441767068276e-05, + "loss": 0.1959, + "step": 9235 + }, + { + "epoch": 55.66265060240964, + "grad_norm": 1.6881704330444336, + "learning_rate": 4.63855421686747e-05, + "loss": 0.2153, + "step": 9240 + }, + { + "epoch": 55.69277108433735, + "grad_norm": 1.877532958984375, + "learning_rate": 4.641064257028113e-05, + "loss": 0.1944, + "step": 9245 + }, + { + "epoch": 55.72289156626506, + "grad_norm": 2.533802032470703, + "learning_rate": 4.643574297188755e-05, + "loss": 0.2371, + "step": 9250 + }, + { + "epoch": 55.75301204819277, + "grad_norm": 1.2995754480361938, + "learning_rate": 4.646084337349398e-05, + "loss": 0.2245, + "step": 9255 + }, + { + "epoch": 55.78313253012048, + "grad_norm": 2.5549495220184326, + "learning_rate": 4.64859437751004e-05, + "loss": 0.1839, + "step": 9260 + }, + { + "epoch": 55.81325301204819, + "grad_norm": 3.4564034938812256, + "learning_rate": 4.651104417670683e-05, + "loss": 0.1955, + "step": 9265 + }, + { + "epoch": 55.8433734939759, + "grad_norm": 1.7227681875228882, + "learning_rate": 4.653614457831326e-05, + "loss": 0.1779, + "step": 9270 + }, + { + "epoch": 55.873493975903614, + "grad_norm": 1.8119142055511475, + "learning_rate": 4.656124497991968e-05, + "loss": 0.1914, + "step": 9275 + }, + { + "epoch": 55.903614457831324, + "grad_norm": 1.8299942016601562, + "learning_rate": 4.658634538152611e-05, + "loss": 0.1888, + "step": 9280 + }, + { + "epoch": 55.933734939759034, + "grad_norm": 2.523125648498535, + "learning_rate": 4.661144578313253e-05, + "loss": 0.195, + "step": 9285 + }, + { + "epoch": 55.963855421686745, + "grad_norm": 3.1764206886291504, + "learning_rate": 4.663654618473896e-05, + "loss": 0.2003, + "step": 9290 + }, + { + "epoch": 55.993975903614455, + "grad_norm": 1.5687943696975708, + "learning_rate": 4.6661646586345384e-05, + "loss": 0.2186, + "step": 9295 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.9087691702713331, + "eval_auc": 0.9684418807431753, + "eval_f1": 0.8533501896333755, + "eval_loss": 0.29460248351097107, + "eval_precision": 0.9574468085106383, + "eval_recall": 0.7696693272519954, + "eval_runtime": 18.3085, + "eval_samples_per_second": 138.897, + "eval_steps_per_second": 0.71, + "step": 9296 + }, + { + "epoch": 56.024096385542165, + "grad_norm": 1.6623972654342651, + "learning_rate": 4.668674698795181e-05, + "loss": 0.1697, + "step": 9300 + }, + { + "epoch": 56.05421686746988, + "grad_norm": 2.72373104095459, + "learning_rate": 4.6711847389558236e-05, + "loss": 0.2215, + "step": 9305 + }, + { + "epoch": 56.08433734939759, + "grad_norm": 1.5855753421783447, + "learning_rate": 4.673694779116466e-05, + "loss": 0.1525, + "step": 9310 + }, + { + "epoch": 56.1144578313253, + "grad_norm": 1.6013188362121582, + "learning_rate": 4.676204819277109e-05, + "loss": 0.2053, + "step": 9315 + }, + { + "epoch": 56.144578313253014, + "grad_norm": 2.0472776889801025, + "learning_rate": 4.678714859437751e-05, + "loss": 0.1781, + "step": 9320 + }, + { + "epoch": 56.174698795180724, + "grad_norm": 2.218830108642578, + "learning_rate": 4.681224899598394e-05, + "loss": 0.2152, + "step": 9325 + }, + { + "epoch": 56.204819277108435, + "grad_norm": 1.771793246269226, + "learning_rate": 4.683734939759036e-05, + "loss": 0.1593, + "step": 9330 + }, + { + "epoch": 56.234939759036145, + "grad_norm": 1.8977620601654053, + "learning_rate": 4.686244979919679e-05, + "loss": 0.1865, + "step": 9335 + }, + { + "epoch": 56.265060240963855, + "grad_norm": 1.4574172496795654, + "learning_rate": 4.688755020080321e-05, + "loss": 0.1508, + "step": 9340 + }, + { + "epoch": 56.295180722891565, + "grad_norm": 2.013129949569702, + "learning_rate": 4.691265060240964e-05, + "loss": 0.1881, + "step": 9345 + }, + { + "epoch": 56.325301204819276, + "grad_norm": 1.7704567909240723, + "learning_rate": 4.693775100401607e-05, + "loss": 0.1996, + "step": 9350 + }, + { + "epoch": 56.355421686746986, + "grad_norm": 1.887007713317871, + "learning_rate": 4.696285140562249e-05, + "loss": 0.1963, + "step": 9355 + }, + { + "epoch": 56.3855421686747, + "grad_norm": 1.9213777780532837, + "learning_rate": 4.698795180722892e-05, + "loss": 0.1655, + "step": 9360 + }, + { + "epoch": 56.41566265060241, + "grad_norm": 2.012073278427124, + "learning_rate": 4.7013052208835344e-05, + "loss": 0.138, + "step": 9365 + }, + { + "epoch": 56.44578313253012, + "grad_norm": 2.9040980339050293, + "learning_rate": 4.703815261044177e-05, + "loss": 0.1949, + "step": 9370 + }, + { + "epoch": 56.475903614457835, + "grad_norm": 1.981165885925293, + "learning_rate": 4.7063253012048195e-05, + "loss": 0.1696, + "step": 9375 + }, + { + "epoch": 56.506024096385545, + "grad_norm": 2.3808298110961914, + "learning_rate": 4.7088353413654624e-05, + "loss": 0.2098, + "step": 9380 + }, + { + "epoch": 56.536144578313255, + "grad_norm": 2.0263671875, + "learning_rate": 4.7113453815261047e-05, + "loss": 0.1811, + "step": 9385 + }, + { + "epoch": 56.566265060240966, + "grad_norm": 1.884153127670288, + "learning_rate": 4.713855421686747e-05, + "loss": 0.2036, + "step": 9390 + }, + { + "epoch": 56.596385542168676, + "grad_norm": 1.0312684774398804, + "learning_rate": 4.71636546184739e-05, + "loss": 0.1657, + "step": 9395 + }, + { + "epoch": 56.626506024096386, + "grad_norm": 2.476505994796753, + "learning_rate": 4.718875502008032e-05, + "loss": 0.1784, + "step": 9400 + }, + { + "epoch": 56.6566265060241, + "grad_norm": 3.1120269298553467, + "learning_rate": 4.721385542168675e-05, + "loss": 0.1962, + "step": 9405 + }, + { + "epoch": 56.68674698795181, + "grad_norm": 2.2251052856445312, + "learning_rate": 4.723895582329317e-05, + "loss": 0.2168, + "step": 9410 + }, + { + "epoch": 56.71686746987952, + "grad_norm": 1.7547193765640259, + "learning_rate": 4.72640562248996e-05, + "loss": 0.1842, + "step": 9415 + }, + { + "epoch": 56.74698795180723, + "grad_norm": 2.9130563735961914, + "learning_rate": 4.728915662650602e-05, + "loss": 0.2157, + "step": 9420 + }, + { + "epoch": 56.77710843373494, + "grad_norm": 2.223615884780884, + "learning_rate": 4.731425702811245e-05, + "loss": 0.1958, + "step": 9425 + }, + { + "epoch": 56.80722891566265, + "grad_norm": 2.0641613006591797, + "learning_rate": 4.733935742971888e-05, + "loss": 0.1992, + "step": 9430 + }, + { + "epoch": 56.83734939759036, + "grad_norm": 1.5671474933624268, + "learning_rate": 4.73644578313253e-05, + "loss": 0.1937, + "step": 9435 + }, + { + "epoch": 56.86746987951807, + "grad_norm": 1.8134636878967285, + "learning_rate": 4.738955823293173e-05, + "loss": 0.2251, + "step": 9440 + }, + { + "epoch": 56.897590361445786, + "grad_norm": 1.9263204336166382, + "learning_rate": 4.7414658634538155e-05, + "loss": 0.1919, + "step": 9445 + }, + { + "epoch": 56.9277108433735, + "grad_norm": 1.2124485969543457, + "learning_rate": 4.7439759036144584e-05, + "loss": 0.172, + "step": 9450 + }, + { + "epoch": 56.95783132530121, + "grad_norm": 1.6786320209503174, + "learning_rate": 4.7464859437751006e-05, + "loss": 0.1771, + "step": 9455 + }, + { + "epoch": 56.98795180722892, + "grad_norm": 2.007603883743286, + "learning_rate": 4.7489959839357435e-05, + "loss": 0.1914, + "step": 9460 + }, + { + "epoch": 57.0, + "eval_accuracy": 0.94494691309477, + "eval_auc": 0.9780734414632444, + "eval_f1": 0.9185098952270081, + "eval_loss": 0.1861359030008316, + "eval_precision": 0.9381688466111772, + "eval_recall": 0.8996579247434435, + "eval_runtime": 17.0537, + "eval_samples_per_second": 149.117, + "eval_steps_per_second": 0.762, + "step": 9462 + }, + { + "epoch": 57.01807228915663, + "grad_norm": 1.681352972984314, + "learning_rate": 4.751506024096386e-05, + "loss": 0.1718, + "step": 9465 + }, + { + "epoch": 57.04819277108434, + "grad_norm": 2.399091958999634, + "learning_rate": 4.7540160642570286e-05, + "loss": 0.171, + "step": 9470 + }, + { + "epoch": 57.07831325301205, + "grad_norm": 1.1098817586898804, + "learning_rate": 4.756526104417671e-05, + "loss": 0.2415, + "step": 9475 + }, + { + "epoch": 57.10843373493976, + "grad_norm": 3.2166264057159424, + "learning_rate": 4.759036144578313e-05, + "loss": 0.2354, + "step": 9480 + }, + { + "epoch": 57.13855421686747, + "grad_norm": 2.6321725845336914, + "learning_rate": 4.761546184738956e-05, + "loss": 0.1686, + "step": 9485 + }, + { + "epoch": 57.16867469879518, + "grad_norm": 1.4256139993667603, + "learning_rate": 4.764056224899598e-05, + "loss": 0.1904, + "step": 9490 + }, + { + "epoch": 57.19879518072289, + "grad_norm": 1.756381630897522, + "learning_rate": 4.766566265060241e-05, + "loss": 0.2274, + "step": 9495 + }, + { + "epoch": 57.2289156626506, + "grad_norm": 2.3295838832855225, + "learning_rate": 4.7690763052208834e-05, + "loss": 0.1896, + "step": 9500 + }, + { + "epoch": 57.25903614457831, + "grad_norm": 1.5230826139450073, + "learning_rate": 4.771586345381526e-05, + "loss": 0.1804, + "step": 9505 + }, + { + "epoch": 57.28915662650602, + "grad_norm": 2.0946292877197266, + "learning_rate": 4.774096385542169e-05, + "loss": 0.1768, + "step": 9510 + }, + { + "epoch": 57.31927710843374, + "grad_norm": 2.077497959136963, + "learning_rate": 4.7766064257028114e-05, + "loss": 0.1733, + "step": 9515 + }, + { + "epoch": 57.34939759036145, + "grad_norm": 2.689943313598633, + "learning_rate": 4.779116465863454e-05, + "loss": 0.1704, + "step": 9520 + }, + { + "epoch": 57.37951807228916, + "grad_norm": 1.5451295375823975, + "learning_rate": 4.7816265060240965e-05, + "loss": 0.2037, + "step": 9525 + }, + { + "epoch": 57.40963855421687, + "grad_norm": 2.226560115814209, + "learning_rate": 4.7841365461847394e-05, + "loss": 0.1868, + "step": 9530 + }, + { + "epoch": 57.43975903614458, + "grad_norm": 1.874423861503601, + "learning_rate": 4.786646586345382e-05, + "loss": 0.1626, + "step": 9535 + }, + { + "epoch": 57.46987951807229, + "grad_norm": 1.8824931383132935, + "learning_rate": 4.7891566265060246e-05, + "loss": 0.1873, + "step": 9540 + }, + { + "epoch": 57.5, + "grad_norm": 2.245616912841797, + "learning_rate": 4.791666666666667e-05, + "loss": 0.1997, + "step": 9545 + }, + { + "epoch": 57.53012048192771, + "grad_norm": 1.600521206855774, + "learning_rate": 4.79417670682731e-05, + "loss": 0.1621, + "step": 9550 + }, + { + "epoch": 57.56024096385542, + "grad_norm": 1.4455299377441406, + "learning_rate": 4.796686746987952e-05, + "loss": 0.1607, + "step": 9555 + }, + { + "epoch": 57.59036144578313, + "grad_norm": 2.1505472660064697, + "learning_rate": 4.799196787148594e-05, + "loss": 0.1754, + "step": 9560 + }, + { + "epoch": 57.62048192771084, + "grad_norm": 2.4378881454467773, + "learning_rate": 4.801706827309237e-05, + "loss": 0.2255, + "step": 9565 + }, + { + "epoch": 57.65060240963855, + "grad_norm": 3.2311930656433105, + "learning_rate": 4.804216867469879e-05, + "loss": 0.225, + "step": 9570 + }, + { + "epoch": 57.68072289156626, + "grad_norm": 1.9619172811508179, + "learning_rate": 4.806726907630522e-05, + "loss": 0.2063, + "step": 9575 + }, + { + "epoch": 57.71084337349397, + "grad_norm": 2.327970504760742, + "learning_rate": 4.8092369477911644e-05, + "loss": 0.2296, + "step": 9580 + }, + { + "epoch": 57.74096385542169, + "grad_norm": 2.125100612640381, + "learning_rate": 4.8117469879518074e-05, + "loss": 0.161, + "step": 9585 + }, + { + "epoch": 57.7710843373494, + "grad_norm": 1.8094075918197632, + "learning_rate": 4.81425702811245e-05, + "loss": 0.1591, + "step": 9590 + }, + { + "epoch": 57.80120481927711, + "grad_norm": 2.7225515842437744, + "learning_rate": 4.8167670682730925e-05, + "loss": 0.2082, + "step": 9595 + }, + { + "epoch": 57.83132530120482, + "grad_norm": 2.394782066345215, + "learning_rate": 4.8192771084337354e-05, + "loss": 0.1697, + "step": 9600 + }, + { + "epoch": 57.86144578313253, + "grad_norm": 2.6002819538116455, + "learning_rate": 4.8217871485943776e-05, + "loss": 0.1993, + "step": 9605 + }, + { + "epoch": 57.89156626506024, + "grad_norm": 2.59199857711792, + "learning_rate": 4.8242971887550205e-05, + "loss": 0.1996, + "step": 9610 + }, + { + "epoch": 57.92168674698795, + "grad_norm": 1.7686564922332764, + "learning_rate": 4.826807228915663e-05, + "loss": 0.2004, + "step": 9615 + }, + { + "epoch": 57.95180722891566, + "grad_norm": 1.7516053915023804, + "learning_rate": 4.8293172690763057e-05, + "loss": 0.1889, + "step": 9620 + }, + { + "epoch": 57.98192771084337, + "grad_norm": 2.2338221073150635, + "learning_rate": 4.831827309236948e-05, + "loss": 0.195, + "step": 9625 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.9189933149823044, + "eval_auc": 0.9644585314171279, + "eval_f1": 0.8740831295843521, + "eval_loss": 0.26911577582359314, + "eval_precision": 0.9420289855072463, + "eval_recall": 0.8152793614595211, + "eval_runtime": 17.0243, + "eval_samples_per_second": 149.374, + "eval_steps_per_second": 0.764, + "step": 9628 + }, + { + "epoch": 58.01204819277108, + "grad_norm": 0.897562563419342, + "learning_rate": 4.834337349397591e-05, + "loss": 0.1596, + "step": 9630 + }, + { + "epoch": 58.04216867469879, + "grad_norm": 1.6326240301132202, + "learning_rate": 4.836847389558233e-05, + "loss": 0.1571, + "step": 9635 + }, + { + "epoch": 58.0722891566265, + "grad_norm": 1.8651673793792725, + "learning_rate": 4.839357429718876e-05, + "loss": 0.1737, + "step": 9640 + }, + { + "epoch": 58.102409638554214, + "grad_norm": 2.2550816535949707, + "learning_rate": 4.841867469879519e-05, + "loss": 0.1726, + "step": 9645 + }, + { + "epoch": 58.13253012048193, + "grad_norm": 1.8229386806488037, + "learning_rate": 4.8443775100401604e-05, + "loss": 0.1772, + "step": 9650 + }, + { + "epoch": 58.16265060240964, + "grad_norm": 2.511829376220703, + "learning_rate": 4.846887550200803e-05, + "loss": 0.1739, + "step": 9655 + }, + { + "epoch": 58.19277108433735, + "grad_norm": 1.3670412302017212, + "learning_rate": 4.8493975903614455e-05, + "loss": 0.1759, + "step": 9660 + }, + { + "epoch": 58.22289156626506, + "grad_norm": 1.1315114498138428, + "learning_rate": 4.8519076305220884e-05, + "loss": 0.177, + "step": 9665 + }, + { + "epoch": 58.25301204819277, + "grad_norm": 1.233046293258667, + "learning_rate": 4.854417670682731e-05, + "loss": 0.1466, + "step": 9670 + }, + { + "epoch": 58.28313253012048, + "grad_norm": 2.8945133686065674, + "learning_rate": 4.8569277108433736e-05, + "loss": 0.171, + "step": 9675 + }, + { + "epoch": 58.31325301204819, + "grad_norm": 2.6257240772247314, + "learning_rate": 4.8594377510040165e-05, + "loss": 0.1765, + "step": 9680 + }, + { + "epoch": 58.3433734939759, + "grad_norm": 2.419882297515869, + "learning_rate": 4.861947791164659e-05, + "loss": 0.1816, + "step": 9685 + }, + { + "epoch": 58.373493975903614, + "grad_norm": 2.089569091796875, + "learning_rate": 4.8644578313253016e-05, + "loss": 0.1584, + "step": 9690 + }, + { + "epoch": 58.403614457831324, + "grad_norm": 1.8195586204528809, + "learning_rate": 4.866967871485944e-05, + "loss": 0.1887, + "step": 9695 + }, + { + "epoch": 58.433734939759034, + "grad_norm": 2.7612900733947754, + "learning_rate": 4.869477911646587e-05, + "loss": 0.1992, + "step": 9700 + }, + { + "epoch": 58.463855421686745, + "grad_norm": 1.2157365083694458, + "learning_rate": 4.871987951807229e-05, + "loss": 0.196, + "step": 9705 + }, + { + "epoch": 58.493975903614455, + "grad_norm": 1.898940086364746, + "learning_rate": 4.874497991967872e-05, + "loss": 0.1826, + "step": 9710 + }, + { + "epoch": 58.524096385542165, + "grad_norm": 2.632331609725952, + "learning_rate": 4.877008032128514e-05, + "loss": 0.1896, + "step": 9715 + }, + { + "epoch": 58.55421686746988, + "grad_norm": 1.7673542499542236, + "learning_rate": 4.879518072289157e-05, + "loss": 0.1877, + "step": 9720 + }, + { + "epoch": 58.58433734939759, + "grad_norm": 2.230239152908325, + "learning_rate": 4.8820281124498e-05, + "loss": 0.2212, + "step": 9725 + }, + { + "epoch": 58.6144578313253, + "grad_norm": 1.7951022386550903, + "learning_rate": 4.884538152610442e-05, + "loss": 0.2038, + "step": 9730 + }, + { + "epoch": 58.644578313253014, + "grad_norm": 1.6010494232177734, + "learning_rate": 4.887048192771085e-05, + "loss": 0.1843, + "step": 9735 + }, + { + "epoch": 58.674698795180724, + "grad_norm": 3.0509254932403564, + "learning_rate": 4.8895582329317266e-05, + "loss": 0.2334, + "step": 9740 + }, + { + "epoch": 58.704819277108435, + "grad_norm": 2.3465235233306885, + "learning_rate": 4.8920682730923695e-05, + "loss": 0.2078, + "step": 9745 + }, + { + "epoch": 58.734939759036145, + "grad_norm": 2.3988800048828125, + "learning_rate": 4.8945783132530124e-05, + "loss": 0.1669, + "step": 9750 + }, + { + "epoch": 58.765060240963855, + "grad_norm": 2.1776366233825684, + "learning_rate": 4.8970883534136546e-05, + "loss": 0.2175, + "step": 9755 + }, + { + "epoch": 58.795180722891565, + "grad_norm": 3.2049734592437744, + "learning_rate": 4.8995983935742975e-05, + "loss": 0.2254, + "step": 9760 + }, + { + "epoch": 58.825301204819276, + "grad_norm": 2.559453010559082, + "learning_rate": 4.90210843373494e-05, + "loss": 0.2251, + "step": 9765 + }, + { + "epoch": 58.855421686746986, + "grad_norm": 1.55681574344635, + "learning_rate": 4.904618473895583e-05, + "loss": 0.2225, + "step": 9770 + }, + { + "epoch": 58.8855421686747, + "grad_norm": 2.9513471126556396, + "learning_rate": 4.907128514056225e-05, + "loss": 0.2351, + "step": 9775 + }, + { + "epoch": 58.91566265060241, + "grad_norm": 1.4565945863723755, + "learning_rate": 4.909638554216868e-05, + "loss": 0.19, + "step": 9780 + }, + { + "epoch": 58.94578313253012, + "grad_norm": 2.205860137939453, + "learning_rate": 4.91214859437751e-05, + "loss": 0.1828, + "step": 9785 + }, + { + "epoch": 58.975903614457835, + "grad_norm": 1.4514609575271606, + "learning_rate": 4.914658634538153e-05, + "loss": 0.1996, + "step": 9790 + }, + { + "epoch": 59.0, + "eval_accuracy": 0.9315768777034998, + "eval_auc": 0.9756372332285252, + "eval_f1": 0.8941605839416058, + "eval_loss": 0.23759578168392181, + "eval_precision": 0.9582790091264668, + "eval_recall": 0.8380843785632839, + "eval_runtime": 17.2202, + "eval_samples_per_second": 147.676, + "eval_steps_per_second": 0.755, + "step": 9794 + }, + { + "epoch": 59.006024096385545, + "grad_norm": 1.3549339771270752, + "learning_rate": 4.917168674698795e-05, + "loss": 0.1943, + "step": 9795 + }, + { + "epoch": 59.036144578313255, + "grad_norm": 1.6545672416687012, + "learning_rate": 4.919678714859438e-05, + "loss": 0.1442, + "step": 9800 + }, + { + "epoch": 59.066265060240966, + "grad_norm": 3.495600461959839, + "learning_rate": 4.922188755020081e-05, + "loss": 0.1783, + "step": 9805 + }, + { + "epoch": 59.096385542168676, + "grad_norm": 1.1288702487945557, + "learning_rate": 4.924698795180723e-05, + "loss": 0.1477, + "step": 9810 + }, + { + "epoch": 59.126506024096386, + "grad_norm": 2.1384871006011963, + "learning_rate": 4.927208835341366e-05, + "loss": 0.2319, + "step": 9815 + }, + { + "epoch": 59.1566265060241, + "grad_norm": 2.3611960411071777, + "learning_rate": 4.9297188755020084e-05, + "loss": 0.1767, + "step": 9820 + }, + { + "epoch": 59.18674698795181, + "grad_norm": 2.8789052963256836, + "learning_rate": 4.932228915662651e-05, + "loss": 0.1613, + "step": 9825 + }, + { + "epoch": 59.21686746987952, + "grad_norm": 1.6236155033111572, + "learning_rate": 4.9347389558232935e-05, + "loss": 0.2431, + "step": 9830 + }, + { + "epoch": 59.24698795180723, + "grad_norm": 1.9372285604476929, + "learning_rate": 4.937248995983936e-05, + "loss": 0.1726, + "step": 9835 + }, + { + "epoch": 59.27710843373494, + "grad_norm": 2.4922103881835938, + "learning_rate": 4.9397590361445786e-05, + "loss": 0.195, + "step": 9840 + }, + { + "epoch": 59.30722891566265, + "grad_norm": 2.4770331382751465, + "learning_rate": 4.942269076305221e-05, + "loss": 0.1671, + "step": 9845 + }, + { + "epoch": 59.33734939759036, + "grad_norm": 2.4791817665100098, + "learning_rate": 4.944779116465864e-05, + "loss": 0.1679, + "step": 9850 + }, + { + "epoch": 59.36746987951807, + "grad_norm": 1.4872862100601196, + "learning_rate": 4.947289156626506e-05, + "loss": 0.148, + "step": 9855 + }, + { + "epoch": 59.397590361445786, + "grad_norm": 1.551857829093933, + "learning_rate": 4.949799196787149e-05, + "loss": 0.1942, + "step": 9860 + }, + { + "epoch": 59.4277108433735, + "grad_norm": 2.071345806121826, + "learning_rate": 4.952309236947791e-05, + "loss": 0.1539, + "step": 9865 + }, + { + "epoch": 59.45783132530121, + "grad_norm": 2.5802876949310303, + "learning_rate": 4.954819277108434e-05, + "loss": 0.1834, + "step": 9870 + }, + { + "epoch": 59.48795180722892, + "grad_norm": 1.9511005878448486, + "learning_rate": 4.957329317269076e-05, + "loss": 0.2367, + "step": 9875 + }, + { + "epoch": 59.51807228915663, + "grad_norm": 2.43558669090271, + "learning_rate": 4.959839357429719e-05, + "loss": 0.1732, + "step": 9880 + }, + { + "epoch": 59.54819277108434, + "grad_norm": 2.888108015060425, + "learning_rate": 4.962349397590362e-05, + "loss": 0.1688, + "step": 9885 + }, + { + "epoch": 59.57831325301205, + "grad_norm": 1.2000738382339478, + "learning_rate": 4.964859437751004e-05, + "loss": 0.1844, + "step": 9890 + }, + { + "epoch": 59.60843373493976, + "grad_norm": 1.5006636381149292, + "learning_rate": 4.967369477911647e-05, + "loss": 0.2022, + "step": 9895 + }, + { + "epoch": 59.63855421686747, + "grad_norm": 1.8979883193969727, + "learning_rate": 4.9698795180722894e-05, + "loss": 0.1858, + "step": 9900 + }, + { + "epoch": 59.66867469879518, + "grad_norm": 1.771418809890747, + "learning_rate": 4.972389558232932e-05, + "loss": 0.1665, + "step": 9905 + }, + { + "epoch": 59.69879518072289, + "grad_norm": 1.7620320320129395, + "learning_rate": 4.9748995983935746e-05, + "loss": 0.1907, + "step": 9910 + }, + { + "epoch": 59.7289156626506, + "grad_norm": 1.8053280115127563, + "learning_rate": 4.9774096385542175e-05, + "loss": 0.1822, + "step": 9915 + }, + { + "epoch": 59.75903614457831, + "grad_norm": 1.4249768257141113, + "learning_rate": 4.97991967871486e-05, + "loss": 0.1966, + "step": 9920 + }, + { + "epoch": 59.78915662650603, + "grad_norm": 1.8042192459106445, + "learning_rate": 4.982429718875502e-05, + "loss": 0.1919, + "step": 9925 + }, + { + "epoch": 59.81927710843374, + "grad_norm": 1.9030768871307373, + "learning_rate": 4.984939759036145e-05, + "loss": 0.1794, + "step": 9930 + }, + { + "epoch": 59.84939759036145, + "grad_norm": 1.8618736267089844, + "learning_rate": 4.987449799196787e-05, + "loss": 0.18, + "step": 9935 + }, + { + "epoch": 59.87951807228916, + "grad_norm": 3.4829823970794678, + "learning_rate": 4.98995983935743e-05, + "loss": 0.1943, + "step": 9940 + }, + { + "epoch": 59.90963855421687, + "grad_norm": 2.407097101211548, + "learning_rate": 4.992469879518072e-05, + "loss": 0.2142, + "step": 9945 + }, + { + "epoch": 59.93975903614458, + "grad_norm": 1.5124931335449219, + "learning_rate": 4.994979919678715e-05, + "loss": 0.1798, + "step": 9950 + }, + { + "epoch": 59.96987951807229, + "grad_norm": 1.4459714889526367, + "learning_rate": 4.997489959839357e-05, + "loss": 0.1883, + "step": 9955 + }, + { + "epoch": 60.0, + "grad_norm": 1.3064192533493042, + "learning_rate": 5e-05, + "loss": 0.1891, + "step": 9960 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.9327565867086118, + "eval_auc": 0.9759270869123019, + "eval_f1": 0.8968014484007242, + "eval_loss": 0.2146846204996109, + "eval_precision": 0.9525641025641025, + "eval_recall": 0.8472063854047891, + "eval_runtime": 17.0585, + "eval_samples_per_second": 149.075, + "eval_steps_per_second": 0.762, + "step": 9960 + }, + { + "epoch": 60.03012048192771, + "grad_norm": 1.8910998106002808, + "learning_rate": 4.9997211066488176e-05, + "loss": 0.1941, + "step": 9965 + }, + { + "epoch": 60.06024096385542, + "grad_norm": 1.4629839658737183, + "learning_rate": 4.999442213297636e-05, + "loss": 0.1729, + "step": 9970 + }, + { + "epoch": 60.09036144578313, + "grad_norm": 2.8674991130828857, + "learning_rate": 4.9991633199464524e-05, + "loss": 0.1982, + "step": 9975 + }, + { + "epoch": 60.12048192771084, + "grad_norm": 2.215824842453003, + "learning_rate": 4.99888442659527e-05, + "loss": 0.187, + "step": 9980 + }, + { + "epoch": 60.15060240963855, + "grad_norm": 1.5279314517974854, + "learning_rate": 4.998605533244088e-05, + "loss": 0.1858, + "step": 9985 + }, + { + "epoch": 60.18072289156626, + "grad_norm": 2.4290390014648438, + "learning_rate": 4.998326639892905e-05, + "loss": 0.1554, + "step": 9990 + }, + { + "epoch": 60.21084337349398, + "grad_norm": 1.4494363069534302, + "learning_rate": 4.9980477465417226e-05, + "loss": 0.1648, + "step": 9995 + }, + { + "epoch": 60.24096385542169, + "grad_norm": 3.501488208770752, + "learning_rate": 4.9977688531905406e-05, + "loss": 0.304, + "step": 10000 + }, + { + "epoch": 60.2710843373494, + "grad_norm": 1.657923936843872, + "learning_rate": 4.997489959839357e-05, + "loss": 0.1939, + "step": 10005 + }, + { + "epoch": 60.30120481927711, + "grad_norm": 1.2460441589355469, + "learning_rate": 4.997211066488175e-05, + "loss": 0.1355, + "step": 10010 + }, + { + "epoch": 60.33132530120482, + "grad_norm": 2.036921739578247, + "learning_rate": 4.996932173136993e-05, + "loss": 0.1821, + "step": 10015 + }, + { + "epoch": 60.36144578313253, + "grad_norm": 2.1364831924438477, + "learning_rate": 4.99665327978581e-05, + "loss": 0.1886, + "step": 10020 + }, + { + "epoch": 60.39156626506024, + "grad_norm": 1.8020092248916626, + "learning_rate": 4.9963743864346275e-05, + "loss": 0.197, + "step": 10025 + }, + { + "epoch": 60.42168674698795, + "grad_norm": 1.8530651330947876, + "learning_rate": 4.9960954930834456e-05, + "loss": 0.1697, + "step": 10030 + }, + { + "epoch": 60.45180722891566, + "grad_norm": 1.9424024820327759, + "learning_rate": 4.995816599732262e-05, + "loss": 0.164, + "step": 10035 + }, + { + "epoch": 60.48192771084337, + "grad_norm": 1.4572926759719849, + "learning_rate": 4.99553770638108e-05, + "loss": 0.155, + "step": 10040 + }, + { + "epoch": 60.51204819277108, + "grad_norm": 1.6664016246795654, + "learning_rate": 4.995258813029898e-05, + "loss": 0.1671, + "step": 10045 + }, + { + "epoch": 60.54216867469879, + "grad_norm": 2.254645347595215, + "learning_rate": 4.994979919678715e-05, + "loss": 0.1583, + "step": 10050 + }, + { + "epoch": 60.5722891566265, + "grad_norm": 2.37227725982666, + "learning_rate": 4.9947010263275325e-05, + "loss": 0.1317, + "step": 10055 + }, + { + "epoch": 60.602409638554214, + "grad_norm": 1.5512620210647583, + "learning_rate": 4.99442213297635e-05, + "loss": 0.1791, + "step": 10060 + }, + { + "epoch": 60.63253012048193, + "grad_norm": 2.3960814476013184, + "learning_rate": 4.994143239625167e-05, + "loss": 0.1995, + "step": 10065 + }, + { + "epoch": 60.66265060240964, + "grad_norm": 2.3821523189544678, + "learning_rate": 4.9938643462739846e-05, + "loss": 0.2163, + "step": 10070 + }, + { + "epoch": 60.69277108433735, + "grad_norm": 1.7303850650787354, + "learning_rate": 4.993585452922803e-05, + "loss": 0.2091, + "step": 10075 + }, + { + "epoch": 60.72289156626506, + "grad_norm": 1.8073005676269531, + "learning_rate": 4.99330655957162e-05, + "loss": 0.1943, + "step": 10080 + }, + { + "epoch": 60.75301204819277, + "grad_norm": 1.7040612697601318, + "learning_rate": 4.9930276662204374e-05, + "loss": 0.1603, + "step": 10085 + }, + { + "epoch": 60.78313253012048, + "grad_norm": 1.420932650566101, + "learning_rate": 4.992748772869255e-05, + "loss": 0.1601, + "step": 10090 + }, + { + "epoch": 60.81325301204819, + "grad_norm": 1.76956307888031, + "learning_rate": 4.992469879518072e-05, + "loss": 0.2245, + "step": 10095 + }, + { + "epoch": 60.8433734939759, + "grad_norm": 1.641416311264038, + "learning_rate": 4.99219098616689e-05, + "loss": 0.1664, + "step": 10100 + }, + { + "epoch": 60.873493975903614, + "grad_norm": 1.379858136177063, + "learning_rate": 4.9919120928157076e-05, + "loss": 0.141, + "step": 10105 + }, + { + "epoch": 60.903614457831324, + "grad_norm": 1.7981139421463013, + "learning_rate": 4.991633199464525e-05, + "loss": 0.1728, + "step": 10110 + }, + { + "epoch": 60.933734939759034, + "grad_norm": 1.6015158891677856, + "learning_rate": 4.991354306113343e-05, + "loss": 0.178, + "step": 10115 + }, + { + "epoch": 60.963855421686745, + "grad_norm": 2.2964370250701904, + "learning_rate": 4.99107541276216e-05, + "loss": 0.1792, + "step": 10120 + }, + { + "epoch": 60.993975903614455, + "grad_norm": 2.3170790672302246, + "learning_rate": 4.990796519410977e-05, + "loss": 0.1353, + "step": 10125 + }, + { + "epoch": 61.0, + "eval_accuracy": 0.9158474243020055, + "eval_auc": 0.9712860058504588, + "eval_f1": 0.8654088050314466, + "eval_loss": 0.29633164405822754, + "eval_precision": 0.9649368863955119, + "eval_recall": 0.7844925883694412, + "eval_runtime": 16.6957, + "eval_samples_per_second": 152.315, + "eval_steps_per_second": 0.779, + "step": 10126 + }, + { + "epoch": 61.024096385542165, + "grad_norm": 1.8975228071212769, + "learning_rate": 4.990517626059795e-05, + "loss": 0.1957, + "step": 10130 + }, + { + "epoch": 61.05421686746988, + "grad_norm": 1.653012990951538, + "learning_rate": 4.9902387327086126e-05, + "loss": 0.1494, + "step": 10135 + }, + { + "epoch": 61.08433734939759, + "grad_norm": 1.3063195943832397, + "learning_rate": 4.98995983935743e-05, + "loss": 0.1789, + "step": 10140 + }, + { + "epoch": 61.1144578313253, + "grad_norm": 2.9485018253326416, + "learning_rate": 4.989680946006248e-05, + "loss": 0.1966, + "step": 10145 + }, + { + "epoch": 61.144578313253014, + "grad_norm": 2.3813064098358154, + "learning_rate": 4.989402052655065e-05, + "loss": 0.1674, + "step": 10150 + }, + { + "epoch": 61.174698795180724, + "grad_norm": 2.1475465297698975, + "learning_rate": 4.989123159303882e-05, + "loss": 0.1944, + "step": 10155 + }, + { + "epoch": 61.204819277108435, + "grad_norm": 2.3563380241394043, + "learning_rate": 4.9888442659527e-05, + "loss": 0.2116, + "step": 10160 + }, + { + "epoch": 61.234939759036145, + "grad_norm": 2.4149973392486572, + "learning_rate": 4.9885653726015175e-05, + "loss": 0.2233, + "step": 10165 + }, + { + "epoch": 61.265060240963855, + "grad_norm": 1.2588717937469482, + "learning_rate": 4.988286479250335e-05, + "loss": 0.1677, + "step": 10170 + }, + { + "epoch": 61.295180722891565, + "grad_norm": 1.472192645072937, + "learning_rate": 4.988007585899152e-05, + "loss": 0.1813, + "step": 10175 + }, + { + "epoch": 61.325301204819276, + "grad_norm": 1.9705734252929688, + "learning_rate": 4.98772869254797e-05, + "loss": 0.1787, + "step": 10180 + }, + { + "epoch": 61.355421686746986, + "grad_norm": 1.8915636539459229, + "learning_rate": 4.987449799196787e-05, + "loss": 0.1878, + "step": 10185 + }, + { + "epoch": 61.3855421686747, + "grad_norm": 1.9380899667739868, + "learning_rate": 4.987170905845605e-05, + "loss": 0.1863, + "step": 10190 + }, + { + "epoch": 61.41566265060241, + "grad_norm": 2.0398693084716797, + "learning_rate": 4.9868920124944225e-05, + "loss": 0.1687, + "step": 10195 + }, + { + "epoch": 61.44578313253012, + "grad_norm": 1.4080811738967896, + "learning_rate": 4.98661311914324e-05, + "loss": 0.1652, + "step": 10200 + }, + { + "epoch": 61.475903614457835, + "grad_norm": 2.3035523891448975, + "learning_rate": 4.986334225792057e-05, + "loss": 0.1546, + "step": 10205 + }, + { + "epoch": 61.506024096385545, + "grad_norm": 2.251307964324951, + "learning_rate": 4.9860553324408746e-05, + "loss": 0.2048, + "step": 10210 + }, + { + "epoch": 61.536144578313255, + "grad_norm": 1.9995403289794922, + "learning_rate": 4.985776439089692e-05, + "loss": 0.2228, + "step": 10215 + }, + { + "epoch": 61.566265060240966, + "grad_norm": 1.5359236001968384, + "learning_rate": 4.98549754573851e-05, + "loss": 0.1831, + "step": 10220 + }, + { + "epoch": 61.596385542168676, + "grad_norm": 2.048762083053589, + "learning_rate": 4.9852186523873275e-05, + "loss": 0.1967, + "step": 10225 + }, + { + "epoch": 61.626506024096386, + "grad_norm": 1.4467693567276, + "learning_rate": 4.984939759036145e-05, + "loss": 0.2185, + "step": 10230 + }, + { + "epoch": 61.6566265060241, + "grad_norm": 1.6843082904815674, + "learning_rate": 4.984660865684962e-05, + "loss": 0.1966, + "step": 10235 + }, + { + "epoch": 61.68674698795181, + "grad_norm": 1.7061634063720703, + "learning_rate": 4.9843819723337796e-05, + "loss": 0.1718, + "step": 10240 + }, + { + "epoch": 61.71686746987952, + "grad_norm": 1.6093111038208008, + "learning_rate": 4.984103078982597e-05, + "loss": 0.1719, + "step": 10245 + }, + { + "epoch": 61.74698795180723, + "grad_norm": 1.2672582864761353, + "learning_rate": 4.983824185631415e-05, + "loss": 0.1679, + "step": 10250 + }, + { + "epoch": 61.77710843373494, + "grad_norm": 1.7078900337219238, + "learning_rate": 4.9835452922802324e-05, + "loss": 0.1701, + "step": 10255 + }, + { + "epoch": 61.80722891566265, + "grad_norm": 1.8595340251922607, + "learning_rate": 4.98326639892905e-05, + "loss": 0.155, + "step": 10260 + }, + { + "epoch": 61.83734939759036, + "grad_norm": 2.7607834339141846, + "learning_rate": 4.982987505577867e-05, + "loss": 0.2112, + "step": 10265 + }, + { + "epoch": 61.86746987951807, + "grad_norm": 2.582641839981079, + "learning_rate": 4.9827086122266845e-05, + "loss": 0.2258, + "step": 10270 + }, + { + "epoch": 61.897590361445786, + "grad_norm": 1.8134492635726929, + "learning_rate": 4.982429718875502e-05, + "loss": 0.2046, + "step": 10275 + }, + { + "epoch": 61.9277108433735, + "grad_norm": 1.6217050552368164, + "learning_rate": 4.98215082552432e-05, + "loss": 0.1539, + "step": 10280 + }, + { + "epoch": 61.95783132530121, + "grad_norm": 2.1390674114227295, + "learning_rate": 4.9818719321731374e-05, + "loss": 0.1442, + "step": 10285 + }, + { + "epoch": 61.98795180722892, + "grad_norm": 1.8469104766845703, + "learning_rate": 4.981593038821955e-05, + "loss": 0.1799, + "step": 10290 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.9264648053480141, + "eval_auc": 0.9765399888575726, + "eval_f1": 0.8856269113149847, + "eval_loss": 0.24341483414173126, + "eval_precision": 0.9551451187335093, + "eval_recall": 0.8255416191562144, + "eval_runtime": 19.8091, + "eval_samples_per_second": 128.375, + "eval_steps_per_second": 0.656, + "step": 10292 + }, + { + "epoch": 62.01807228915663, + "grad_norm": 1.5650662183761597, + "learning_rate": 4.981314145470772e-05, + "loss": 0.172, + "step": 10295 + }, + { + "epoch": 62.04819277108434, + "grad_norm": 1.2761918306350708, + "learning_rate": 4.9810352521195895e-05, + "loss": 0.1496, + "step": 10300 + }, + { + "epoch": 62.07831325301205, + "grad_norm": 1.1782655715942383, + "learning_rate": 4.980756358768407e-05, + "loss": 0.1902, + "step": 10305 + }, + { + "epoch": 62.10843373493976, + "grad_norm": 1.1633213758468628, + "learning_rate": 4.980477465417225e-05, + "loss": 0.1556, + "step": 10310 + }, + { + "epoch": 62.13855421686747, + "grad_norm": 2.094839096069336, + "learning_rate": 4.980198572066042e-05, + "loss": 0.1414, + "step": 10315 + }, + { + "epoch": 62.16867469879518, + "grad_norm": 2.7337560653686523, + "learning_rate": 4.97991967871486e-05, + "loss": 0.1442, + "step": 10320 + }, + { + "epoch": 62.19879518072289, + "grad_norm": 1.796980381011963, + "learning_rate": 4.979640785363677e-05, + "loss": 0.1309, + "step": 10325 + }, + { + "epoch": 62.2289156626506, + "grad_norm": 2.00614857673645, + "learning_rate": 4.9793618920124945e-05, + "loss": 0.1709, + "step": 10330 + }, + { + "epoch": 62.25903614457831, + "grad_norm": 1.2246202230453491, + "learning_rate": 4.979082998661312e-05, + "loss": 0.2058, + "step": 10335 + }, + { + "epoch": 62.28915662650602, + "grad_norm": 1.9987736940383911, + "learning_rate": 4.97880410531013e-05, + "loss": 0.1649, + "step": 10340 + }, + { + "epoch": 62.31927710843374, + "grad_norm": 1.9422751665115356, + "learning_rate": 4.978525211958947e-05, + "loss": 0.1839, + "step": 10345 + }, + { + "epoch": 62.34939759036145, + "grad_norm": 2.593979597091675, + "learning_rate": 4.9782463186077647e-05, + "loss": 0.1791, + "step": 10350 + }, + { + "epoch": 62.37951807228916, + "grad_norm": 2.39090633392334, + "learning_rate": 4.977967425256582e-05, + "loss": 0.2201, + "step": 10355 + }, + { + "epoch": 62.40963855421687, + "grad_norm": 1.6905205249786377, + "learning_rate": 4.9776885319053994e-05, + "loss": 0.1624, + "step": 10360 + }, + { + "epoch": 62.43975903614458, + "grad_norm": 1.8943250179290771, + "learning_rate": 4.9774096385542175e-05, + "loss": 0.176, + "step": 10365 + }, + { + "epoch": 62.46987951807229, + "grad_norm": 1.9889397621154785, + "learning_rate": 4.977130745203035e-05, + "loss": 0.1907, + "step": 10370 + }, + { + "epoch": 62.5, + "grad_norm": 2.0285348892211914, + "learning_rate": 4.976851851851852e-05, + "loss": 0.2148, + "step": 10375 + }, + { + "epoch": 62.53012048192771, + "grad_norm": 1.7169373035430908, + "learning_rate": 4.9765729585006696e-05, + "loss": 0.1899, + "step": 10380 + }, + { + "epoch": 62.56024096385542, + "grad_norm": 2.2081243991851807, + "learning_rate": 4.976294065149487e-05, + "loss": 0.1959, + "step": 10385 + }, + { + "epoch": 62.59036144578313, + "grad_norm": 1.8505403995513916, + "learning_rate": 4.9760151717983044e-05, + "loss": 0.1847, + "step": 10390 + }, + { + "epoch": 62.62048192771084, + "grad_norm": 1.6287717819213867, + "learning_rate": 4.9757362784471224e-05, + "loss": 0.1766, + "step": 10395 + }, + { + "epoch": 62.65060240963855, + "grad_norm": 2.9438302516937256, + "learning_rate": 4.97545738509594e-05, + "loss": 0.1632, + "step": 10400 + }, + { + "epoch": 62.68072289156626, + "grad_norm": 2.115572214126587, + "learning_rate": 4.975178491744757e-05, + "loss": 0.1832, + "step": 10405 + }, + { + "epoch": 62.71084337349397, + "grad_norm": 1.9674654006958008, + "learning_rate": 4.9748995983935746e-05, + "loss": 0.1474, + "step": 10410 + }, + { + "epoch": 62.74096385542169, + "grad_norm": 1.9791538715362549, + "learning_rate": 4.974620705042392e-05, + "loss": 0.1703, + "step": 10415 + }, + { + "epoch": 62.7710843373494, + "grad_norm": 1.506995439529419, + "learning_rate": 4.974341811691209e-05, + "loss": 0.1613, + "step": 10420 + }, + { + "epoch": 62.80120481927711, + "grad_norm": 1.310712456703186, + "learning_rate": 4.9740629183400274e-05, + "loss": 0.204, + "step": 10425 + }, + { + "epoch": 62.83132530120482, + "grad_norm": 2.5213351249694824, + "learning_rate": 4.973784024988845e-05, + "loss": 0.1617, + "step": 10430 + }, + { + "epoch": 62.86144578313253, + "grad_norm": 0.9870374798774719, + "learning_rate": 4.973505131637662e-05, + "loss": 0.1408, + "step": 10435 + }, + { + "epoch": 62.89156626506024, + "grad_norm": 2.704153060913086, + "learning_rate": 4.9732262382864795e-05, + "loss": 0.1841, + "step": 10440 + }, + { + "epoch": 62.92168674698795, + "grad_norm": 1.7165454626083374, + "learning_rate": 4.972947344935297e-05, + "loss": 0.2026, + "step": 10445 + }, + { + "epoch": 62.95180722891566, + "grad_norm": 3.0817670822143555, + "learning_rate": 4.972668451584114e-05, + "loss": 0.1867, + "step": 10450 + }, + { + "epoch": 62.98192771084337, + "grad_norm": 2.2709290981292725, + "learning_rate": 4.972389558232932e-05, + "loss": 0.1682, + "step": 10455 + }, + { + "epoch": 63.0, + "eval_accuracy": 0.936295713723948, + "eval_auc": 0.9788920129055042, + "eval_f1": 0.9027611044417767, + "eval_loss": 0.20549307763576508, + "eval_precision": 0.9531051964512041, + "eval_recall": 0.8574686431014823, + "eval_runtime": 19.5681, + "eval_samples_per_second": 129.956, + "eval_steps_per_second": 0.664, + "step": 10458 + }, + { + "epoch": 63.01204819277108, + "grad_norm": 1.6144845485687256, + "learning_rate": 4.97211066488175e-05, + "loss": 0.1778, + "step": 10460 + }, + { + "epoch": 63.04216867469879, + "grad_norm": 1.5116358995437622, + "learning_rate": 4.971831771530567e-05, + "loss": 0.1997, + "step": 10465 + }, + { + "epoch": 63.0722891566265, + "grad_norm": 1.9036710262298584, + "learning_rate": 4.9715528781793845e-05, + "loss": 0.1625, + "step": 10470 + }, + { + "epoch": 63.102409638554214, + "grad_norm": 1.6996465921401978, + "learning_rate": 4.971273984828202e-05, + "loss": 0.152, + "step": 10475 + }, + { + "epoch": 63.13253012048193, + "grad_norm": 1.7426986694335938, + "learning_rate": 4.970995091477019e-05, + "loss": 0.1615, + "step": 10480 + }, + { + "epoch": 63.16265060240964, + "grad_norm": 1.8857890367507935, + "learning_rate": 4.970716198125837e-05, + "loss": 0.2047, + "step": 10485 + }, + { + "epoch": 63.19277108433735, + "grad_norm": 1.2623646259307861, + "learning_rate": 4.970437304774655e-05, + "loss": 0.1901, + "step": 10490 + }, + { + "epoch": 63.22289156626506, + "grad_norm": 1.2678370475769043, + "learning_rate": 4.970158411423472e-05, + "loss": 0.1536, + "step": 10495 + }, + { + "epoch": 63.25301204819277, + "grad_norm": 2.6455559730529785, + "learning_rate": 4.9698795180722894e-05, + "loss": 0.1528, + "step": 10500 + }, + { + "epoch": 63.28313253012048, + "grad_norm": 2.2459418773651123, + "learning_rate": 4.969600624721107e-05, + "loss": 0.1432, + "step": 10505 + }, + { + "epoch": 63.31325301204819, + "grad_norm": 2.285522222518921, + "learning_rate": 4.969321731369924e-05, + "loss": 0.1685, + "step": 10510 + }, + { + "epoch": 63.3433734939759, + "grad_norm": 1.965384840965271, + "learning_rate": 4.969042838018742e-05, + "loss": 0.1829, + "step": 10515 + }, + { + "epoch": 63.373493975903614, + "grad_norm": 1.9819344282150269, + "learning_rate": 4.9687639446675596e-05, + "loss": 0.1623, + "step": 10520 + }, + { + "epoch": 63.403614457831324, + "grad_norm": 1.7619704008102417, + "learning_rate": 4.968485051316376e-05, + "loss": 0.2063, + "step": 10525 + }, + { + "epoch": 63.433734939759034, + "grad_norm": 1.49800705909729, + "learning_rate": 4.9682061579651944e-05, + "loss": 0.16, + "step": 10530 + }, + { + "epoch": 63.463855421686745, + "grad_norm": 1.3514907360076904, + "learning_rate": 4.967927264614012e-05, + "loss": 0.1473, + "step": 10535 + }, + { + "epoch": 63.493975903614455, + "grad_norm": 2.645862579345703, + "learning_rate": 4.967648371262829e-05, + "loss": 0.2033, + "step": 10540 + }, + { + "epoch": 63.524096385542165, + "grad_norm": 2.3720364570617676, + "learning_rate": 4.967369477911647e-05, + "loss": 0.1581, + "step": 10545 + }, + { + "epoch": 63.55421686746988, + "grad_norm": 1.090379238128662, + "learning_rate": 4.9670905845604646e-05, + "loss": 0.1836, + "step": 10550 + }, + { + "epoch": 63.58433734939759, + "grad_norm": 2.2446887493133545, + "learning_rate": 4.966811691209281e-05, + "loss": 0.1688, + "step": 10555 + }, + { + "epoch": 63.6144578313253, + "grad_norm": 2.3685302734375, + "learning_rate": 4.9665327978580993e-05, + "loss": 0.1821, + "step": 10560 + }, + { + "epoch": 63.644578313253014, + "grad_norm": 1.32175874710083, + "learning_rate": 4.966253904506917e-05, + "loss": 0.1941, + "step": 10565 + }, + { + "epoch": 63.674698795180724, + "grad_norm": 2.90374493598938, + "learning_rate": 4.965975011155734e-05, + "loss": 0.249, + "step": 10570 + }, + { + "epoch": 63.704819277108435, + "grad_norm": 1.3915349245071411, + "learning_rate": 4.965696117804552e-05, + "loss": 0.1556, + "step": 10575 + }, + { + "epoch": 63.734939759036145, + "grad_norm": 1.482227087020874, + "learning_rate": 4.9654172244533695e-05, + "loss": 0.1871, + "step": 10580 + }, + { + "epoch": 63.765060240963855, + "grad_norm": 2.8194668292999268, + "learning_rate": 4.965138331102186e-05, + "loss": 0.2093, + "step": 10585 + }, + { + "epoch": 63.795180722891565, + "grad_norm": 2.2497358322143555, + "learning_rate": 4.964859437751004e-05, + "loss": 0.1644, + "step": 10590 + }, + { + "epoch": 63.825301204819276, + "grad_norm": 1.5044275522232056, + "learning_rate": 4.964580544399822e-05, + "loss": 0.1752, + "step": 10595 + }, + { + "epoch": 63.855421686746986, + "grad_norm": 2.974330186843872, + "learning_rate": 4.964301651048639e-05, + "loss": 0.1891, + "step": 10600 + }, + { + "epoch": 63.8855421686747, + "grad_norm": 2.525432825088501, + "learning_rate": 4.964022757697457e-05, + "loss": 0.2096, + "step": 10605 + }, + { + "epoch": 63.91566265060241, + "grad_norm": 1.7562264204025269, + "learning_rate": 4.963743864346274e-05, + "loss": 0.1663, + "step": 10610 + }, + { + "epoch": 63.94578313253012, + "grad_norm": 1.7163867950439453, + "learning_rate": 4.963464970995091e-05, + "loss": 0.1809, + "step": 10615 + }, + { + "epoch": 63.975903614457835, + "grad_norm": 2.554893732070923, + "learning_rate": 4.963186077643909e-05, + "loss": 0.1798, + "step": 10620 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.9272512780180888, + "eval_auc": 0.9788444454178477, + "eval_f1": 0.8862937922556853, + "eval_loss": 0.23211508989334106, + "eval_precision": 0.9613333333333334, + "eval_recall": 0.82212086659065, + "eval_runtime": 19.9559, + "eval_samples_per_second": 127.431, + "eval_steps_per_second": 0.651, + "step": 10624 + }, + { + "epoch": 64.00602409638554, + "grad_norm": 1.369818925857544, + "learning_rate": 4.9629071842927266e-05, + "loss": 0.13, + "step": 10625 + }, + { + "epoch": 64.03614457831326, + "grad_norm": 1.9708218574523926, + "learning_rate": 4.962628290941545e-05, + "loss": 0.1507, + "step": 10630 + }, + { + "epoch": 64.06626506024097, + "grad_norm": 1.8631243705749512, + "learning_rate": 4.962349397590362e-05, + "loss": 0.1506, + "step": 10635 + }, + { + "epoch": 64.09638554216868, + "grad_norm": 1.812243938446045, + "learning_rate": 4.962070504239179e-05, + "loss": 0.1948, + "step": 10640 + }, + { + "epoch": 64.12650602409639, + "grad_norm": 1.4348286390304565, + "learning_rate": 4.961791610887997e-05, + "loss": 0.1647, + "step": 10645 + }, + { + "epoch": 64.1566265060241, + "grad_norm": 2.407808780670166, + "learning_rate": 4.961512717536814e-05, + "loss": 0.1722, + "step": 10650 + }, + { + "epoch": 64.1867469879518, + "grad_norm": 2.4277946949005127, + "learning_rate": 4.9612338241856316e-05, + "loss": 0.1609, + "step": 10655 + }, + { + "epoch": 64.21686746987952, + "grad_norm": 1.4109902381896973, + "learning_rate": 4.9609549308344496e-05, + "loss": 0.1399, + "step": 10660 + }, + { + "epoch": 64.24698795180723, + "grad_norm": 1.2755377292633057, + "learning_rate": 4.960676037483267e-05, + "loss": 0.1523, + "step": 10665 + }, + { + "epoch": 64.27710843373494, + "grad_norm": 1.7213119268417358, + "learning_rate": 4.960397144132084e-05, + "loss": 0.1724, + "step": 10670 + }, + { + "epoch": 64.30722891566265, + "grad_norm": 2.4596023559570312, + "learning_rate": 4.960118250780902e-05, + "loss": 0.1481, + "step": 10675 + }, + { + "epoch": 64.33734939759036, + "grad_norm": 2.551539897918701, + "learning_rate": 4.959839357429719e-05, + "loss": 0.2016, + "step": 10680 + }, + { + "epoch": 64.36746987951807, + "grad_norm": 2.1785504817962646, + "learning_rate": 4.9595604640785365e-05, + "loss": 0.1646, + "step": 10685 + }, + { + "epoch": 64.39759036144578, + "grad_norm": 1.351625680923462, + "learning_rate": 4.9592815707273546e-05, + "loss": 0.1707, + "step": 10690 + }, + { + "epoch": 64.42771084337349, + "grad_norm": 1.3918484449386597, + "learning_rate": 4.959002677376171e-05, + "loss": 0.1519, + "step": 10695 + }, + { + "epoch": 64.4578313253012, + "grad_norm": 1.8310281038284302, + "learning_rate": 4.958723784024989e-05, + "loss": 0.2019, + "step": 10700 + }, + { + "epoch": 64.48795180722891, + "grad_norm": 2.043283224105835, + "learning_rate": 4.958444890673807e-05, + "loss": 0.1472, + "step": 10705 + }, + { + "epoch": 64.51807228915662, + "grad_norm": 1.7341029644012451, + "learning_rate": 4.958165997322624e-05, + "loss": 0.2088, + "step": 10710 + }, + { + "epoch": 64.54819277108433, + "grad_norm": 1.8614193201065063, + "learning_rate": 4.9578871039714415e-05, + "loss": 0.1419, + "step": 10715 + }, + { + "epoch": 64.57831325301204, + "grad_norm": 2.1345086097717285, + "learning_rate": 4.9576082106202596e-05, + "loss": 0.2617, + "step": 10720 + }, + { + "epoch": 64.60843373493977, + "grad_norm": 1.1087085008621216, + "learning_rate": 4.957329317269076e-05, + "loss": 0.1431, + "step": 10725 + }, + { + "epoch": 64.63855421686748, + "grad_norm": 1.2613799571990967, + "learning_rate": 4.9570504239178936e-05, + "loss": 0.1741, + "step": 10730 + }, + { + "epoch": 64.66867469879519, + "grad_norm": 1.9829838275909424, + "learning_rate": 4.956771530566712e-05, + "loss": 0.1856, + "step": 10735 + }, + { + "epoch": 64.6987951807229, + "grad_norm": 2.0306339263916016, + "learning_rate": 4.956492637215529e-05, + "loss": 0.1717, + "step": 10740 + }, + { + "epoch": 64.7289156626506, + "grad_norm": 1.563502311706543, + "learning_rate": 4.9562137438643465e-05, + "loss": 0.1443, + "step": 10745 + }, + { + "epoch": 64.75903614457832, + "grad_norm": 1.6432170867919922, + "learning_rate": 4.9559348505131645e-05, + "loss": 0.1986, + "step": 10750 + }, + { + "epoch": 64.78915662650603, + "grad_norm": 2.0203120708465576, + "learning_rate": 4.955655957161981e-05, + "loss": 0.1864, + "step": 10755 + }, + { + "epoch": 64.81927710843374, + "grad_norm": 1.6433354616165161, + "learning_rate": 4.9553770638107986e-05, + "loss": 0.1864, + "step": 10760 + }, + { + "epoch": 64.84939759036145, + "grad_norm": 1.7345216274261475, + "learning_rate": 4.9550981704596166e-05, + "loss": 0.2027, + "step": 10765 + }, + { + "epoch": 64.87951807228916, + "grad_norm": 1.6810556650161743, + "learning_rate": 4.954819277108434e-05, + "loss": 0.1628, + "step": 10770 + }, + { + "epoch": 64.90963855421687, + "grad_norm": 2.2163572311401367, + "learning_rate": 4.9545403837572514e-05, + "loss": 0.17, + "step": 10775 + }, + { + "epoch": 64.93975903614458, + "grad_norm": 1.1497232913970947, + "learning_rate": 4.9542614904060695e-05, + "loss": 0.1854, + "step": 10780 + }, + { + "epoch": 64.96987951807229, + "grad_norm": 1.6407033205032349, + "learning_rate": 4.953982597054886e-05, + "loss": 0.1385, + "step": 10785 + }, + { + "epoch": 65.0, + "grad_norm": 1.4199328422546387, + "learning_rate": 4.9537037037037035e-05, + "loss": 0.1668, + "step": 10790 + }, + { + "epoch": 65.0, + "eval_accuracy": 0.9311836413684624, + "eval_auc": 0.9761231744693317, + "eval_f1": 0.8921749845964264, + "eval_loss": 0.25203993916511536, + "eval_precision": 0.9705093833780161, + "eval_recall": 0.8255416191562144, + "eval_runtime": 16.9177, + "eval_samples_per_second": 150.316, + "eval_steps_per_second": 0.768, + "step": 10790 + }, + { + "epoch": 65.03012048192771, + "grad_norm": 1.2270861864089966, + "learning_rate": 4.9534248103525216e-05, + "loss": 0.1709, + "step": 10795 + }, + { + "epoch": 65.06024096385542, + "grad_norm": 1.7802960872650146, + "learning_rate": 4.953145917001339e-05, + "loss": 0.1783, + "step": 10800 + }, + { + "epoch": 65.09036144578313, + "grad_norm": 1.1382660865783691, + "learning_rate": 4.9528670236501564e-05, + "loss": 0.1685, + "step": 10805 + }, + { + "epoch": 65.12048192771084, + "grad_norm": 1.8339431285858154, + "learning_rate": 4.952588130298974e-05, + "loss": 0.1886, + "step": 10810 + }, + { + "epoch": 65.15060240963855, + "grad_norm": 2.0604560375213623, + "learning_rate": 4.952309236947791e-05, + "loss": 0.1906, + "step": 10815 + }, + { + "epoch": 65.18072289156626, + "grad_norm": 1.6225755214691162, + "learning_rate": 4.9520303435966085e-05, + "loss": 0.1861, + "step": 10820 + }, + { + "epoch": 65.21084337349397, + "grad_norm": 1.7505550384521484, + "learning_rate": 4.9517514502454266e-05, + "loss": 0.1536, + "step": 10825 + }, + { + "epoch": 65.24096385542168, + "grad_norm": 1.8951244354248047, + "learning_rate": 4.951472556894244e-05, + "loss": 0.1856, + "step": 10830 + }, + { + "epoch": 65.2710843373494, + "grad_norm": 2.078463077545166, + "learning_rate": 4.951193663543061e-05, + "loss": 0.1505, + "step": 10835 + }, + { + "epoch": 65.3012048192771, + "grad_norm": 1.7262752056121826, + "learning_rate": 4.950914770191879e-05, + "loss": 0.1592, + "step": 10840 + }, + { + "epoch": 65.33132530120481, + "grad_norm": 1.910554051399231, + "learning_rate": 4.950635876840696e-05, + "loss": 0.1633, + "step": 10845 + }, + { + "epoch": 65.36144578313252, + "grad_norm": 1.8438043594360352, + "learning_rate": 4.9503569834895135e-05, + "loss": 0.1582, + "step": 10850 + }, + { + "epoch": 65.39156626506023, + "grad_norm": 1.3439587354660034, + "learning_rate": 4.9500780901383315e-05, + "loss": 0.1395, + "step": 10855 + }, + { + "epoch": 65.42168674698796, + "grad_norm": 2.09580135345459, + "learning_rate": 4.949799196787149e-05, + "loss": 0.1641, + "step": 10860 + }, + { + "epoch": 65.45180722891567, + "grad_norm": 2.5676605701446533, + "learning_rate": 4.949520303435966e-05, + "loss": 0.1582, + "step": 10865 + }, + { + "epoch": 65.48192771084338, + "grad_norm": 2.134660482406616, + "learning_rate": 4.9492414100847837e-05, + "loss": 0.1755, + "step": 10870 + }, + { + "epoch": 65.51204819277109, + "grad_norm": 1.9631792306900024, + "learning_rate": 4.948962516733601e-05, + "loss": 0.1695, + "step": 10875 + }, + { + "epoch": 65.5421686746988, + "grad_norm": 1.410919189453125, + "learning_rate": 4.9486836233824184e-05, + "loss": 0.1883, + "step": 10880 + }, + { + "epoch": 65.57228915662651, + "grad_norm": 1.7867623567581177, + "learning_rate": 4.9484047300312365e-05, + "loss": 0.1514, + "step": 10885 + }, + { + "epoch": 65.60240963855422, + "grad_norm": 1.1021161079406738, + "learning_rate": 4.948125836680054e-05, + "loss": 0.1328, + "step": 10890 + }, + { + "epoch": 65.63253012048193, + "grad_norm": 2.3028223514556885, + "learning_rate": 4.947846943328871e-05, + "loss": 0.2114, + "step": 10895 + }, + { + "epoch": 65.66265060240964, + "grad_norm": 1.3753268718719482, + "learning_rate": 4.9475680499776886e-05, + "loss": 0.1843, + "step": 10900 + }, + { + "epoch": 65.69277108433735, + "grad_norm": 2.1384854316711426, + "learning_rate": 4.947289156626506e-05, + "loss": 0.1462, + "step": 10905 + }, + { + "epoch": 65.72289156626506, + "grad_norm": 1.6459414958953857, + "learning_rate": 4.947010263275324e-05, + "loss": 0.1894, + "step": 10910 + }, + { + "epoch": 65.75301204819277, + "grad_norm": 2.067556381225586, + "learning_rate": 4.9467313699241414e-05, + "loss": 0.1661, + "step": 10915 + }, + { + "epoch": 65.78313253012048, + "grad_norm": 1.947026252746582, + "learning_rate": 4.946452476572959e-05, + "loss": 0.1738, + "step": 10920 + }, + { + "epoch": 65.8132530120482, + "grad_norm": 2.365640640258789, + "learning_rate": 4.946173583221776e-05, + "loss": 0.2179, + "step": 10925 + }, + { + "epoch": 65.8433734939759, + "grad_norm": 1.8187806606292725, + "learning_rate": 4.9458946898705936e-05, + "loss": 0.2086, + "step": 10930 + }, + { + "epoch": 65.87349397590361, + "grad_norm": 1.6368297338485718, + "learning_rate": 4.945615796519411e-05, + "loss": 0.1925, + "step": 10935 + }, + { + "epoch": 65.90361445783132, + "grad_norm": 1.8355932235717773, + "learning_rate": 4.945336903168229e-05, + "loss": 0.1627, + "step": 10940 + }, + { + "epoch": 65.93373493975903, + "grad_norm": 0.8479874134063721, + "learning_rate": 4.9450580098170464e-05, + "loss": 0.1584, + "step": 10945 + }, + { + "epoch": 65.96385542168674, + "grad_norm": 1.4935309886932373, + "learning_rate": 4.944779116465864e-05, + "loss": 0.1427, + "step": 10950 + }, + { + "epoch": 65.99397590361446, + "grad_norm": 1.5013028383255005, + "learning_rate": 4.944500223114681e-05, + "loss": 0.188, + "step": 10955 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.9370821863940229, + "eval_auc": 0.9763887310910682, + "eval_f1": 0.9046483909415971, + "eval_loss": 0.21822898089885712, + "eval_precision": 0.947565543071161, + "eval_recall": 0.8654503990877993, + "eval_runtime": 17.1304, + "eval_samples_per_second": 148.449, + "eval_steps_per_second": 0.759, + "step": 10956 + }, + { + "epoch": 66.02409638554217, + "grad_norm": 1.1407597064971924, + "learning_rate": 4.9442213297634985e-05, + "loss": 0.1436, + "step": 10960 + }, + { + "epoch": 66.05421686746988, + "grad_norm": 1.0020407438278198, + "learning_rate": 4.943942436412316e-05, + "loss": 0.1508, + "step": 10965 + }, + { + "epoch": 66.08433734939759, + "grad_norm": 2.138779401779175, + "learning_rate": 4.943663543061134e-05, + "loss": 0.1629, + "step": 10970 + }, + { + "epoch": 66.1144578313253, + "grad_norm": 2.0138418674468994, + "learning_rate": 4.943384649709951e-05, + "loss": 0.1957, + "step": 10975 + }, + { + "epoch": 66.144578313253, + "grad_norm": 1.890173316001892, + "learning_rate": 4.943105756358769e-05, + "loss": 0.1823, + "step": 10980 + }, + { + "epoch": 66.17469879518072, + "grad_norm": 1.5144275426864624, + "learning_rate": 4.942826863007586e-05, + "loss": 0.189, + "step": 10985 + }, + { + "epoch": 66.20481927710843, + "grad_norm": 1.6190605163574219, + "learning_rate": 4.9425479696564035e-05, + "loss": 0.1674, + "step": 10990 + }, + { + "epoch": 66.23493975903614, + "grad_norm": 1.4035422801971436, + "learning_rate": 4.942269076305221e-05, + "loss": 0.1799, + "step": 10995 + }, + { + "epoch": 66.26506024096386, + "grad_norm": 2.2024824619293213, + "learning_rate": 4.941990182954039e-05, + "loss": 0.1513, + "step": 11000 + }, + { + "epoch": 66.29518072289157, + "grad_norm": 1.7090049982070923, + "learning_rate": 4.941711289602856e-05, + "loss": 0.2451, + "step": 11005 + }, + { + "epoch": 66.32530120481928, + "grad_norm": 2.0364186763763428, + "learning_rate": 4.941432396251674e-05, + "loss": 0.1637, + "step": 11010 + }, + { + "epoch": 66.355421686747, + "grad_norm": 1.7846271991729736, + "learning_rate": 4.941153502900491e-05, + "loss": 0.1448, + "step": 11015 + }, + { + "epoch": 66.3855421686747, + "grad_norm": 2.3138959407806396, + "learning_rate": 4.9408746095493084e-05, + "loss": 0.2147, + "step": 11020 + }, + { + "epoch": 66.41566265060241, + "grad_norm": 1.288283109664917, + "learning_rate": 4.940595716198126e-05, + "loss": 0.1407, + "step": 11025 + }, + { + "epoch": 66.44578313253012, + "grad_norm": 1.8594008684158325, + "learning_rate": 4.940316822846944e-05, + "loss": 0.1456, + "step": 11030 + }, + { + "epoch": 66.47590361445783, + "grad_norm": 1.7284523248672485, + "learning_rate": 4.940037929495761e-05, + "loss": 0.1282, + "step": 11035 + }, + { + "epoch": 66.50602409638554, + "grad_norm": 2.2721471786499023, + "learning_rate": 4.9397590361445786e-05, + "loss": 0.1992, + "step": 11040 + }, + { + "epoch": 66.53614457831326, + "grad_norm": 0.9815234541893005, + "learning_rate": 4.939480142793396e-05, + "loss": 0.1513, + "step": 11045 + }, + { + "epoch": 66.56626506024097, + "grad_norm": 2.513406276702881, + "learning_rate": 4.9392012494422134e-05, + "loss": 0.1658, + "step": 11050 + }, + { + "epoch": 66.59638554216868, + "grad_norm": 1.996604561805725, + "learning_rate": 4.938922356091031e-05, + "loss": 0.1563, + "step": 11055 + }, + { + "epoch": 66.62650602409639, + "grad_norm": 1.9300235509872437, + "learning_rate": 4.938643462739849e-05, + "loss": 0.2027, + "step": 11060 + }, + { + "epoch": 66.6566265060241, + "grad_norm": 2.3639075756073, + "learning_rate": 4.938364569388666e-05, + "loss": 0.1711, + "step": 11065 + }, + { + "epoch": 66.6867469879518, + "grad_norm": 2.160346746444702, + "learning_rate": 4.9380856760374836e-05, + "loss": 0.1821, + "step": 11070 + }, + { + "epoch": 66.71686746987952, + "grad_norm": 1.9300659894943237, + "learning_rate": 4.937806782686301e-05, + "loss": 0.171, + "step": 11075 + }, + { + "epoch": 66.74698795180723, + "grad_norm": 1.5317730903625488, + "learning_rate": 4.937527889335118e-05, + "loss": 0.1387, + "step": 11080 + }, + { + "epoch": 66.77710843373494, + "grad_norm": 1.837944746017456, + "learning_rate": 4.937248995983936e-05, + "loss": 0.1894, + "step": 11085 + }, + { + "epoch": 66.80722891566265, + "grad_norm": 1.6817915439605713, + "learning_rate": 4.936970102632754e-05, + "loss": 0.2015, + "step": 11090 + }, + { + "epoch": 66.83734939759036, + "grad_norm": 2.3749804496765137, + "learning_rate": 4.936691209281571e-05, + "loss": 0.1769, + "step": 11095 + }, + { + "epoch": 66.86746987951807, + "grad_norm": 2.061269998550415, + "learning_rate": 4.9364123159303885e-05, + "loss": 0.1608, + "step": 11100 + }, + { + "epoch": 66.89759036144578, + "grad_norm": 1.4038336277008057, + "learning_rate": 4.936133422579206e-05, + "loss": 0.1681, + "step": 11105 + }, + { + "epoch": 66.92771084337349, + "grad_norm": 1.8982125520706177, + "learning_rate": 4.935854529228023e-05, + "loss": 0.2014, + "step": 11110 + }, + { + "epoch": 66.9578313253012, + "grad_norm": 1.63504159450531, + "learning_rate": 4.935575635876841e-05, + "loss": 0.1569, + "step": 11115 + }, + { + "epoch": 66.98795180722891, + "grad_norm": 1.2141400575637817, + "learning_rate": 4.935296742525659e-05, + "loss": 0.165, + "step": 11120 + }, + { + "epoch": 67.0, + "eval_accuracy": 0.9311836413684624, + "eval_auc": 0.976971176155753, + "eval_f1": 0.8936170212765957, + "eval_loss": 0.24524444341659546, + "eval_precision": 0.95703125, + "eval_recall": 0.8380843785632839, + "eval_runtime": 16.6804, + "eval_samples_per_second": 152.454, + "eval_steps_per_second": 0.779, + "step": 11122 + }, + { + "epoch": 67.01807228915662, + "grad_norm": 1.8608736991882324, + "learning_rate": 4.935017849174476e-05, + "loss": 0.1377, + "step": 11125 + }, + { + "epoch": 67.04819277108433, + "grad_norm": 1.6621134281158447, + "learning_rate": 4.9347389558232935e-05, + "loss": 0.1636, + "step": 11130 + }, + { + "epoch": 67.07831325301204, + "grad_norm": 2.7037923336029053, + "learning_rate": 4.934460062472111e-05, + "loss": 0.1416, + "step": 11135 + }, + { + "epoch": 67.10843373493977, + "grad_norm": 1.8798949718475342, + "learning_rate": 4.934181169120928e-05, + "loss": 0.1838, + "step": 11140 + }, + { + "epoch": 67.13855421686748, + "grad_norm": 2.596620559692383, + "learning_rate": 4.9339022757697456e-05, + "loss": 0.1955, + "step": 11145 + }, + { + "epoch": 67.16867469879519, + "grad_norm": 2.7191731929779053, + "learning_rate": 4.933623382418564e-05, + "loss": 0.1783, + "step": 11150 + }, + { + "epoch": 67.1987951807229, + "grad_norm": 0.9744617938995361, + "learning_rate": 4.933344489067381e-05, + "loss": 0.1811, + "step": 11155 + }, + { + "epoch": 67.2289156626506, + "grad_norm": 2.0423519611358643, + "learning_rate": 4.933065595716198e-05, + "loss": 0.1685, + "step": 11160 + }, + { + "epoch": 67.25903614457832, + "grad_norm": 1.806595802307129, + "learning_rate": 4.932786702365016e-05, + "loss": 0.2129, + "step": 11165 + }, + { + "epoch": 67.28915662650603, + "grad_norm": 1.54165518283844, + "learning_rate": 4.932507809013833e-05, + "loss": 0.1548, + "step": 11170 + }, + { + "epoch": 67.31927710843374, + "grad_norm": 0.8470763564109802, + "learning_rate": 4.932228915662651e-05, + "loss": 0.1466, + "step": 11175 + }, + { + "epoch": 67.34939759036145, + "grad_norm": 0.8902156352996826, + "learning_rate": 4.9319500223114686e-05, + "loss": 0.1688, + "step": 11180 + }, + { + "epoch": 67.37951807228916, + "grad_norm": 1.4657398462295532, + "learning_rate": 4.931671128960286e-05, + "loss": 0.182, + "step": 11185 + }, + { + "epoch": 67.40963855421687, + "grad_norm": 1.6776376962661743, + "learning_rate": 4.9313922356091034e-05, + "loss": 0.1539, + "step": 11190 + }, + { + "epoch": 67.43975903614458, + "grad_norm": 2.4649460315704346, + "learning_rate": 4.931113342257921e-05, + "loss": 0.1457, + "step": 11195 + }, + { + "epoch": 67.46987951807229, + "grad_norm": 1.9362566471099854, + "learning_rate": 4.930834448906738e-05, + "loss": 0.1798, + "step": 11200 + }, + { + "epoch": 67.5, + "grad_norm": 1.598980188369751, + "learning_rate": 4.930555555555556e-05, + "loss": 0.1737, + "step": 11205 + }, + { + "epoch": 67.53012048192771, + "grad_norm": 2.1399192810058594, + "learning_rate": 4.9302766622043736e-05, + "loss": 0.1474, + "step": 11210 + }, + { + "epoch": 67.56024096385542, + "grad_norm": 2.315030813217163, + "learning_rate": 4.929997768853191e-05, + "loss": 0.1898, + "step": 11215 + }, + { + "epoch": 67.59036144578313, + "grad_norm": 1.8207380771636963, + "learning_rate": 4.9297188755020084e-05, + "loss": 0.1664, + "step": 11220 + }, + { + "epoch": 67.62048192771084, + "grad_norm": 1.6651870012283325, + "learning_rate": 4.929439982150826e-05, + "loss": 0.1708, + "step": 11225 + }, + { + "epoch": 67.65060240963855, + "grad_norm": 1.4469952583312988, + "learning_rate": 4.929161088799643e-05, + "loss": 0.1634, + "step": 11230 + }, + { + "epoch": 67.68072289156626, + "grad_norm": 1.6479268074035645, + "learning_rate": 4.928882195448461e-05, + "loss": 0.1632, + "step": 11235 + }, + { + "epoch": 67.71084337349397, + "grad_norm": 1.4887217283248901, + "learning_rate": 4.9286033020972786e-05, + "loss": 0.186, + "step": 11240 + }, + { + "epoch": 67.74096385542168, + "grad_norm": 1.9350062608718872, + "learning_rate": 4.928324408746095e-05, + "loss": 0.1922, + "step": 11245 + }, + { + "epoch": 67.7710843373494, + "grad_norm": 2.2155404090881348, + "learning_rate": 4.928045515394913e-05, + "loss": 0.1657, + "step": 11250 + }, + { + "epoch": 67.8012048192771, + "grad_norm": 2.231834650039673, + "learning_rate": 4.927766622043731e-05, + "loss": 0.1776, + "step": 11255 + }, + { + "epoch": 67.83132530120481, + "grad_norm": 1.9670336246490479, + "learning_rate": 4.927487728692548e-05, + "loss": 0.1964, + "step": 11260 + }, + { + "epoch": 67.86144578313252, + "grad_norm": 1.4637037515640259, + "learning_rate": 4.927208835341366e-05, + "loss": 0.1729, + "step": 11265 + }, + { + "epoch": 67.89156626506023, + "grad_norm": 1.7573186159133911, + "learning_rate": 4.9269299419901835e-05, + "loss": 0.1619, + "step": 11270 + }, + { + "epoch": 67.92168674698796, + "grad_norm": 2.7523934841156006, + "learning_rate": 4.926651048639e-05, + "loss": 0.1985, + "step": 11275 + }, + { + "epoch": 67.95180722891567, + "grad_norm": 2.1700620651245117, + "learning_rate": 4.926372155287818e-05, + "loss": 0.1618, + "step": 11280 + }, + { + "epoch": 67.98192771084338, + "grad_norm": 1.83929443359375, + "learning_rate": 4.9260932619366356e-05, + "loss": 0.1571, + "step": 11285 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.9429807314195832, + "eval_auc": 0.9793717943277653, + "eval_f1": 0.9143532191376255, + "eval_loss": 0.2007821649312973, + "eval_precision": 0.9485294117647058, + "eval_recall": 0.8825541619156214, + "eval_runtime": 19.754, + "eval_samples_per_second": 128.733, + "eval_steps_per_second": 0.658, + "step": 11288 + }, + { + "epoch": 68.01204819277109, + "grad_norm": 1.3829892873764038, + "learning_rate": 4.925814368585453e-05, + "loss": 0.1796, + "step": 11290 + }, + { + "epoch": 68.0421686746988, + "grad_norm": 1.3722093105316162, + "learning_rate": 4.925535475234271e-05, + "loss": 0.1445, + "step": 11295 + }, + { + "epoch": 68.07228915662651, + "grad_norm": 2.054945945739746, + "learning_rate": 4.9252565818830885e-05, + "loss": 0.1793, + "step": 11300 + }, + { + "epoch": 68.10240963855422, + "grad_norm": 0.9681638479232788, + "learning_rate": 4.924977688531905e-05, + "loss": 0.1451, + "step": 11305 + }, + { + "epoch": 68.13253012048193, + "grad_norm": 1.8551998138427734, + "learning_rate": 4.924698795180723e-05, + "loss": 0.1644, + "step": 11310 + }, + { + "epoch": 68.16265060240964, + "grad_norm": 1.469698190689087, + "learning_rate": 4.9244199018295406e-05, + "loss": 0.1554, + "step": 11315 + }, + { + "epoch": 68.19277108433735, + "grad_norm": 1.8895114660263062, + "learning_rate": 4.924141008478358e-05, + "loss": 0.1435, + "step": 11320 + }, + { + "epoch": 68.22289156626506, + "grad_norm": 2.335062026977539, + "learning_rate": 4.923862115127176e-05, + "loss": 0.1786, + "step": 11325 + }, + { + "epoch": 68.25301204819277, + "grad_norm": 1.1214507818222046, + "learning_rate": 4.923583221775993e-05, + "loss": 0.1738, + "step": 11330 + }, + { + "epoch": 68.28313253012048, + "grad_norm": 2.89261794090271, + "learning_rate": 4.92330432842481e-05, + "loss": 0.2027, + "step": 11335 + }, + { + "epoch": 68.3132530120482, + "grad_norm": 2.151472568511963, + "learning_rate": 4.923025435073628e-05, + "loss": 0.1594, + "step": 11340 + }, + { + "epoch": 68.3433734939759, + "grad_norm": 1.9266464710235596, + "learning_rate": 4.9227465417224456e-05, + "loss": 0.1756, + "step": 11345 + }, + { + "epoch": 68.37349397590361, + "grad_norm": 2.3010458946228027, + "learning_rate": 4.922467648371263e-05, + "loss": 0.1746, + "step": 11350 + }, + { + "epoch": 68.40361445783132, + "grad_norm": 1.6794465780258179, + "learning_rate": 4.922188755020081e-05, + "loss": 0.1677, + "step": 11355 + }, + { + "epoch": 68.43373493975903, + "grad_norm": 1.511877417564392, + "learning_rate": 4.921909861668898e-05, + "loss": 0.1727, + "step": 11360 + }, + { + "epoch": 68.46385542168674, + "grad_norm": 2.0173590183258057, + "learning_rate": 4.921630968317715e-05, + "loss": 0.1544, + "step": 11365 + }, + { + "epoch": 68.49397590361446, + "grad_norm": 2.6422698497772217, + "learning_rate": 4.921352074966533e-05, + "loss": 0.1244, + "step": 11370 + }, + { + "epoch": 68.52409638554217, + "grad_norm": 1.666720986366272, + "learning_rate": 4.9210731816153505e-05, + "loss": 0.1348, + "step": 11375 + }, + { + "epoch": 68.55421686746988, + "grad_norm": 1.0499719381332397, + "learning_rate": 4.920794288264168e-05, + "loss": 0.1697, + "step": 11380 + }, + { + "epoch": 68.58433734939759, + "grad_norm": 1.7370655536651611, + "learning_rate": 4.920515394912986e-05, + "loss": 0.2181, + "step": 11385 + }, + { + "epoch": 68.6144578313253, + "grad_norm": 2.2380411624908447, + "learning_rate": 4.9202365015618026e-05, + "loss": 0.1668, + "step": 11390 + }, + { + "epoch": 68.644578313253, + "grad_norm": 1.3639131784439087, + "learning_rate": 4.91995760821062e-05, + "loss": 0.1411, + "step": 11395 + }, + { + "epoch": 68.67469879518072, + "grad_norm": 2.3739328384399414, + "learning_rate": 4.919678714859438e-05, + "loss": 0.1631, + "step": 11400 + }, + { + "epoch": 68.70481927710843, + "grad_norm": 1.231765866279602, + "learning_rate": 4.9193998215082555e-05, + "loss": 0.151, + "step": 11405 + }, + { + "epoch": 68.73493975903614, + "grad_norm": 1.5961732864379883, + "learning_rate": 4.919120928157073e-05, + "loss": 0.1494, + "step": 11410 + }, + { + "epoch": 68.76506024096386, + "grad_norm": 1.471126675605774, + "learning_rate": 4.918842034805891e-05, + "loss": 0.1733, + "step": 11415 + }, + { + "epoch": 68.79518072289157, + "grad_norm": 1.7205326557159424, + "learning_rate": 4.9185631414547076e-05, + "loss": 0.1823, + "step": 11420 + }, + { + "epoch": 68.82530120481928, + "grad_norm": 2.2642674446105957, + "learning_rate": 4.918284248103525e-05, + "loss": 0.2046, + "step": 11425 + }, + { + "epoch": 68.855421686747, + "grad_norm": 1.4918270111083984, + "learning_rate": 4.918005354752343e-05, + "loss": 0.1705, + "step": 11430 + }, + { + "epoch": 68.8855421686747, + "grad_norm": 1.3444596529006958, + "learning_rate": 4.9177264614011604e-05, + "loss": 0.1763, + "step": 11435 + }, + { + "epoch": 68.91566265060241, + "grad_norm": 1.2696788311004639, + "learning_rate": 4.9174475680499785e-05, + "loss": 0.1538, + "step": 11440 + }, + { + "epoch": 68.94578313253012, + "grad_norm": 1.9824987649917603, + "learning_rate": 4.917168674698795e-05, + "loss": 0.1859, + "step": 11445 + }, + { + "epoch": 68.97590361445783, + "grad_norm": 1.523636817932129, + "learning_rate": 4.9168897813476126e-05, + "loss": 0.1697, + "step": 11450 + }, + { + "epoch": 69.0, + "eval_accuracy": 0.9288242233582383, + "eval_auc": 0.9759845785520594, + "eval_f1": 0.8914217156568687, + "eval_loss": 0.2424307018518448, + "eval_precision": 0.9405063291139241, + "eval_recall": 0.8472063854047891, + "eval_runtime": 19.8135, + "eval_samples_per_second": 128.347, + "eval_steps_per_second": 0.656, + "step": 11454 + }, + { + "epoch": 69.00602409638554, + "grad_norm": 1.9285058975219727, + "learning_rate": 4.9166108879964306e-05, + "loss": 0.1525, + "step": 11455 + }, + { + "epoch": 69.03614457831326, + "grad_norm": 0.9996278882026672, + "learning_rate": 4.916331994645248e-05, + "loss": 0.177, + "step": 11460 + }, + { + "epoch": 69.06626506024097, + "grad_norm": 1.2690097093582153, + "learning_rate": 4.9160531012940654e-05, + "loss": 0.1339, + "step": 11465 + }, + { + "epoch": 69.09638554216868, + "grad_norm": 1.4849321842193604, + "learning_rate": 4.9157742079428834e-05, + "loss": 0.1809, + "step": 11470 + }, + { + "epoch": 69.12650602409639, + "grad_norm": 1.2057774066925049, + "learning_rate": 4.9154953145917e-05, + "loss": 0.1179, + "step": 11475 + }, + { + "epoch": 69.1566265060241, + "grad_norm": 2.3532774448394775, + "learning_rate": 4.9152164212405175e-05, + "loss": 0.1738, + "step": 11480 + }, + { + "epoch": 69.1867469879518, + "grad_norm": 1.6331244707107544, + "learning_rate": 4.9149375278893356e-05, + "loss": 0.1651, + "step": 11485 + }, + { + "epoch": 69.21686746987952, + "grad_norm": 1.5818071365356445, + "learning_rate": 4.914658634538153e-05, + "loss": 0.1389, + "step": 11490 + }, + { + "epoch": 69.24698795180723, + "grad_norm": 1.0956642627716064, + "learning_rate": 4.91437974118697e-05, + "loss": 0.1587, + "step": 11495 + }, + { + "epoch": 69.27710843373494, + "grad_norm": 1.2369745969772339, + "learning_rate": 4.9141008478357884e-05, + "loss": 0.1464, + "step": 11500 + }, + { + "epoch": 69.30722891566265, + "grad_norm": 1.6326029300689697, + "learning_rate": 4.913821954484605e-05, + "loss": 0.1822, + "step": 11505 + }, + { + "epoch": 69.33734939759036, + "grad_norm": 1.6316596269607544, + "learning_rate": 4.9135430611334225e-05, + "loss": 0.1119, + "step": 11510 + }, + { + "epoch": 69.36746987951807, + "grad_norm": 0.746854305267334, + "learning_rate": 4.9132641677822405e-05, + "loss": 0.1604, + "step": 11515 + }, + { + "epoch": 69.39759036144578, + "grad_norm": 2.0561535358428955, + "learning_rate": 4.912985274431058e-05, + "loss": 0.1949, + "step": 11520 + }, + { + "epoch": 69.42771084337349, + "grad_norm": 1.2303466796875, + "learning_rate": 4.912706381079875e-05, + "loss": 0.1913, + "step": 11525 + }, + { + "epoch": 69.4578313253012, + "grad_norm": 1.2517952919006348, + "learning_rate": 4.912427487728693e-05, + "loss": 0.1378, + "step": 11530 + }, + { + "epoch": 69.48795180722891, + "grad_norm": 1.7751539945602417, + "learning_rate": 4.91214859437751e-05, + "loss": 0.171, + "step": 11535 + }, + { + "epoch": 69.51807228915662, + "grad_norm": 2.344428539276123, + "learning_rate": 4.9118697010263274e-05, + "loss": 0.1763, + "step": 11540 + }, + { + "epoch": 69.54819277108433, + "grad_norm": 1.3794399499893188, + "learning_rate": 4.9115908076751455e-05, + "loss": 0.1585, + "step": 11545 + }, + { + "epoch": 69.57831325301204, + "grad_norm": 1.4221460819244385, + "learning_rate": 4.911311914323963e-05, + "loss": 0.1443, + "step": 11550 + }, + { + "epoch": 69.60843373493977, + "grad_norm": 1.758724570274353, + "learning_rate": 4.91103302097278e-05, + "loss": 0.1969, + "step": 11555 + }, + { + "epoch": 69.63855421686748, + "grad_norm": 1.4691616296768188, + "learning_rate": 4.9107541276215976e-05, + "loss": 0.142, + "step": 11560 + }, + { + "epoch": 69.66867469879519, + "grad_norm": 2.206136703491211, + "learning_rate": 4.910475234270415e-05, + "loss": 0.2204, + "step": 11565 + }, + { + "epoch": 69.6987951807229, + "grad_norm": 1.3491761684417725, + "learning_rate": 4.9101963409192324e-05, + "loss": 0.1707, + "step": 11570 + }, + { + "epoch": 69.7289156626506, + "grad_norm": 1.2227740287780762, + "learning_rate": 4.9099174475680504e-05, + "loss": 0.1353, + "step": 11575 + }, + { + "epoch": 69.75903614457832, + "grad_norm": 1.7694668769836426, + "learning_rate": 4.909638554216868e-05, + "loss": 0.1803, + "step": 11580 + }, + { + "epoch": 69.78915662650603, + "grad_norm": 1.7386656999588013, + "learning_rate": 4.909359660865685e-05, + "loss": 0.1719, + "step": 11585 + }, + { + "epoch": 69.81927710843374, + "grad_norm": 1.1455605030059814, + "learning_rate": 4.9090807675145026e-05, + "loss": 0.1606, + "step": 11590 + }, + { + "epoch": 69.84939759036145, + "grad_norm": 2.165029287338257, + "learning_rate": 4.90880187416332e-05, + "loss": 0.1926, + "step": 11595 + }, + { + "epoch": 69.87951807228916, + "grad_norm": 1.8540332317352295, + "learning_rate": 4.908522980812137e-05, + "loss": 0.201, + "step": 11600 + }, + { + "epoch": 69.90963855421687, + "grad_norm": 1.1603822708129883, + "learning_rate": 4.9082440874609554e-05, + "loss": 0.155, + "step": 11605 + }, + { + "epoch": 69.93975903614458, + "grad_norm": 0.9770561456680298, + "learning_rate": 4.907965194109773e-05, + "loss": 0.1622, + "step": 11610 + }, + { + "epoch": 69.96987951807229, + "grad_norm": 1.5205050706863403, + "learning_rate": 4.90768630075859e-05, + "loss": 0.1565, + "step": 11615 + }, + { + "epoch": 70.0, + "grad_norm": 2.5346810817718506, + "learning_rate": 4.9074074074074075e-05, + "loss": 0.211, + "step": 11620 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.9193865513173417, + "eval_auc": 0.9755270409189901, + "eval_f1": 0.8709880427942102, + "eval_loss": 0.31897255778312683, + "eval_precision": 0.9719101123595506, + "eval_recall": 0.7890535917901939, + "eval_runtime": 19.8528, + "eval_samples_per_second": 128.093, + "eval_steps_per_second": 0.655, + "step": 11620 + }, + { + "epoch": 70.03012048192771, + "grad_norm": 2.514573812484741, + "learning_rate": 4.907128514056225e-05, + "loss": 0.1649, + "step": 11625 + }, + { + "epoch": 70.06024096385542, + "grad_norm": 2.3070337772369385, + "learning_rate": 4.906849620705042e-05, + "loss": 0.1677, + "step": 11630 + }, + { + "epoch": 70.09036144578313, + "grad_norm": 1.6494768857955933, + "learning_rate": 4.9065707273538603e-05, + "loss": 0.1683, + "step": 11635 + }, + { + "epoch": 70.12048192771084, + "grad_norm": 1.5687743425369263, + "learning_rate": 4.906291834002678e-05, + "loss": 0.1636, + "step": 11640 + }, + { + "epoch": 70.15060240963855, + "grad_norm": 1.975725769996643, + "learning_rate": 4.906012940651495e-05, + "loss": 0.1856, + "step": 11645 + }, + { + "epoch": 70.18072289156626, + "grad_norm": 1.9229167699813843, + "learning_rate": 4.9057340473003125e-05, + "loss": 0.1753, + "step": 11650 + }, + { + "epoch": 70.21084337349397, + "grad_norm": 2.0328242778778076, + "learning_rate": 4.90545515394913e-05, + "loss": 0.1452, + "step": 11655 + }, + { + "epoch": 70.24096385542168, + "grad_norm": 2.073943614959717, + "learning_rate": 4.905176260597947e-05, + "loss": 0.1803, + "step": 11660 + }, + { + "epoch": 70.2710843373494, + "grad_norm": 1.092247724533081, + "learning_rate": 4.904897367246765e-05, + "loss": 0.1511, + "step": 11665 + }, + { + "epoch": 70.3012048192771, + "grad_norm": 0.8671462535858154, + "learning_rate": 4.904618473895583e-05, + "loss": 0.1468, + "step": 11670 + }, + { + "epoch": 70.33132530120481, + "grad_norm": 1.3966654539108276, + "learning_rate": 4.9043395805444e-05, + "loss": 0.1574, + "step": 11675 + }, + { + "epoch": 70.36144578313252, + "grad_norm": 1.2109041213989258, + "learning_rate": 4.9040606871932174e-05, + "loss": 0.1571, + "step": 11680 + }, + { + "epoch": 70.39156626506023, + "grad_norm": 1.3622057437896729, + "learning_rate": 4.903781793842035e-05, + "loss": 0.1584, + "step": 11685 + }, + { + "epoch": 70.42168674698796, + "grad_norm": 1.86616051197052, + "learning_rate": 4.903502900490852e-05, + "loss": 0.1633, + "step": 11690 + }, + { + "epoch": 70.45180722891567, + "grad_norm": 1.5176563262939453, + "learning_rate": 4.90322400713967e-05, + "loss": 0.1258, + "step": 11695 + }, + { + "epoch": 70.48192771084338, + "grad_norm": 1.8466945886611938, + "learning_rate": 4.9029451137884876e-05, + "loss": 0.1433, + "step": 11700 + }, + { + "epoch": 70.51204819277109, + "grad_norm": 1.281245231628418, + "learning_rate": 4.902666220437305e-05, + "loss": 0.189, + "step": 11705 + }, + { + "epoch": 70.5421686746988, + "grad_norm": 1.0728424787521362, + "learning_rate": 4.9023873270861224e-05, + "loss": 0.1588, + "step": 11710 + }, + { + "epoch": 70.57228915662651, + "grad_norm": 1.2949163913726807, + "learning_rate": 4.90210843373494e-05, + "loss": 0.1213, + "step": 11715 + }, + { + "epoch": 70.60240963855422, + "grad_norm": 1.456634521484375, + "learning_rate": 4.901829540383758e-05, + "loss": 0.172, + "step": 11720 + }, + { + "epoch": 70.63253012048193, + "grad_norm": 2.1834113597869873, + "learning_rate": 4.901550647032575e-05, + "loss": 0.1694, + "step": 11725 + }, + { + "epoch": 70.66265060240964, + "grad_norm": 2.4465279579162598, + "learning_rate": 4.9012717536813926e-05, + "loss": 0.1921, + "step": 11730 + }, + { + "epoch": 70.69277108433735, + "grad_norm": 1.586053729057312, + "learning_rate": 4.90099286033021e-05, + "loss": 0.1699, + "step": 11735 + }, + { + "epoch": 70.72289156626506, + "grad_norm": 1.616501808166504, + "learning_rate": 4.9007139669790274e-05, + "loss": 0.1403, + "step": 11740 + }, + { + "epoch": 70.75301204819277, + "grad_norm": 1.987012267112732, + "learning_rate": 4.900435073627845e-05, + "loss": 0.1609, + "step": 11745 + }, + { + "epoch": 70.78313253012048, + "grad_norm": 2.1528191566467285, + "learning_rate": 4.900156180276663e-05, + "loss": 0.1683, + "step": 11750 + }, + { + "epoch": 70.8132530120482, + "grad_norm": 2.372396469116211, + "learning_rate": 4.89987728692548e-05, + "loss": 0.1601, + "step": 11755 + }, + { + "epoch": 70.8433734939759, + "grad_norm": 2.0807836055755615, + "learning_rate": 4.8995983935742975e-05, + "loss": 0.1776, + "step": 11760 + }, + { + "epoch": 70.87349397590361, + "grad_norm": 2.007038116455078, + "learning_rate": 4.899319500223115e-05, + "loss": 0.155, + "step": 11765 + }, + { + "epoch": 70.90361445783132, + "grad_norm": 2.0115725994110107, + "learning_rate": 4.899040606871932e-05, + "loss": 0.165, + "step": 11770 + }, + { + "epoch": 70.93373493975903, + "grad_norm": 2.330153226852417, + "learning_rate": 4.89876171352075e-05, + "loss": 0.1782, + "step": 11775 + }, + { + "epoch": 70.96385542168674, + "grad_norm": 0.9279620051383972, + "learning_rate": 4.898482820169568e-05, + "loss": 0.1541, + "step": 11780 + }, + { + "epoch": 70.99397590361446, + "grad_norm": 1.6002525091171265, + "learning_rate": 4.898203926818385e-05, + "loss": 0.1468, + "step": 11785 + }, + { + "epoch": 71.0, + "eval_accuracy": 0.9402280770743217, + "eval_auc": 0.9803796090842267, + "eval_f1": 0.9079903147699758, + "eval_loss": 0.22381989657878876, + "eval_precision": 0.967741935483871, + "eval_recall": 0.855188141391106, + "eval_runtime": 16.9098, + "eval_samples_per_second": 150.386, + "eval_steps_per_second": 0.769, + "step": 11786 + }, + { + "epoch": 71.02409638554217, + "grad_norm": 1.6419475078582764, + "learning_rate": 4.8979250334672025e-05, + "loss": 0.1328, + "step": 11790 + }, + { + "epoch": 71.05421686746988, + "grad_norm": 2.771260976791382, + "learning_rate": 4.89764614011602e-05, + "loss": 0.1447, + "step": 11795 + }, + { + "epoch": 71.08433734939759, + "grad_norm": 2.3673524856567383, + "learning_rate": 4.897367246764837e-05, + "loss": 0.1637, + "step": 11800 + }, + { + "epoch": 71.1144578313253, + "grad_norm": 2.1549925804138184, + "learning_rate": 4.8970883534136546e-05, + "loss": 0.1472, + "step": 11805 + }, + { + "epoch": 71.144578313253, + "grad_norm": 1.625942349433899, + "learning_rate": 4.896809460062473e-05, + "loss": 0.1634, + "step": 11810 + }, + { + "epoch": 71.17469879518072, + "grad_norm": 1.6430217027664185, + "learning_rate": 4.89653056671129e-05, + "loss": 0.1704, + "step": 11815 + }, + { + "epoch": 71.20481927710843, + "grad_norm": 2.000575542449951, + "learning_rate": 4.8962516733601075e-05, + "loss": 0.1896, + "step": 11820 + }, + { + "epoch": 71.23493975903614, + "grad_norm": 1.9632614850997925, + "learning_rate": 4.895972780008925e-05, + "loss": 0.1703, + "step": 11825 + }, + { + "epoch": 71.26506024096386, + "grad_norm": 1.683100700378418, + "learning_rate": 4.895693886657742e-05, + "loss": 0.1493, + "step": 11830 + }, + { + "epoch": 71.29518072289157, + "grad_norm": 2.165752410888672, + "learning_rate": 4.8954149933065596e-05, + "loss": 0.1407, + "step": 11835 + }, + { + "epoch": 71.32530120481928, + "grad_norm": 2.3268678188323975, + "learning_rate": 4.8951360999553777e-05, + "loss": 0.1606, + "step": 11840 + }, + { + "epoch": 71.355421686747, + "grad_norm": 2.8651413917541504, + "learning_rate": 4.894857206604195e-05, + "loss": 0.1942, + "step": 11845 + }, + { + "epoch": 71.3855421686747, + "grad_norm": 1.9885239601135254, + "learning_rate": 4.8945783132530124e-05, + "loss": 0.1584, + "step": 11850 + }, + { + "epoch": 71.41566265060241, + "grad_norm": 1.9506202936172485, + "learning_rate": 4.89429941990183e-05, + "loss": 0.1572, + "step": 11855 + }, + { + "epoch": 71.44578313253012, + "grad_norm": 1.9616752862930298, + "learning_rate": 4.894020526550647e-05, + "loss": 0.1415, + "step": 11860 + }, + { + "epoch": 71.47590361445783, + "grad_norm": 1.929392695426941, + "learning_rate": 4.8937416331994646e-05, + "loss": 0.1685, + "step": 11865 + }, + { + "epoch": 71.50602409638554, + "grad_norm": 2.233412981033325, + "learning_rate": 4.8934627398482826e-05, + "loss": 0.1894, + "step": 11870 + }, + { + "epoch": 71.53614457831326, + "grad_norm": 1.8386956453323364, + "learning_rate": 4.8931838464971e-05, + "loss": 0.1743, + "step": 11875 + }, + { + "epoch": 71.56626506024097, + "grad_norm": 1.6761291027069092, + "learning_rate": 4.892904953145917e-05, + "loss": 0.1455, + "step": 11880 + }, + { + "epoch": 71.59638554216868, + "grad_norm": 2.172708749771118, + "learning_rate": 4.892626059794735e-05, + "loss": 0.1654, + "step": 11885 + }, + { + "epoch": 71.62650602409639, + "grad_norm": 1.2129136323928833, + "learning_rate": 4.892347166443552e-05, + "loss": 0.1644, + "step": 11890 + }, + { + "epoch": 71.6566265060241, + "grad_norm": 1.0940375328063965, + "learning_rate": 4.8920682730923695e-05, + "loss": 0.147, + "step": 11895 + }, + { + "epoch": 71.6867469879518, + "grad_norm": 0.9110568761825562, + "learning_rate": 4.8917893797411876e-05, + "loss": 0.1252, + "step": 11900 + }, + { + "epoch": 71.71686746987952, + "grad_norm": 2.237536668777466, + "learning_rate": 4.891510486390005e-05, + "loss": 0.1567, + "step": 11905 + }, + { + "epoch": 71.74698795180723, + "grad_norm": 0.9506092667579651, + "learning_rate": 4.8912315930388216e-05, + "loss": 0.1312, + "step": 11910 + }, + { + "epoch": 71.77710843373494, + "grad_norm": 1.5513007640838623, + "learning_rate": 4.89095269968764e-05, + "loss": 0.1403, + "step": 11915 + }, + { + "epoch": 71.80722891566265, + "grad_norm": 1.3525789976119995, + "learning_rate": 4.890673806336457e-05, + "loss": 0.1746, + "step": 11920 + }, + { + "epoch": 71.83734939759036, + "grad_norm": 2.4830636978149414, + "learning_rate": 4.8903949129852745e-05, + "loss": 0.158, + "step": 11925 + }, + { + "epoch": 71.86746987951807, + "grad_norm": 1.4283735752105713, + "learning_rate": 4.8901160196340925e-05, + "loss": 0.1459, + "step": 11930 + }, + { + "epoch": 71.89759036144578, + "grad_norm": 0.5627816915512085, + "learning_rate": 4.88983712628291e-05, + "loss": 0.1592, + "step": 11935 + }, + { + "epoch": 71.92771084337349, + "grad_norm": 1.2602050304412842, + "learning_rate": 4.8895582329317266e-05, + "loss": 0.1672, + "step": 11940 + }, + { + "epoch": 71.9578313253012, + "grad_norm": 1.8462952375411987, + "learning_rate": 4.8892793395805447e-05, + "loss": 0.1608, + "step": 11945 + }, + { + "epoch": 71.98795180722891, + "grad_norm": 1.4569382667541504, + "learning_rate": 4.889000446229362e-05, + "loss": 0.1659, + "step": 11950 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.9327565867086118, + "eval_auc": 0.9771220917101162, + "eval_f1": 0.8969258589511754, + "eval_loss": 0.234303280711174, + "eval_precision": 0.9514066496163683, + "eval_recall": 0.8483466362599772, + "eval_runtime": 17.1758, + "eval_samples_per_second": 148.057, + "eval_steps_per_second": 0.757, + "step": 11952 + }, + { + "epoch": 72.01807228915662, + "grad_norm": 2.3381729125976562, + "learning_rate": 4.8887215528781794e-05, + "loss": 0.1808, + "step": 11955 + }, + { + "epoch": 72.04819277108433, + "grad_norm": 1.6231787204742432, + "learning_rate": 4.8884426595269975e-05, + "loss": 0.1804, + "step": 11960 + }, + { + "epoch": 72.07831325301204, + "grad_norm": 1.2411439418792725, + "learning_rate": 4.888163766175814e-05, + "loss": 0.1695, + "step": 11965 + }, + { + "epoch": 72.10843373493977, + "grad_norm": 2.3571534156799316, + "learning_rate": 4.887884872824632e-05, + "loss": 0.1411, + "step": 11970 + }, + { + "epoch": 72.13855421686748, + "grad_norm": 0.9407815933227539, + "learning_rate": 4.8876059794734496e-05, + "loss": 0.1445, + "step": 11975 + }, + { + "epoch": 72.16867469879519, + "grad_norm": 1.3343504667282104, + "learning_rate": 4.887327086122267e-05, + "loss": 0.1516, + "step": 11980 + }, + { + "epoch": 72.1987951807229, + "grad_norm": 1.809865117073059, + "learning_rate": 4.887048192771085e-05, + "loss": 0.1318, + "step": 11985 + }, + { + "epoch": 72.2289156626506, + "grad_norm": 1.183609962463379, + "learning_rate": 4.8867692994199024e-05, + "loss": 0.1349, + "step": 11990 + }, + { + "epoch": 72.25903614457832, + "grad_norm": 1.6165786981582642, + "learning_rate": 4.886490406068719e-05, + "loss": 0.1535, + "step": 11995 + }, + { + "epoch": 72.28915662650603, + "grad_norm": 1.968641996383667, + "learning_rate": 4.886211512717537e-05, + "loss": 0.1976, + "step": 12000 + }, + { + "epoch": 72.31927710843374, + "grad_norm": 1.1011695861816406, + "learning_rate": 4.8859326193663546e-05, + "loss": 0.2182, + "step": 12005 + }, + { + "epoch": 72.34939759036145, + "grad_norm": 1.7650960683822632, + "learning_rate": 4.885653726015172e-05, + "loss": 0.1635, + "step": 12010 + }, + { + "epoch": 72.37951807228916, + "grad_norm": 0.9837899804115295, + "learning_rate": 4.88537483266399e-05, + "loss": 0.14, + "step": 12015 + }, + { + "epoch": 72.40963855421687, + "grad_norm": 0.8613243103027344, + "learning_rate": 4.8850959393128074e-05, + "loss": 0.151, + "step": 12020 + }, + { + "epoch": 72.43975903614458, + "grad_norm": 1.387433648109436, + "learning_rate": 4.884817045961624e-05, + "loss": 0.1758, + "step": 12025 + }, + { + "epoch": 72.46987951807229, + "grad_norm": 0.8315243721008301, + "learning_rate": 4.884538152610442e-05, + "loss": 0.136, + "step": 12030 + }, + { + "epoch": 72.5, + "grad_norm": 2.1914048194885254, + "learning_rate": 4.8842592592592595e-05, + "loss": 0.1536, + "step": 12035 + }, + { + "epoch": 72.53012048192771, + "grad_norm": 1.5686542987823486, + "learning_rate": 4.883980365908077e-05, + "loss": 0.1569, + "step": 12040 + }, + { + "epoch": 72.56024096385542, + "grad_norm": 1.354062795639038, + "learning_rate": 4.883701472556895e-05, + "loss": 0.141, + "step": 12045 + }, + { + "epoch": 72.59036144578313, + "grad_norm": 2.271022319793701, + "learning_rate": 4.883422579205712e-05, + "loss": 0.1573, + "step": 12050 + }, + { + "epoch": 72.62048192771084, + "grad_norm": 1.9208792448043823, + "learning_rate": 4.883143685854529e-05, + "loss": 0.0976, + "step": 12055 + }, + { + "epoch": 72.65060240963855, + "grad_norm": 1.3888119459152222, + "learning_rate": 4.882864792503347e-05, + "loss": 0.1353, + "step": 12060 + }, + { + "epoch": 72.68072289156626, + "grad_norm": 1.9032936096191406, + "learning_rate": 4.8825858991521645e-05, + "loss": 0.1592, + "step": 12065 + }, + { + "epoch": 72.71084337349397, + "grad_norm": 2.03615403175354, + "learning_rate": 4.882307005800982e-05, + "loss": 0.1529, + "step": 12070 + }, + { + "epoch": 72.74096385542168, + "grad_norm": 0.8284222483634949, + "learning_rate": 4.8820281124498e-05, + "loss": 0.1552, + "step": 12075 + }, + { + "epoch": 72.7710843373494, + "grad_norm": 2.2320663928985596, + "learning_rate": 4.8817492190986166e-05, + "loss": 0.1372, + "step": 12080 + }, + { + "epoch": 72.8012048192771, + "grad_norm": 1.9461119174957275, + "learning_rate": 4.881470325747434e-05, + "loss": 0.1579, + "step": 12085 + }, + { + "epoch": 72.83132530120481, + "grad_norm": 2.10284161567688, + "learning_rate": 4.881191432396252e-05, + "loss": 0.1613, + "step": 12090 + }, + { + "epoch": 72.86144578313252, + "grad_norm": 2.1088132858276367, + "learning_rate": 4.8809125390450694e-05, + "loss": 0.1413, + "step": 12095 + }, + { + "epoch": 72.89156626506023, + "grad_norm": 1.2813507318496704, + "learning_rate": 4.880633645693887e-05, + "loss": 0.1754, + "step": 12100 + }, + { + "epoch": 72.92168674698796, + "grad_norm": 2.11826491355896, + "learning_rate": 4.880354752342705e-05, + "loss": 0.1781, + "step": 12105 + }, + { + "epoch": 72.95180722891567, + "grad_norm": 1.9352747201919556, + "learning_rate": 4.8800758589915216e-05, + "loss": 0.1632, + "step": 12110 + }, + { + "epoch": 72.98192771084338, + "grad_norm": 2.155324697494507, + "learning_rate": 4.879796965640339e-05, + "loss": 0.1499, + "step": 12115 + }, + { + "epoch": 73.0, + "eval_accuracy": 0.9445536767597326, + "eval_auc": 0.9816194436725658, + "eval_f1": 0.9168141592920354, + "eval_loss": 0.19978806376457214, + "eval_precision": 0.9498777506112469, + "eval_recall": 0.8859749144811858, + "eval_runtime": 16.8874, + "eval_samples_per_second": 150.586, + "eval_steps_per_second": 0.77, + "step": 12118 + }, + { + "epoch": 73.01204819277109, + "grad_norm": 2.200657844543457, + "learning_rate": 4.879518072289157e-05, + "loss": 0.1402, + "step": 12120 + }, + { + "epoch": 73.0421686746988, + "grad_norm": 0.6882126927375793, + "learning_rate": 4.8792391789379744e-05, + "loss": 0.1093, + "step": 12125 + }, + { + "epoch": 73.07228915662651, + "grad_norm": 2.372523307800293, + "learning_rate": 4.878960285586792e-05, + "loss": 0.1789, + "step": 12130 + }, + { + "epoch": 73.10240963855422, + "grad_norm": 1.2518162727355957, + "learning_rate": 4.87868139223561e-05, + "loss": 0.1616, + "step": 12135 + }, + { + "epoch": 73.13253012048193, + "grad_norm": 2.601158618927002, + "learning_rate": 4.8784024988844265e-05, + "loss": 0.2215, + "step": 12140 + }, + { + "epoch": 73.16265060240964, + "grad_norm": 1.3373281955718994, + "learning_rate": 4.878123605533244e-05, + "loss": 0.182, + "step": 12145 + }, + { + "epoch": 73.19277108433735, + "grad_norm": 1.938125491142273, + "learning_rate": 4.877844712182062e-05, + "loss": 0.188, + "step": 12150 + }, + { + "epoch": 73.22289156626506, + "grad_norm": 1.121777057647705, + "learning_rate": 4.8775658188308793e-05, + "loss": 0.1442, + "step": 12155 + }, + { + "epoch": 73.25301204819277, + "grad_norm": 1.318291187286377, + "learning_rate": 4.877286925479697e-05, + "loss": 0.1739, + "step": 12160 + }, + { + "epoch": 73.28313253012048, + "grad_norm": 1.4455350637435913, + "learning_rate": 4.877008032128514e-05, + "loss": 0.1961, + "step": 12165 + }, + { + "epoch": 73.3132530120482, + "grad_norm": 1.7983412742614746, + "learning_rate": 4.8767291387773315e-05, + "loss": 0.1264, + "step": 12170 + }, + { + "epoch": 73.3433734939759, + "grad_norm": 1.8090513944625854, + "learning_rate": 4.876450245426149e-05, + "loss": 0.1842, + "step": 12175 + }, + { + "epoch": 73.37349397590361, + "grad_norm": 1.7406883239746094, + "learning_rate": 4.876171352074967e-05, + "loss": 0.1476, + "step": 12180 + }, + { + "epoch": 73.40361445783132, + "grad_norm": 1.712064504623413, + "learning_rate": 4.875892458723784e-05, + "loss": 0.2022, + "step": 12185 + }, + { + "epoch": 73.43373493975903, + "grad_norm": 1.943896770477295, + "learning_rate": 4.875613565372602e-05, + "loss": 0.1334, + "step": 12190 + }, + { + "epoch": 73.46385542168674, + "grad_norm": 1.7055920362472534, + "learning_rate": 4.875334672021419e-05, + "loss": 0.1452, + "step": 12195 + }, + { + "epoch": 73.49397590361446, + "grad_norm": 1.2974059581756592, + "learning_rate": 4.8750557786702364e-05, + "loss": 0.1817, + "step": 12200 + }, + { + "epoch": 73.52409638554217, + "grad_norm": 1.7666335105895996, + "learning_rate": 4.874776885319054e-05, + "loss": 0.1363, + "step": 12205 + }, + { + "epoch": 73.55421686746988, + "grad_norm": 1.5259292125701904, + "learning_rate": 4.874497991967872e-05, + "loss": 0.1401, + "step": 12210 + }, + { + "epoch": 73.58433734939759, + "grad_norm": 0.9198861122131348, + "learning_rate": 4.874219098616689e-05, + "loss": 0.1451, + "step": 12215 + }, + { + "epoch": 73.6144578313253, + "grad_norm": 1.8410776853561401, + "learning_rate": 4.8739402052655066e-05, + "loss": 0.1501, + "step": 12220 + }, + { + "epoch": 73.644578313253, + "grad_norm": 1.808163046836853, + "learning_rate": 4.873661311914324e-05, + "loss": 0.1307, + "step": 12225 + }, + { + "epoch": 73.67469879518072, + "grad_norm": 1.8669711351394653, + "learning_rate": 4.8733824185631414e-05, + "loss": 0.1443, + "step": 12230 + }, + { + "epoch": 73.70481927710843, + "grad_norm": 1.5613455772399902, + "learning_rate": 4.8731035252119594e-05, + "loss": 0.1701, + "step": 12235 + }, + { + "epoch": 73.73493975903614, + "grad_norm": 1.8711267709732056, + "learning_rate": 4.872824631860777e-05, + "loss": 0.2047, + "step": 12240 + }, + { + "epoch": 73.76506024096386, + "grad_norm": 2.122509479522705, + "learning_rate": 4.872545738509594e-05, + "loss": 0.1515, + "step": 12245 + }, + { + "epoch": 73.79518072289157, + "grad_norm": 2.5126874446868896, + "learning_rate": 4.8722668451584116e-05, + "loss": 0.1601, + "step": 12250 + }, + { + "epoch": 73.82530120481928, + "grad_norm": 1.0848028659820557, + "learning_rate": 4.871987951807229e-05, + "loss": 0.1219, + "step": 12255 + }, + { + "epoch": 73.855421686747, + "grad_norm": 1.7317748069763184, + "learning_rate": 4.8717090584560463e-05, + "loss": 0.1528, + "step": 12260 + }, + { + "epoch": 73.8855421686747, + "grad_norm": 1.4443916082382202, + "learning_rate": 4.8714301651048644e-05, + "loss": 0.1639, + "step": 12265 + }, + { + "epoch": 73.91566265060241, + "grad_norm": 1.4768887758255005, + "learning_rate": 4.871151271753682e-05, + "loss": 0.1726, + "step": 12270 + }, + { + "epoch": 73.94578313253012, + "grad_norm": 1.8033219575881958, + "learning_rate": 4.870872378402499e-05, + "loss": 0.1651, + "step": 12275 + }, + { + "epoch": 73.97590361445783, + "grad_norm": 1.9352055788040161, + "learning_rate": 4.8705934850513165e-05, + "loss": 0.1884, + "step": 12280 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.9496657491152183, + "eval_auc": 0.9825112485130884, + "eval_f1": 0.9245283018867925, + "eval_loss": 0.18663667142391205, + "eval_precision": 0.9572649572649573, + "eval_recall": 0.8939566704675028, + "eval_runtime": 17.2644, + "eval_samples_per_second": 147.297, + "eval_steps_per_second": 0.753, + "step": 12284 + }, + { + "epoch": 74.00602409638554, + "grad_norm": 0.5611174702644348, + "learning_rate": 4.870314591700134e-05, + "loss": 0.1332, + "step": 12285 + }, + { + "epoch": 74.03614457831326, + "grad_norm": 1.4713517427444458, + "learning_rate": 4.870035698348951e-05, + "loss": 0.1472, + "step": 12290 + }, + { + "epoch": 74.06626506024097, + "grad_norm": 1.0096324682235718, + "learning_rate": 4.8697568049977694e-05, + "loss": 0.1281, + "step": 12295 + }, + { + "epoch": 74.09638554216868, + "grad_norm": 2.5639259815216064, + "learning_rate": 4.869477911646587e-05, + "loss": 0.1904, + "step": 12300 + }, + { + "epoch": 74.12650602409639, + "grad_norm": 1.5320582389831543, + "learning_rate": 4.869199018295404e-05, + "loss": 0.1703, + "step": 12305 + }, + { + "epoch": 74.1566265060241, + "grad_norm": 0.815729022026062, + "learning_rate": 4.8689201249442215e-05, + "loss": 0.1134, + "step": 12310 + }, + { + "epoch": 74.1867469879518, + "grad_norm": 1.2700828313827515, + "learning_rate": 4.868641231593039e-05, + "loss": 0.1279, + "step": 12315 + }, + { + "epoch": 74.21686746987952, + "grad_norm": 2.0344655513763428, + "learning_rate": 4.868362338241856e-05, + "loss": 0.1497, + "step": 12320 + }, + { + "epoch": 74.24698795180723, + "grad_norm": 1.7374845743179321, + "learning_rate": 4.868083444890674e-05, + "loss": 0.1701, + "step": 12325 + }, + { + "epoch": 74.27710843373494, + "grad_norm": 1.5035818815231323, + "learning_rate": 4.867804551539492e-05, + "loss": 0.1256, + "step": 12330 + }, + { + "epoch": 74.30722891566265, + "grad_norm": 2.07659912109375, + "learning_rate": 4.867525658188309e-05, + "loss": 0.1431, + "step": 12335 + }, + { + "epoch": 74.33734939759036, + "grad_norm": 1.4296315908432007, + "learning_rate": 4.8672467648371265e-05, + "loss": 0.1318, + "step": 12340 + }, + { + "epoch": 74.36746987951807, + "grad_norm": 2.0391762256622314, + "learning_rate": 4.866967871485944e-05, + "loss": 0.1663, + "step": 12345 + }, + { + "epoch": 74.39759036144578, + "grad_norm": 1.483923077583313, + "learning_rate": 4.866688978134761e-05, + "loss": 0.1721, + "step": 12350 + }, + { + "epoch": 74.42771084337349, + "grad_norm": 1.750510811805725, + "learning_rate": 4.866410084783579e-05, + "loss": 0.1574, + "step": 12355 + }, + { + "epoch": 74.4578313253012, + "grad_norm": 1.6884591579437256, + "learning_rate": 4.8661311914323966e-05, + "loss": 0.1368, + "step": 12360 + }, + { + "epoch": 74.48795180722891, + "grad_norm": 1.468232274055481, + "learning_rate": 4.865852298081214e-05, + "loss": 0.1595, + "step": 12365 + }, + { + "epoch": 74.51807228915662, + "grad_norm": 2.2312231063842773, + "learning_rate": 4.8655734047300314e-05, + "loss": 0.1624, + "step": 12370 + }, + { + "epoch": 74.54819277108433, + "grad_norm": 1.472649335861206, + "learning_rate": 4.865294511378849e-05, + "loss": 0.181, + "step": 12375 + }, + { + "epoch": 74.57831325301204, + "grad_norm": 1.16572105884552, + "learning_rate": 4.865015618027666e-05, + "loss": 0.1374, + "step": 12380 + }, + { + "epoch": 74.60843373493977, + "grad_norm": 0.9645233750343323, + "learning_rate": 4.864736724676484e-05, + "loss": 0.1596, + "step": 12385 + }, + { + "epoch": 74.63855421686748, + "grad_norm": 1.2052770853042603, + "learning_rate": 4.8644578313253016e-05, + "loss": 0.1343, + "step": 12390 + }, + { + "epoch": 74.66867469879519, + "grad_norm": 1.0349658727645874, + "learning_rate": 4.864178937974119e-05, + "loss": 0.1606, + "step": 12395 + }, + { + "epoch": 74.6987951807229, + "grad_norm": 2.264651298522949, + "learning_rate": 4.8639000446229364e-05, + "loss": 0.1383, + "step": 12400 + }, + { + "epoch": 74.7289156626506, + "grad_norm": 1.8398776054382324, + "learning_rate": 4.863621151271754e-05, + "loss": 0.1364, + "step": 12405 + }, + { + "epoch": 74.75903614457832, + "grad_norm": 2.0272128582000732, + "learning_rate": 4.863342257920571e-05, + "loss": 0.1637, + "step": 12410 + }, + { + "epoch": 74.78915662650603, + "grad_norm": 1.3465744256973267, + "learning_rate": 4.863063364569389e-05, + "loss": 0.1429, + "step": 12415 + }, + { + "epoch": 74.81927710843374, + "grad_norm": 1.5959062576293945, + "learning_rate": 4.8627844712182066e-05, + "loss": 0.1452, + "step": 12420 + }, + { + "epoch": 74.84939759036145, + "grad_norm": 1.0067527294158936, + "learning_rate": 4.862505577867024e-05, + "loss": 0.1177, + "step": 12425 + }, + { + "epoch": 74.87951807228916, + "grad_norm": 1.834115982055664, + "learning_rate": 4.862226684515841e-05, + "loss": 0.1295, + "step": 12430 + }, + { + "epoch": 74.90963855421687, + "grad_norm": 0.5453358888626099, + "learning_rate": 4.861947791164659e-05, + "loss": 0.1491, + "step": 12435 + }, + { + "epoch": 74.93975903614458, + "grad_norm": 1.7512986660003662, + "learning_rate": 4.861668897813476e-05, + "loss": 0.1516, + "step": 12440 + }, + { + "epoch": 74.96987951807229, + "grad_norm": 2.071878671646118, + "learning_rate": 4.861390004462294e-05, + "loss": 0.1694, + "step": 12445 + }, + { + "epoch": 75.0, + "grad_norm": 1.0349500179290771, + "learning_rate": 4.8611111111111115e-05, + "loss": 0.1718, + "step": 12450 + }, + { + "epoch": 75.0, + "eval_accuracy": 0.9406213134093591, + "eval_auc": 0.9784043606039907, + "eval_f1": 0.908761329305136, + "eval_loss": 0.23483532667160034, + "eval_precision": 0.9665809768637532, + "eval_recall": 0.8574686431014823, + "eval_runtime": 19.7514, + "eval_samples_per_second": 128.75, + "eval_steps_per_second": 0.658, + "step": 12450 + }, + { + "epoch": 75.03012048192771, + "grad_norm": 1.2066843509674072, + "learning_rate": 4.860832217759929e-05, + "loss": 0.1313, + "step": 12455 + }, + { + "epoch": 75.06024096385542, + "grad_norm": 1.7551440000534058, + "learning_rate": 4.860553324408746e-05, + "loss": 0.136, + "step": 12460 + }, + { + "epoch": 75.09036144578313, + "grad_norm": 0.9596983194351196, + "learning_rate": 4.8602744310575637e-05, + "loss": 0.1539, + "step": 12465 + }, + { + "epoch": 75.12048192771084, + "grad_norm": 0.9710562825202942, + "learning_rate": 4.859995537706381e-05, + "loss": 0.1277, + "step": 12470 + }, + { + "epoch": 75.15060240963855, + "grad_norm": 1.754207730293274, + "learning_rate": 4.859716644355199e-05, + "loss": 0.1658, + "step": 12475 + }, + { + "epoch": 75.18072289156626, + "grad_norm": 1.8475215435028076, + "learning_rate": 4.8594377510040165e-05, + "loss": 0.1616, + "step": 12480 + }, + { + "epoch": 75.21084337349397, + "grad_norm": 0.8701611757278442, + "learning_rate": 4.859158857652834e-05, + "loss": 0.1076, + "step": 12485 + }, + { + "epoch": 75.24096385542168, + "grad_norm": 1.3654662370681763, + "learning_rate": 4.858879964301651e-05, + "loss": 0.1217, + "step": 12490 + }, + { + "epoch": 75.2710843373494, + "grad_norm": 1.7945674657821655, + "learning_rate": 4.8586010709504686e-05, + "loss": 0.1342, + "step": 12495 + }, + { + "epoch": 75.3012048192771, + "grad_norm": 0.8711778521537781, + "learning_rate": 4.858322177599287e-05, + "loss": 0.1648, + "step": 12500 + }, + { + "epoch": 75.33132530120481, + "grad_norm": 1.753820776939392, + "learning_rate": 4.858043284248104e-05, + "loss": 0.1598, + "step": 12505 + }, + { + "epoch": 75.36144578313252, + "grad_norm": 1.9302884340286255, + "learning_rate": 4.8577643908969214e-05, + "loss": 0.13, + "step": 12510 + }, + { + "epoch": 75.39156626506023, + "grad_norm": 1.5695573091506958, + "learning_rate": 4.857485497545739e-05, + "loss": 0.1689, + "step": 12515 + }, + { + "epoch": 75.42168674698796, + "grad_norm": 1.8120839595794678, + "learning_rate": 4.857206604194556e-05, + "loss": 0.1575, + "step": 12520 + }, + { + "epoch": 75.45180722891567, + "grad_norm": 0.9831445217132568, + "learning_rate": 4.8569277108433736e-05, + "loss": 0.1591, + "step": 12525 + }, + { + "epoch": 75.48192771084338, + "grad_norm": 1.8309271335601807, + "learning_rate": 4.8566488174921916e-05, + "loss": 0.1395, + "step": 12530 + }, + { + "epoch": 75.51204819277109, + "grad_norm": 2.22189998626709, + "learning_rate": 4.856369924141009e-05, + "loss": 0.1659, + "step": 12535 + }, + { + "epoch": 75.5421686746988, + "grad_norm": 2.0341784954071045, + "learning_rate": 4.8560910307898264e-05, + "loss": 0.1298, + "step": 12540 + }, + { + "epoch": 75.57228915662651, + "grad_norm": 2.1318440437316895, + "learning_rate": 4.855812137438644e-05, + "loss": 0.2028, + "step": 12545 + }, + { + "epoch": 75.60240963855422, + "grad_norm": 1.3413338661193848, + "learning_rate": 4.855533244087461e-05, + "loss": 0.2001, + "step": 12550 + }, + { + "epoch": 75.63253012048193, + "grad_norm": 2.999337673187256, + "learning_rate": 4.8552543507362785e-05, + "loss": 0.1812, + "step": 12555 + }, + { + "epoch": 75.66265060240964, + "grad_norm": 1.7685872316360474, + "learning_rate": 4.8549754573850966e-05, + "loss": 0.1607, + "step": 12560 + }, + { + "epoch": 75.69277108433735, + "grad_norm": 0.9249743819236755, + "learning_rate": 4.854696564033914e-05, + "loss": 0.1203, + "step": 12565 + }, + { + "epoch": 75.72289156626506, + "grad_norm": 0.767066240310669, + "learning_rate": 4.854417670682731e-05, + "loss": 0.1404, + "step": 12570 + }, + { + "epoch": 75.75301204819277, + "grad_norm": 1.860562801361084, + "learning_rate": 4.854138777331549e-05, + "loss": 0.173, + "step": 12575 + }, + { + "epoch": 75.78313253012048, + "grad_norm": 1.5018879175186157, + "learning_rate": 4.853859883980366e-05, + "loss": 0.149, + "step": 12580 + }, + { + "epoch": 75.8132530120482, + "grad_norm": 1.9827969074249268, + "learning_rate": 4.8535809906291835e-05, + "loss": 0.1642, + "step": 12585 + }, + { + "epoch": 75.8433734939759, + "grad_norm": 2.529618501663208, + "learning_rate": 4.8533020972780015e-05, + "loss": 0.1608, + "step": 12590 + }, + { + "epoch": 75.87349397590361, + "grad_norm": 1.5653445720672607, + "learning_rate": 4.853023203926819e-05, + "loss": 0.1639, + "step": 12595 + }, + { + "epoch": 75.90361445783132, + "grad_norm": 1.8559218645095825, + "learning_rate": 4.8527443105756356e-05, + "loss": 0.1563, + "step": 12600 + }, + { + "epoch": 75.93373493975903, + "grad_norm": 1.2865639925003052, + "learning_rate": 4.852465417224454e-05, + "loss": 0.1613, + "step": 12605 + }, + { + "epoch": 75.96385542168674, + "grad_norm": 1.7536803483963013, + "learning_rate": 4.852186523873271e-05, + "loss": 0.1621, + "step": 12610 + }, + { + "epoch": 75.99397590361446, + "grad_norm": 1.8247182369232178, + "learning_rate": 4.8519076305220884e-05, + "loss": 0.1481, + "step": 12615 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.9429807314195832, + "eval_auc": 0.9826009080941385, + "eval_f1": 0.9141503848431024, + "eval_loss": 0.18916143476963043, + "eval_precision": 0.9507389162561576, + "eval_recall": 0.8802736602052451, + "eval_runtime": 19.7205, + "eval_samples_per_second": 128.952, + "eval_steps_per_second": 0.659, + "step": 12616 + }, + { + "epoch": 76.02409638554217, + "grad_norm": 1.8324910402297974, + "learning_rate": 4.8516287371709065e-05, + "loss": 0.155, + "step": 12620 + }, + { + "epoch": 76.05421686746988, + "grad_norm": 1.6081472635269165, + "learning_rate": 4.851349843819724e-05, + "loss": 0.1395, + "step": 12625 + }, + { + "epoch": 76.08433734939759, + "grad_norm": 2.3763484954833984, + "learning_rate": 4.8510709504685406e-05, + "loss": 0.1555, + "step": 12630 + }, + { + "epoch": 76.1144578313253, + "grad_norm": 1.6817314624786377, + "learning_rate": 4.8507920571173586e-05, + "loss": 0.1345, + "step": 12635 + }, + { + "epoch": 76.144578313253, + "grad_norm": 1.8232126235961914, + "learning_rate": 4.850513163766176e-05, + "loss": 0.131, + "step": 12640 + }, + { + "epoch": 76.17469879518072, + "grad_norm": 1.329211950302124, + "learning_rate": 4.8502342704149934e-05, + "loss": 0.1191, + "step": 12645 + }, + { + "epoch": 76.20481927710843, + "grad_norm": 1.6847299337387085, + "learning_rate": 4.8499553770638114e-05, + "loss": 0.1599, + "step": 12650 + }, + { + "epoch": 76.23493975903614, + "grad_norm": 1.6902977228164673, + "learning_rate": 4.849676483712629e-05, + "loss": 0.1614, + "step": 12655 + }, + { + "epoch": 76.26506024096386, + "grad_norm": 1.05877685546875, + "learning_rate": 4.8493975903614455e-05, + "loss": 0.1795, + "step": 12660 + }, + { + "epoch": 76.29518072289157, + "grad_norm": 1.3436675071716309, + "learning_rate": 4.8491186970102636e-05, + "loss": 0.133, + "step": 12665 + }, + { + "epoch": 76.32530120481928, + "grad_norm": 1.3640121221542358, + "learning_rate": 4.848839803659081e-05, + "loss": 0.1552, + "step": 12670 + }, + { + "epoch": 76.355421686747, + "grad_norm": 2.0116324424743652, + "learning_rate": 4.848560910307898e-05, + "loss": 0.12, + "step": 12675 + }, + { + "epoch": 76.3855421686747, + "grad_norm": 1.7177692651748657, + "learning_rate": 4.8482820169567164e-05, + "loss": 0.1633, + "step": 12680 + }, + { + "epoch": 76.41566265060241, + "grad_norm": 2.392770528793335, + "learning_rate": 4.848003123605534e-05, + "loss": 0.1482, + "step": 12685 + }, + { + "epoch": 76.44578313253012, + "grad_norm": 2.1464264392852783, + "learning_rate": 4.8477242302543505e-05, + "loss": 0.1402, + "step": 12690 + }, + { + "epoch": 76.47590361445783, + "grad_norm": 1.3617451190948486, + "learning_rate": 4.8474453369031685e-05, + "loss": 0.1193, + "step": 12695 + }, + { + "epoch": 76.50602409638554, + "grad_norm": 2.1297426223754883, + "learning_rate": 4.847166443551986e-05, + "loss": 0.1829, + "step": 12700 + }, + { + "epoch": 76.53614457831326, + "grad_norm": 1.5493841171264648, + "learning_rate": 4.846887550200803e-05, + "loss": 0.1352, + "step": 12705 + }, + { + "epoch": 76.56626506024097, + "grad_norm": 1.562913179397583, + "learning_rate": 4.8466086568496214e-05, + "loss": 0.1285, + "step": 12710 + }, + { + "epoch": 76.59638554216868, + "grad_norm": 0.7667553424835205, + "learning_rate": 4.846329763498438e-05, + "loss": 0.1241, + "step": 12715 + }, + { + "epoch": 76.62650602409639, + "grad_norm": 1.4597588777542114, + "learning_rate": 4.8460508701472554e-05, + "loss": 0.1429, + "step": 12720 + }, + { + "epoch": 76.6566265060241, + "grad_norm": 2.1104636192321777, + "learning_rate": 4.8457719767960735e-05, + "loss": 0.1218, + "step": 12725 + }, + { + "epoch": 76.6867469879518, + "grad_norm": 0.8995615839958191, + "learning_rate": 4.845493083444891e-05, + "loss": 0.1343, + "step": 12730 + }, + { + "epoch": 76.71686746987952, + "grad_norm": 1.0554083585739136, + "learning_rate": 4.845214190093708e-05, + "loss": 0.1517, + "step": 12735 + }, + { + "epoch": 76.74698795180723, + "grad_norm": 2.7860047817230225, + "learning_rate": 4.844935296742526e-05, + "loss": 0.1242, + "step": 12740 + }, + { + "epoch": 76.77710843373494, + "grad_norm": 2.118823528289795, + "learning_rate": 4.844656403391343e-05, + "loss": 0.1753, + "step": 12745 + }, + { + "epoch": 76.80722891566265, + "grad_norm": 1.9358220100402832, + "learning_rate": 4.8443775100401604e-05, + "loss": 0.1583, + "step": 12750 + }, + { + "epoch": 76.83734939759036, + "grad_norm": 1.3970794677734375, + "learning_rate": 4.8440986166889784e-05, + "loss": 0.119, + "step": 12755 + }, + { + "epoch": 76.86746987951807, + "grad_norm": 1.7617546319961548, + "learning_rate": 4.843819723337796e-05, + "loss": 0.1805, + "step": 12760 + }, + { + "epoch": 76.89759036144578, + "grad_norm": 1.2866977453231812, + "learning_rate": 4.843540829986614e-05, + "loss": 0.1619, + "step": 12765 + }, + { + "epoch": 76.92771084337349, + "grad_norm": 1.5546061992645264, + "learning_rate": 4.843261936635431e-05, + "loss": 0.147, + "step": 12770 + }, + { + "epoch": 76.9578313253012, + "grad_norm": 1.4257575273513794, + "learning_rate": 4.842983043284248e-05, + "loss": 0.1575, + "step": 12775 + }, + { + "epoch": 76.98795180722891, + "grad_norm": 1.601998209953308, + "learning_rate": 4.842704149933066e-05, + "loss": 0.159, + "step": 12780 + }, + { + "epoch": 77.0, + "eval_accuracy": 0.9335430593786866, + "eval_auc": 0.9828859708079355, + "eval_f1": 0.8963825873697119, + "eval_loss": 0.23264428973197937, + "eval_precision": 0.9694960212201591, + "eval_recall": 0.8335233751425314, + "eval_runtime": 19.5293, + "eval_samples_per_second": 130.214, + "eval_steps_per_second": 0.666, + "step": 12782 + }, + { + "epoch": 77.01807228915662, + "grad_norm": 0.8001152276992798, + "learning_rate": 4.8424252565818834e-05, + "loss": 0.1266, + "step": 12785 + }, + { + "epoch": 77.04819277108433, + "grad_norm": 1.8577905893325806, + "learning_rate": 4.842146363230701e-05, + "loss": 0.1358, + "step": 12790 + }, + { + "epoch": 77.07831325301204, + "grad_norm": 1.4647650718688965, + "learning_rate": 4.841867469879519e-05, + "loss": 0.1319, + "step": 12795 + }, + { + "epoch": 77.10843373493977, + "grad_norm": 1.3896921873092651, + "learning_rate": 4.8415885765283355e-05, + "loss": 0.1553, + "step": 12800 + }, + { + "epoch": 77.13855421686748, + "grad_norm": 1.3923394680023193, + "learning_rate": 4.841309683177153e-05, + "loss": 0.1246, + "step": 12805 + }, + { + "epoch": 77.16867469879519, + "grad_norm": 1.335044503211975, + "learning_rate": 4.841030789825971e-05, + "loss": 0.1555, + "step": 12810 + }, + { + "epoch": 77.1987951807229, + "grad_norm": 1.0598628520965576, + "learning_rate": 4.8407518964747884e-05, + "loss": 0.1296, + "step": 12815 + }, + { + "epoch": 77.2289156626506, + "grad_norm": 1.370320200920105, + "learning_rate": 4.840473003123606e-05, + "loss": 0.1289, + "step": 12820 + }, + { + "epoch": 77.25903614457832, + "grad_norm": 2.2430672645568848, + "learning_rate": 4.840194109772424e-05, + "loss": 0.2017, + "step": 12825 + }, + { + "epoch": 77.28915662650603, + "grad_norm": 0.730043351650238, + "learning_rate": 4.8399152164212405e-05, + "loss": 0.1927, + "step": 12830 + }, + { + "epoch": 77.31927710843374, + "grad_norm": 1.2062714099884033, + "learning_rate": 4.839636323070058e-05, + "loss": 0.1194, + "step": 12835 + }, + { + "epoch": 77.34939759036145, + "grad_norm": 1.6250019073486328, + "learning_rate": 4.839357429718876e-05, + "loss": 0.1492, + "step": 12840 + }, + { + "epoch": 77.37951807228916, + "grad_norm": 1.2313861846923828, + "learning_rate": 4.839078536367693e-05, + "loss": 0.1086, + "step": 12845 + }, + { + "epoch": 77.40963855421687, + "grad_norm": 2.189713478088379, + "learning_rate": 4.838799643016511e-05, + "loss": 0.1701, + "step": 12850 + }, + { + "epoch": 77.43975903614458, + "grad_norm": 1.831549882888794, + "learning_rate": 4.838520749665329e-05, + "loss": 0.1678, + "step": 12855 + }, + { + "epoch": 77.46987951807229, + "grad_norm": 1.2970221042633057, + "learning_rate": 4.8382418563141454e-05, + "loss": 0.1435, + "step": 12860 + }, + { + "epoch": 77.5, + "grad_norm": 1.7378772497177124, + "learning_rate": 4.837962962962963e-05, + "loss": 0.1353, + "step": 12865 + }, + { + "epoch": 77.53012048192771, + "grad_norm": 1.8176010847091675, + "learning_rate": 4.837684069611781e-05, + "loss": 0.1281, + "step": 12870 + }, + { + "epoch": 77.56024096385542, + "grad_norm": 1.1678860187530518, + "learning_rate": 4.837405176260598e-05, + "loss": 0.1532, + "step": 12875 + }, + { + "epoch": 77.59036144578313, + "grad_norm": 1.607122778892517, + "learning_rate": 4.8371262829094156e-05, + "loss": 0.1976, + "step": 12880 + }, + { + "epoch": 77.62048192771084, + "grad_norm": 1.2736643552780151, + "learning_rate": 4.836847389558233e-05, + "loss": 0.1448, + "step": 12885 + }, + { + "epoch": 77.65060240963855, + "grad_norm": 1.4824341535568237, + "learning_rate": 4.8365684962070504e-05, + "loss": 0.1351, + "step": 12890 + }, + { + "epoch": 77.68072289156626, + "grad_norm": 1.9544227123260498, + "learning_rate": 4.836289602855868e-05, + "loss": 0.1599, + "step": 12895 + }, + { + "epoch": 77.71084337349397, + "grad_norm": 1.311608910560608, + "learning_rate": 4.836010709504686e-05, + "loss": 0.1564, + "step": 12900 + }, + { + "epoch": 77.74096385542168, + "grad_norm": 1.8438730239868164, + "learning_rate": 4.835731816153503e-05, + "loss": 0.1607, + "step": 12905 + }, + { + "epoch": 77.7710843373494, + "grad_norm": 1.3831279277801514, + "learning_rate": 4.8354529228023206e-05, + "loss": 0.1686, + "step": 12910 + }, + { + "epoch": 77.8012048192771, + "grad_norm": 2.1410272121429443, + "learning_rate": 4.835174029451138e-05, + "loss": 0.1589, + "step": 12915 + }, + { + "epoch": 77.83132530120481, + "grad_norm": 1.1024550199508667, + "learning_rate": 4.8348951360999554e-05, + "loss": 0.1355, + "step": 12920 + }, + { + "epoch": 77.86144578313252, + "grad_norm": 1.274852991104126, + "learning_rate": 4.834616242748773e-05, + "loss": 0.1728, + "step": 12925 + }, + { + "epoch": 77.89156626506023, + "grad_norm": 1.4424153566360474, + "learning_rate": 4.834337349397591e-05, + "loss": 0.143, + "step": 12930 + }, + { + "epoch": 77.92168674698796, + "grad_norm": 0.7716652750968933, + "learning_rate": 4.834058456046408e-05, + "loss": 0.1549, + "step": 12935 + }, + { + "epoch": 77.95180722891567, + "grad_norm": 0.8140466809272766, + "learning_rate": 4.8337795626952256e-05, + "loss": 0.1277, + "step": 12940 + }, + { + "epoch": 77.98192771084338, + "grad_norm": 2.625098705291748, + "learning_rate": 4.833500669344043e-05, + "loss": 0.1516, + "step": 12945 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.9303971686983877, + "eval_auc": 0.9816009642169297, + "eval_f1": 0.8910769230769231, + "eval_loss": 0.2438606321811676, + "eval_precision": 0.9679144385026738, + "eval_recall": 0.8255416191562144, + "eval_runtime": 17.1969, + "eval_samples_per_second": 147.875, + "eval_steps_per_second": 0.756, + "step": 12948 + }, + { + "epoch": 78.01204819277109, + "grad_norm": 1.4964710474014282, + "learning_rate": 4.83322177599286e-05, + "loss": 0.1541, + "step": 12950 + }, + { + "epoch": 78.0421686746988, + "grad_norm": 1.4898884296417236, + "learning_rate": 4.832942882641678e-05, + "loss": 0.1372, + "step": 12955 + }, + { + "epoch": 78.07228915662651, + "grad_norm": 1.0486506223678589, + "learning_rate": 4.832663989290496e-05, + "loss": 0.1392, + "step": 12960 + }, + { + "epoch": 78.10240963855422, + "grad_norm": 1.1756079196929932, + "learning_rate": 4.832385095939313e-05, + "loss": 0.1627, + "step": 12965 + }, + { + "epoch": 78.13253012048193, + "grad_norm": 1.2145124673843384, + "learning_rate": 4.8321062025881305e-05, + "loss": 0.1319, + "step": 12970 + }, + { + "epoch": 78.16265060240964, + "grad_norm": 1.7100399732589722, + "learning_rate": 4.831827309236948e-05, + "loss": 0.1514, + "step": 12975 + }, + { + "epoch": 78.19277108433735, + "grad_norm": 2.0911970138549805, + "learning_rate": 4.831548415885765e-05, + "loss": 0.1498, + "step": 12980 + }, + { + "epoch": 78.22289156626506, + "grad_norm": 0.9005778431892395, + "learning_rate": 4.8312695225345826e-05, + "loss": 0.141, + "step": 12985 + }, + { + "epoch": 78.25301204819277, + "grad_norm": 0.8048529028892517, + "learning_rate": 4.830990629183401e-05, + "loss": 0.1234, + "step": 12990 + }, + { + "epoch": 78.28313253012048, + "grad_norm": 2.1382839679718018, + "learning_rate": 4.830711735832218e-05, + "loss": 0.1294, + "step": 12995 + }, + { + "epoch": 78.3132530120482, + "grad_norm": 2.0718533992767334, + "learning_rate": 4.8304328424810355e-05, + "loss": 0.1749, + "step": 13000 + }, + { + "epoch": 78.3433734939759, + "grad_norm": 1.7664657831192017, + "learning_rate": 4.830153949129853e-05, + "loss": 0.1551, + "step": 13005 + }, + { + "epoch": 78.37349397590361, + "grad_norm": 2.1365315914154053, + "learning_rate": 4.82987505577867e-05, + "loss": 0.1415, + "step": 13010 + }, + { + "epoch": 78.40361445783132, + "grad_norm": 2.132537603378296, + "learning_rate": 4.8295961624274876e-05, + "loss": 0.1177, + "step": 13015 + }, + { + "epoch": 78.43373493975903, + "grad_norm": 1.7908108234405518, + "learning_rate": 4.8293172690763057e-05, + "loss": 0.1503, + "step": 13020 + }, + { + "epoch": 78.46385542168674, + "grad_norm": 2.238882064819336, + "learning_rate": 4.829038375725123e-05, + "loss": 0.1397, + "step": 13025 + }, + { + "epoch": 78.49397590361446, + "grad_norm": 1.7467546463012695, + "learning_rate": 4.8287594823739404e-05, + "loss": 0.1553, + "step": 13030 + }, + { + "epoch": 78.52409638554217, + "grad_norm": 1.1847363710403442, + "learning_rate": 4.828480589022758e-05, + "loss": 0.1648, + "step": 13035 + }, + { + "epoch": 78.55421686746988, + "grad_norm": 1.646297574043274, + "learning_rate": 4.828201695671575e-05, + "loss": 0.1653, + "step": 13040 + }, + { + "epoch": 78.58433734939759, + "grad_norm": 1.0398398637771606, + "learning_rate": 4.827922802320393e-05, + "loss": 0.1528, + "step": 13045 + }, + { + "epoch": 78.6144578313253, + "grad_norm": 1.7448629140853882, + "learning_rate": 4.8276439089692106e-05, + "loss": 0.1327, + "step": 13050 + }, + { + "epoch": 78.644578313253, + "grad_norm": 1.9700462818145752, + "learning_rate": 4.827365015618028e-05, + "loss": 0.1495, + "step": 13055 + }, + { + "epoch": 78.67469879518072, + "grad_norm": 1.2562404870986938, + "learning_rate": 4.8270861222668454e-05, + "loss": 0.151, + "step": 13060 + }, + { + "epoch": 78.70481927710843, + "grad_norm": 1.384185552597046, + "learning_rate": 4.826807228915663e-05, + "loss": 0.1414, + "step": 13065 + }, + { + "epoch": 78.73493975903614, + "grad_norm": 1.5802093744277954, + "learning_rate": 4.82652833556448e-05, + "loss": 0.1524, + "step": 13070 + }, + { + "epoch": 78.76506024096386, + "grad_norm": 1.0003732442855835, + "learning_rate": 4.826249442213298e-05, + "loss": 0.1264, + "step": 13075 + }, + { + "epoch": 78.79518072289157, + "grad_norm": 1.2597041130065918, + "learning_rate": 4.8259705488621156e-05, + "loss": 0.1209, + "step": 13080 + }, + { + "epoch": 78.82530120481928, + "grad_norm": 1.2785955667495728, + "learning_rate": 4.825691655510933e-05, + "loss": 0.121, + "step": 13085 + }, + { + "epoch": 78.855421686747, + "grad_norm": 1.2658116817474365, + "learning_rate": 4.82541276215975e-05, + "loss": 0.1855, + "step": 13090 + }, + { + "epoch": 78.8855421686747, + "grad_norm": 2.0068914890289307, + "learning_rate": 4.825133868808568e-05, + "loss": 0.173, + "step": 13095 + }, + { + "epoch": 78.91566265060241, + "grad_norm": 2.194626808166504, + "learning_rate": 4.824854975457385e-05, + "loss": 0.1793, + "step": 13100 + }, + { + "epoch": 78.94578313253012, + "grad_norm": 0.5974365472793579, + "learning_rate": 4.824576082106203e-05, + "loss": 0.1157, + "step": 13105 + }, + { + "epoch": 78.97590361445783, + "grad_norm": 1.793129563331604, + "learning_rate": 4.8242971887550205e-05, + "loss": 0.1381, + "step": 13110 + }, + { + "epoch": 79.0, + "eval_accuracy": 0.9445536767597326, + "eval_auc": 0.9816995213136567, + "eval_f1": 0.9154169166166767, + "eval_loss": 0.21309958398342133, + "eval_precision": 0.9658227848101266, + "eval_recall": 0.8700114025085519, + "eval_runtime": 16.8826, + "eval_samples_per_second": 150.629, + "eval_steps_per_second": 0.77, + "step": 13114 + }, + { + "epoch": 79.00602409638554, + "grad_norm": 1.3499771356582642, + "learning_rate": 4.824018295403838e-05, + "loss": 0.137, + "step": 13115 + }, + { + "epoch": 79.03614457831326, + "grad_norm": 1.3147327899932861, + "learning_rate": 4.823739402052655e-05, + "loss": 0.1477, + "step": 13120 + }, + { + "epoch": 79.06626506024097, + "grad_norm": 1.6996004581451416, + "learning_rate": 4.823460508701473e-05, + "loss": 0.1337, + "step": 13125 + }, + { + "epoch": 79.09638554216868, + "grad_norm": 1.8124324083328247, + "learning_rate": 4.82318161535029e-05, + "loss": 0.154, + "step": 13130 + }, + { + "epoch": 79.12650602409639, + "grad_norm": 0.9253437519073486, + "learning_rate": 4.822902721999108e-05, + "loss": 0.1171, + "step": 13135 + }, + { + "epoch": 79.1566265060241, + "grad_norm": 1.4336225986480713, + "learning_rate": 4.8226238286479255e-05, + "loss": 0.1267, + "step": 13140 + }, + { + "epoch": 79.1867469879518, + "grad_norm": 0.7874173521995544, + "learning_rate": 4.822344935296743e-05, + "loss": 0.1139, + "step": 13145 + }, + { + "epoch": 79.21686746987952, + "grad_norm": 2.4000792503356934, + "learning_rate": 4.82206604194556e-05, + "loss": 0.1352, + "step": 13150 + }, + { + "epoch": 79.24698795180723, + "grad_norm": 1.6120721101760864, + "learning_rate": 4.8217871485943776e-05, + "loss": 0.1332, + "step": 13155 + }, + { + "epoch": 79.27710843373494, + "grad_norm": 2.109287738800049, + "learning_rate": 4.821508255243195e-05, + "loss": 0.1505, + "step": 13160 + }, + { + "epoch": 79.30722891566265, + "grad_norm": 1.2057571411132812, + "learning_rate": 4.821229361892013e-05, + "loss": 0.13, + "step": 13165 + }, + { + "epoch": 79.33734939759036, + "grad_norm": 1.8289693593978882, + "learning_rate": 4.8209504685408304e-05, + "loss": 0.1708, + "step": 13170 + }, + { + "epoch": 79.36746987951807, + "grad_norm": 1.6963151693344116, + "learning_rate": 4.820671575189648e-05, + "loss": 0.1157, + "step": 13175 + }, + { + "epoch": 79.39759036144578, + "grad_norm": 1.7568334341049194, + "learning_rate": 4.820392681838465e-05, + "loss": 0.1222, + "step": 13180 + }, + { + "epoch": 79.42771084337349, + "grad_norm": 1.2924585342407227, + "learning_rate": 4.8201137884872826e-05, + "loss": 0.1325, + "step": 13185 + }, + { + "epoch": 79.4578313253012, + "grad_norm": 1.5911897420883179, + "learning_rate": 4.8198348951361e-05, + "loss": 0.1534, + "step": 13190 + }, + { + "epoch": 79.48795180722891, + "grad_norm": 2.340956926345825, + "learning_rate": 4.819556001784918e-05, + "loss": 0.1413, + "step": 13195 + }, + { + "epoch": 79.51807228915662, + "grad_norm": 1.9254859685897827, + "learning_rate": 4.8192771084337354e-05, + "loss": 0.1404, + "step": 13200 + }, + { + "epoch": 79.54819277108433, + "grad_norm": 2.4253478050231934, + "learning_rate": 4.818998215082553e-05, + "loss": 0.1981, + "step": 13205 + }, + { + "epoch": 79.57831325301204, + "grad_norm": 1.8151286840438843, + "learning_rate": 4.81871932173137e-05, + "loss": 0.1392, + "step": 13210 + }, + { + "epoch": 79.60843373493977, + "grad_norm": 2.336282968521118, + "learning_rate": 4.8184404283801875e-05, + "loss": 0.1375, + "step": 13215 + }, + { + "epoch": 79.63855421686748, + "grad_norm": 1.485291838645935, + "learning_rate": 4.818161535029005e-05, + "loss": 0.1329, + "step": 13220 + }, + { + "epoch": 79.66867469879519, + "grad_norm": 1.2211674451828003, + "learning_rate": 4.817882641677823e-05, + "loss": 0.1499, + "step": 13225 + }, + { + "epoch": 79.6987951807229, + "grad_norm": 3.9037880897521973, + "learning_rate": 4.8176037483266403e-05, + "loss": 0.1366, + "step": 13230 + }, + { + "epoch": 79.7289156626506, + "grad_norm": 2.195895195007324, + "learning_rate": 4.817324854975457e-05, + "loss": 0.124, + "step": 13235 + }, + { + "epoch": 79.75903614457832, + "grad_norm": 2.0953190326690674, + "learning_rate": 4.817045961624275e-05, + "loss": 0.1333, + "step": 13240 + }, + { + "epoch": 79.78915662650603, + "grad_norm": 0.6409681439399719, + "learning_rate": 4.8167670682730925e-05, + "loss": 0.1607, + "step": 13245 + }, + { + "epoch": 79.81927710843374, + "grad_norm": 0.958230197429657, + "learning_rate": 4.81648817492191e-05, + "loss": 0.1246, + "step": 13250 + }, + { + "epoch": 79.84939759036145, + "grad_norm": 1.3889961242675781, + "learning_rate": 4.816209281570728e-05, + "loss": 0.1231, + "step": 13255 + }, + { + "epoch": 79.87951807228916, + "grad_norm": 1.667174220085144, + "learning_rate": 4.815930388219545e-05, + "loss": 0.1712, + "step": 13260 + }, + { + "epoch": 79.90963855421687, + "grad_norm": 2.9418723583221436, + "learning_rate": 4.815651494868362e-05, + "loss": 0.1526, + "step": 13265 + }, + { + "epoch": 79.93975903614458, + "grad_norm": 2.489426612854004, + "learning_rate": 4.81537260151718e-05, + "loss": 0.1575, + "step": 13270 + }, + { + "epoch": 79.96987951807229, + "grad_norm": 1.1667286157608032, + "learning_rate": 4.8150937081659974e-05, + "loss": 0.0973, + "step": 13275 + }, + { + "epoch": 80.0, + "grad_norm": 1.7366154193878174, + "learning_rate": 4.814814814814815e-05, + "loss": 0.1904, + "step": 13280 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.9359024773889107, + "eval_auc": 0.9796424841316229, + "eval_f1": 0.9013914095583787, + "eval_loss": 0.2617673873901367, + "eval_precision": 0.9600515463917526, + "eval_recall": 0.8494868871151653, + "eval_runtime": 17.1382, + "eval_samples_per_second": 148.382, + "eval_steps_per_second": 0.759, + "step": 13280 + }, + { + "epoch": 80.03012048192771, + "grad_norm": 2.099924087524414, + "learning_rate": 4.814535921463633e-05, + "loss": 0.1552, + "step": 13285 + }, + { + "epoch": 80.06024096385542, + "grad_norm": 2.3259527683258057, + "learning_rate": 4.81425702811245e-05, + "loss": 0.1649, + "step": 13290 + }, + { + "epoch": 80.09036144578313, + "grad_norm": 2.0278472900390625, + "learning_rate": 4.8139781347612676e-05, + "loss": 0.1454, + "step": 13295 + }, + { + "epoch": 80.12048192771084, + "grad_norm": 1.2483811378479004, + "learning_rate": 4.813699241410085e-05, + "loss": 0.1447, + "step": 13300 + }, + { + "epoch": 80.15060240963855, + "grad_norm": 1.6578233242034912, + "learning_rate": 4.8134203480589024e-05, + "loss": 0.149, + "step": 13305 + }, + { + "epoch": 80.18072289156626, + "grad_norm": 0.9661278128623962, + "learning_rate": 4.8131414547077205e-05, + "loss": 0.1159, + "step": 13310 + }, + { + "epoch": 80.21084337349397, + "grad_norm": 0.6693234443664551, + "learning_rate": 4.812862561356538e-05, + "loss": 0.1184, + "step": 13315 + }, + { + "epoch": 80.24096385542168, + "grad_norm": 2.000763416290283, + "learning_rate": 4.812583668005355e-05, + "loss": 0.1051, + "step": 13320 + }, + { + "epoch": 80.2710843373494, + "grad_norm": 1.2876056432724, + "learning_rate": 4.8123047746541726e-05, + "loss": 0.1108, + "step": 13325 + }, + { + "epoch": 80.3012048192771, + "grad_norm": 2.2616982460021973, + "learning_rate": 4.81202588130299e-05, + "loss": 0.1141, + "step": 13330 + }, + { + "epoch": 80.33132530120481, + "grad_norm": 2.013023614883423, + "learning_rate": 4.8117469879518074e-05, + "loss": 0.1253, + "step": 13335 + }, + { + "epoch": 80.36144578313252, + "grad_norm": 1.6886414289474487, + "learning_rate": 4.8114680946006254e-05, + "loss": 0.1459, + "step": 13340 + }, + { + "epoch": 80.39156626506023, + "grad_norm": 0.890705406665802, + "learning_rate": 4.811189201249443e-05, + "loss": 0.1345, + "step": 13345 + }, + { + "epoch": 80.42168674698796, + "grad_norm": 1.505310297012329, + "learning_rate": 4.8109103078982595e-05, + "loss": 0.1349, + "step": 13350 + }, + { + "epoch": 80.45180722891567, + "grad_norm": 0.7418832778930664, + "learning_rate": 4.8106314145470775e-05, + "loss": 0.1217, + "step": 13355 + }, + { + "epoch": 80.48192771084338, + "grad_norm": 2.120851993560791, + "learning_rate": 4.810352521195895e-05, + "loss": 0.1624, + "step": 13360 + }, + { + "epoch": 80.51204819277109, + "grad_norm": 0.7259140014648438, + "learning_rate": 4.810073627844712e-05, + "loss": 0.1541, + "step": 13365 + }, + { + "epoch": 80.5421686746988, + "grad_norm": 1.0406228303909302, + "learning_rate": 4.8097947344935304e-05, + "loss": 0.1148, + "step": 13370 + }, + { + "epoch": 80.57228915662651, + "grad_norm": 0.7475118637084961, + "learning_rate": 4.809515841142348e-05, + "loss": 0.1043, + "step": 13375 + }, + { + "epoch": 80.60240963855422, + "grad_norm": 1.8983980417251587, + "learning_rate": 4.8092369477911644e-05, + "loss": 0.1558, + "step": 13380 + }, + { + "epoch": 80.63253012048193, + "grad_norm": 2.024404287338257, + "learning_rate": 4.8089580544399825e-05, + "loss": 0.1794, + "step": 13385 + }, + { + "epoch": 80.66265060240964, + "grad_norm": 1.3346220254898071, + "learning_rate": 4.8086791610888e-05, + "loss": 0.1716, + "step": 13390 + }, + { + "epoch": 80.69277108433735, + "grad_norm": 1.786770224571228, + "learning_rate": 4.808400267737617e-05, + "loss": 0.1154, + "step": 13395 + }, + { + "epoch": 80.72289156626506, + "grad_norm": 1.6041686534881592, + "learning_rate": 4.808121374386435e-05, + "loss": 0.1529, + "step": 13400 + }, + { + "epoch": 80.75301204819277, + "grad_norm": 1.4374834299087524, + "learning_rate": 4.807842481035253e-05, + "loss": 0.1418, + "step": 13405 + }, + { + "epoch": 80.78313253012048, + "grad_norm": 2.041236639022827, + "learning_rate": 4.8075635876840694e-05, + "loss": 0.1339, + "step": 13410 + }, + { + "epoch": 80.8132530120482, + "grad_norm": 2.5768935680389404, + "learning_rate": 4.8072846943328875e-05, + "loss": 0.1567, + "step": 13415 + }, + { + "epoch": 80.8433734939759, + "grad_norm": 1.8797093629837036, + "learning_rate": 4.807005800981705e-05, + "loss": 0.1377, + "step": 13420 + }, + { + "epoch": 80.87349397590361, + "grad_norm": 1.3676620721817017, + "learning_rate": 4.806726907630522e-05, + "loss": 0.1501, + "step": 13425 + }, + { + "epoch": 80.90361445783132, + "grad_norm": 1.518554925918579, + "learning_rate": 4.80644801427934e-05, + "loss": 0.1604, + "step": 13430 + }, + { + "epoch": 80.93373493975903, + "grad_norm": 1.760703444480896, + "learning_rate": 4.806169120928157e-05, + "loss": 0.1476, + "step": 13435 + }, + { + "epoch": 80.96385542168674, + "grad_norm": 1.6967287063598633, + "learning_rate": 4.8058902275769744e-05, + "loss": 0.1615, + "step": 13440 + }, + { + "epoch": 80.99397590361446, + "grad_norm": 0.8997840285301208, + "learning_rate": 4.8056113342257924e-05, + "loss": 0.1547, + "step": 13445 + }, + { + "epoch": 81.0, + "eval_accuracy": 0.94494691309477, + "eval_auc": 0.9844738351440918, + "eval_f1": 0.9163679808841099, + "eval_loss": 0.20724187791347504, + "eval_precision": 0.9623588456712673, + "eval_recall": 0.8745724059293044, + "eval_runtime": 16.8526, + "eval_samples_per_second": 150.896, + "eval_steps_per_second": 0.771, + "step": 13446 + }, + { + "epoch": 81.02409638554217, + "grad_norm": 1.3024643659591675, + "learning_rate": 4.80533244087461e-05, + "loss": 0.1334, + "step": 13450 + }, + { + "epoch": 81.05421686746988, + "grad_norm": 1.3637980222702026, + "learning_rate": 4.805053547523427e-05, + "loss": 0.1369, + "step": 13455 + }, + { + "epoch": 81.08433734939759, + "grad_norm": 1.1892391443252563, + "learning_rate": 4.804774654172245e-05, + "loss": 0.1497, + "step": 13460 + }, + { + "epoch": 81.1144578313253, + "grad_norm": 1.665014624595642, + "learning_rate": 4.804495760821062e-05, + "loss": 0.1227, + "step": 13465 + }, + { + "epoch": 81.144578313253, + "grad_norm": 1.7259368896484375, + "learning_rate": 4.804216867469879e-05, + "loss": 0.1468, + "step": 13470 + }, + { + "epoch": 81.17469879518072, + "grad_norm": 1.2737250328063965, + "learning_rate": 4.8039379741186974e-05, + "loss": 0.1334, + "step": 13475 + }, + { + "epoch": 81.20481927710843, + "grad_norm": 1.6451976299285889, + "learning_rate": 4.803659080767515e-05, + "loss": 0.1112, + "step": 13480 + }, + { + "epoch": 81.23493975903614, + "grad_norm": 2.124725341796875, + "learning_rate": 4.803380187416332e-05, + "loss": 0.1246, + "step": 13485 + }, + { + "epoch": 81.26506024096386, + "grad_norm": 0.8188139796257019, + "learning_rate": 4.80310129406515e-05, + "loss": 0.1303, + "step": 13490 + }, + { + "epoch": 81.29518072289157, + "grad_norm": 1.6451257467269897, + "learning_rate": 4.802822400713967e-05, + "loss": 0.1185, + "step": 13495 + }, + { + "epoch": 81.32530120481928, + "grad_norm": 1.5442471504211426, + "learning_rate": 4.802543507362784e-05, + "loss": 0.1691, + "step": 13500 + }, + { + "epoch": 81.355421686747, + "grad_norm": 2.715916633605957, + "learning_rate": 4.802264614011602e-05, + "loss": 0.1385, + "step": 13505 + }, + { + "epoch": 81.3855421686747, + "grad_norm": 2.193662643432617, + "learning_rate": 4.80198572066042e-05, + "loss": 0.1342, + "step": 13510 + }, + { + "epoch": 81.41566265060241, + "grad_norm": 1.1958253383636475, + "learning_rate": 4.801706827309237e-05, + "loss": 0.1308, + "step": 13515 + }, + { + "epoch": 81.44578313253012, + "grad_norm": 1.690035343170166, + "learning_rate": 4.8014279339580545e-05, + "loss": 0.1178, + "step": 13520 + }, + { + "epoch": 81.47590361445783, + "grad_norm": 1.9274437427520752, + "learning_rate": 4.801149040606872e-05, + "loss": 0.1432, + "step": 13525 + }, + { + "epoch": 81.50602409638554, + "grad_norm": 1.0109832286834717, + "learning_rate": 4.800870147255689e-05, + "loss": 0.1213, + "step": 13530 + }, + { + "epoch": 81.53614457831326, + "grad_norm": 2.056666374206543, + "learning_rate": 4.800591253904507e-05, + "loss": 0.132, + "step": 13535 + }, + { + "epoch": 81.56626506024097, + "grad_norm": 1.9702845811843872, + "learning_rate": 4.8003123605533247e-05, + "loss": 0.1443, + "step": 13540 + }, + { + "epoch": 81.59638554216868, + "grad_norm": 1.5064159631729126, + "learning_rate": 4.800033467202142e-05, + "loss": 0.1233, + "step": 13545 + }, + { + "epoch": 81.62650602409639, + "grad_norm": 2.276059627532959, + "learning_rate": 4.7997545738509594e-05, + "loss": 0.1261, + "step": 13550 + }, + { + "epoch": 81.6566265060241, + "grad_norm": 1.5666084289550781, + "learning_rate": 4.799475680499777e-05, + "loss": 0.1236, + "step": 13555 + }, + { + "epoch": 81.6867469879518, + "grad_norm": 2.2686519622802734, + "learning_rate": 4.799196787148594e-05, + "loss": 0.1748, + "step": 13560 + }, + { + "epoch": 81.71686746987952, + "grad_norm": 2.449956178665161, + "learning_rate": 4.798917893797412e-05, + "loss": 0.1864, + "step": 13565 + }, + { + "epoch": 81.74698795180723, + "grad_norm": 1.606286883354187, + "learning_rate": 4.7986390004462296e-05, + "loss": 0.1134, + "step": 13570 + }, + { + "epoch": 81.77710843373494, + "grad_norm": 1.2915971279144287, + "learning_rate": 4.798360107095048e-05, + "loss": 0.1477, + "step": 13575 + }, + { + "epoch": 81.80722891566265, + "grad_norm": 2.0134148597717285, + "learning_rate": 4.7980812137438644e-05, + "loss": 0.1765, + "step": 13580 + }, + { + "epoch": 81.83734939759036, + "grad_norm": 1.4820908308029175, + "learning_rate": 4.797802320392682e-05, + "loss": 0.1427, + "step": 13585 + }, + { + "epoch": 81.86746987951807, + "grad_norm": 3.5725741386413574, + "learning_rate": 4.7975234270415e-05, + "loss": 0.1195, + "step": 13590 + }, + { + "epoch": 81.89759036144578, + "grad_norm": 1.6857229471206665, + "learning_rate": 4.797244533690317e-05, + "loss": 0.1409, + "step": 13595 + }, + { + "epoch": 81.92771084337349, + "grad_norm": 0.9184637069702148, + "learning_rate": 4.7969656403391346e-05, + "loss": 0.1504, + "step": 13600 + }, + { + "epoch": 81.9578313253012, + "grad_norm": 1.696109414100647, + "learning_rate": 4.796686746987952e-05, + "loss": 0.1665, + "step": 13605 + }, + { + "epoch": 81.98795180722891, + "grad_norm": 2.1991405487060547, + "learning_rate": 4.796407853636769e-05, + "loss": 0.1613, + "step": 13610 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.9473063311049941, + "eval_auc": 0.9821652720381198, + "eval_f1": 0.9201430274135876, + "eval_loss": 0.20730562508106232, + "eval_precision": 0.9637952559300874, + "eval_recall": 0.8802736602052451, + "eval_runtime": 16.6574, + "eval_samples_per_second": 152.665, + "eval_steps_per_second": 0.78, + "step": 13612 + }, + { + "epoch": 82.01807228915662, + "grad_norm": 1.758849024772644, + "learning_rate": 4.796128960285587e-05, + "loss": 0.1349, + "step": 13615 + }, + { + "epoch": 82.04819277108433, + "grad_norm": 0.6276155710220337, + "learning_rate": 4.795850066934405e-05, + "loss": 0.0966, + "step": 13620 + }, + { + "epoch": 82.07831325301204, + "grad_norm": 1.5245643854141235, + "learning_rate": 4.795571173583222e-05, + "loss": 0.1456, + "step": 13625 + }, + { + "epoch": 82.10843373493977, + "grad_norm": 2.19927716255188, + "learning_rate": 4.7952922802320395e-05, + "loss": 0.1452, + "step": 13630 + }, + { + "epoch": 82.13855421686748, + "grad_norm": 1.0611395835876465, + "learning_rate": 4.795013386880857e-05, + "loss": 0.1266, + "step": 13635 + }, + { + "epoch": 82.16867469879519, + "grad_norm": 1.6245512962341309, + "learning_rate": 4.794734493529674e-05, + "loss": 0.1096, + "step": 13640 + }, + { + "epoch": 82.1987951807229, + "grad_norm": 1.3010703325271606, + "learning_rate": 4.7944556001784917e-05, + "loss": 0.1368, + "step": 13645 + }, + { + "epoch": 82.2289156626506, + "grad_norm": 1.6115851402282715, + "learning_rate": 4.79417670682731e-05, + "loss": 0.1584, + "step": 13650 + }, + { + "epoch": 82.25903614457832, + "grad_norm": 0.7851889133453369, + "learning_rate": 4.793897813476127e-05, + "loss": 0.108, + "step": 13655 + }, + { + "epoch": 82.28915662650603, + "grad_norm": 2.6004412174224854, + "learning_rate": 4.7936189201249445e-05, + "loss": 0.2029, + "step": 13660 + }, + { + "epoch": 82.31927710843374, + "grad_norm": 0.9214454293251038, + "learning_rate": 4.793340026773762e-05, + "loss": 0.129, + "step": 13665 + }, + { + "epoch": 82.34939759036145, + "grad_norm": 1.3081505298614502, + "learning_rate": 4.793061133422579e-05, + "loss": 0.1535, + "step": 13670 + }, + { + "epoch": 82.37951807228916, + "grad_norm": 2.4201855659484863, + "learning_rate": 4.7927822400713966e-05, + "loss": 0.1492, + "step": 13675 + }, + { + "epoch": 82.40963855421687, + "grad_norm": 1.2794040441513062, + "learning_rate": 4.792503346720215e-05, + "loss": 0.1998, + "step": 13680 + }, + { + "epoch": 82.43975903614458, + "grad_norm": 1.3795418739318848, + "learning_rate": 4.792224453369032e-05, + "loss": 0.1272, + "step": 13685 + }, + { + "epoch": 82.46987951807229, + "grad_norm": 0.9484094381332397, + "learning_rate": 4.7919455600178494e-05, + "loss": 0.1306, + "step": 13690 + }, + { + "epoch": 82.5, + "grad_norm": 1.1522639989852905, + "learning_rate": 4.791666666666667e-05, + "loss": 0.1385, + "step": 13695 + }, + { + "epoch": 82.53012048192771, + "grad_norm": 1.298977255821228, + "learning_rate": 4.791387773315484e-05, + "loss": 0.143, + "step": 13700 + }, + { + "epoch": 82.56024096385542, + "grad_norm": 1.7475353479385376, + "learning_rate": 4.7911088799643016e-05, + "loss": 0.1288, + "step": 13705 + }, + { + "epoch": 82.59036144578313, + "grad_norm": 2.226940393447876, + "learning_rate": 4.7908299866131196e-05, + "loss": 0.1297, + "step": 13710 + }, + { + "epoch": 82.62048192771084, + "grad_norm": 2.041835308074951, + "learning_rate": 4.790551093261937e-05, + "loss": 0.1503, + "step": 13715 + }, + { + "epoch": 82.65060240963855, + "grad_norm": 1.4314419031143188, + "learning_rate": 4.7902721999107544e-05, + "loss": 0.114, + "step": 13720 + }, + { + "epoch": 82.68072289156626, + "grad_norm": 2.819429397583008, + "learning_rate": 4.789993306559572e-05, + "loss": 0.1246, + "step": 13725 + }, + { + "epoch": 82.71084337349397, + "grad_norm": 0.9852308630943298, + "learning_rate": 4.789714413208389e-05, + "loss": 0.1114, + "step": 13730 + }, + { + "epoch": 82.74096385542168, + "grad_norm": 1.1365885734558105, + "learning_rate": 4.7894355198572065e-05, + "loss": 0.1119, + "step": 13735 + }, + { + "epoch": 82.7710843373494, + "grad_norm": 1.4428057670593262, + "learning_rate": 4.7891566265060246e-05, + "loss": 0.1132, + "step": 13740 + }, + { + "epoch": 82.8012048192771, + "grad_norm": 2.48418927192688, + "learning_rate": 4.788877733154842e-05, + "loss": 0.0916, + "step": 13745 + }, + { + "epoch": 82.83132530120481, + "grad_norm": 2.8917527198791504, + "learning_rate": 4.7885988398036593e-05, + "loss": 0.1774, + "step": 13750 + }, + { + "epoch": 82.86144578313252, + "grad_norm": 2.5240049362182617, + "learning_rate": 4.788319946452477e-05, + "loss": 0.1591, + "step": 13755 + }, + { + "epoch": 82.89156626506023, + "grad_norm": 2.186082124710083, + "learning_rate": 4.788041053101294e-05, + "loss": 0.1646, + "step": 13760 + }, + { + "epoch": 82.92168674698796, + "grad_norm": 1.6441371440887451, + "learning_rate": 4.7877621597501115e-05, + "loss": 0.1163, + "step": 13765 + }, + { + "epoch": 82.95180722891567, + "grad_norm": 1.3157641887664795, + "learning_rate": 4.7874832663989295e-05, + "loss": 0.1297, + "step": 13770 + }, + { + "epoch": 82.98192771084338, + "grad_norm": 0.8910824656486511, + "learning_rate": 4.787204373047747e-05, + "loss": 0.1247, + "step": 13775 + }, + { + "epoch": 83.0, + "eval_accuracy": 0.9402280770743217, + "eval_auc": 0.9830995111841772, + "eval_f1": 0.9090909090909091, + "eval_loss": 0.2184453010559082, + "eval_precision": 0.9559748427672956, + "eval_recall": 0.8665906499429875, + "eval_runtime": 16.7944, + "eval_samples_per_second": 151.42, + "eval_steps_per_second": 0.774, + "step": 13778 + }, + { + "epoch": 83.01204819277109, + "grad_norm": 1.388502836227417, + "learning_rate": 4.786925479696564e-05, + "loss": 0.1414, + "step": 13780 + }, + { + "epoch": 83.0421686746988, + "grad_norm": 2.4867851734161377, + "learning_rate": 4.786646586345382e-05, + "loss": 0.1678, + "step": 13785 + }, + { + "epoch": 83.07228915662651, + "grad_norm": 1.294487476348877, + "learning_rate": 4.786367692994199e-05, + "loss": 0.1222, + "step": 13790 + }, + { + "epoch": 83.10240963855422, + "grad_norm": 1.1315916776657104, + "learning_rate": 4.7860887996430164e-05, + "loss": 0.1111, + "step": 13795 + }, + { + "epoch": 83.13253012048193, + "grad_norm": 1.9068232774734497, + "learning_rate": 4.7858099062918345e-05, + "loss": 0.1363, + "step": 13800 + }, + { + "epoch": 83.16265060240964, + "grad_norm": 1.085170030593872, + "learning_rate": 4.785531012940652e-05, + "loss": 0.1469, + "step": 13805 + }, + { + "epoch": 83.19277108433735, + "grad_norm": 1.0437813997268677, + "learning_rate": 4.785252119589469e-05, + "loss": 0.1131, + "step": 13810 + }, + { + "epoch": 83.22289156626506, + "grad_norm": 1.1478298902511597, + "learning_rate": 4.7849732262382866e-05, + "loss": 0.1231, + "step": 13815 + }, + { + "epoch": 83.25301204819277, + "grad_norm": 1.557233452796936, + "learning_rate": 4.784694332887104e-05, + "loss": 0.161, + "step": 13820 + }, + { + "epoch": 83.28313253012048, + "grad_norm": 1.7268131971359253, + "learning_rate": 4.7844154395359214e-05, + "loss": 0.1343, + "step": 13825 + }, + { + "epoch": 83.3132530120482, + "grad_norm": 1.5991772413253784, + "learning_rate": 4.7841365461847394e-05, + "loss": 0.1193, + "step": 13830 + }, + { + "epoch": 83.3433734939759, + "grad_norm": 1.3358711004257202, + "learning_rate": 4.783857652833557e-05, + "loss": 0.0934, + "step": 13835 + }, + { + "epoch": 83.37349397590361, + "grad_norm": 1.6789311170578003, + "learning_rate": 4.783578759482374e-05, + "loss": 0.1213, + "step": 13840 + }, + { + "epoch": 83.40361445783132, + "grad_norm": 1.81136953830719, + "learning_rate": 4.7832998661311916e-05, + "loss": 0.1346, + "step": 13845 + }, + { + "epoch": 83.43373493975903, + "grad_norm": 1.3308287858963013, + "learning_rate": 4.783020972780009e-05, + "loss": 0.096, + "step": 13850 + }, + { + "epoch": 83.46385542168674, + "grad_norm": 2.7993826866149902, + "learning_rate": 4.782742079428827e-05, + "loss": 0.1815, + "step": 13855 + }, + { + "epoch": 83.49397590361446, + "grad_norm": 2.379805564880371, + "learning_rate": 4.7824631860776444e-05, + "loss": 0.1174, + "step": 13860 + }, + { + "epoch": 83.52409638554217, + "grad_norm": 1.6603949069976807, + "learning_rate": 4.782184292726462e-05, + "loss": 0.1559, + "step": 13865 + }, + { + "epoch": 83.55421686746988, + "grad_norm": 2.775527000427246, + "learning_rate": 4.781905399375279e-05, + "loss": 0.1516, + "step": 13870 + }, + { + "epoch": 83.58433734939759, + "grad_norm": 2.9960052967071533, + "learning_rate": 4.7816265060240965e-05, + "loss": 0.1372, + "step": 13875 + }, + { + "epoch": 83.6144578313253, + "grad_norm": 2.177501678466797, + "learning_rate": 4.781347612672914e-05, + "loss": 0.1798, + "step": 13880 + }, + { + "epoch": 83.644578313253, + "grad_norm": 1.3907676935195923, + "learning_rate": 4.781068719321732e-05, + "loss": 0.1448, + "step": 13885 + }, + { + "epoch": 83.67469879518072, + "grad_norm": 1.755016803741455, + "learning_rate": 4.7807898259705494e-05, + "loss": 0.132, + "step": 13890 + }, + { + "epoch": 83.70481927710843, + "grad_norm": 2.779858112335205, + "learning_rate": 4.780510932619367e-05, + "loss": 0.1507, + "step": 13895 + }, + { + "epoch": 83.73493975903614, + "grad_norm": 1.2361454963684082, + "learning_rate": 4.780232039268184e-05, + "loss": 0.1408, + "step": 13900 + }, + { + "epoch": 83.76506024096386, + "grad_norm": 1.7588298320770264, + "learning_rate": 4.7799531459170015e-05, + "loss": 0.142, + "step": 13905 + }, + { + "epoch": 83.79518072289157, + "grad_norm": 1.982361078262329, + "learning_rate": 4.779674252565819e-05, + "loss": 0.1245, + "step": 13910 + }, + { + "epoch": 83.82530120481928, + "grad_norm": 1.4219602346420288, + "learning_rate": 4.779395359214637e-05, + "loss": 0.1286, + "step": 13915 + }, + { + "epoch": 83.855421686747, + "grad_norm": 0.875392735004425, + "learning_rate": 4.779116465863454e-05, + "loss": 0.1867, + "step": 13920 + }, + { + "epoch": 83.8855421686747, + "grad_norm": 1.4474810361862183, + "learning_rate": 4.778837572512272e-05, + "loss": 0.1386, + "step": 13925 + }, + { + "epoch": 83.91566265060241, + "grad_norm": 1.7907133102416992, + "learning_rate": 4.778558679161089e-05, + "loss": 0.1493, + "step": 13930 + }, + { + "epoch": 83.94578313253012, + "grad_norm": 1.8963080644607544, + "learning_rate": 4.7782797858099065e-05, + "loss": 0.1287, + "step": 13935 + }, + { + "epoch": 83.97590361445783, + "grad_norm": 1.8549200296401978, + "learning_rate": 4.778000892458724e-05, + "loss": 0.1263, + "step": 13940 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.9355092410538733, + "eval_auc": 0.9806407169481248, + "eval_f1": 0.8996328029375765, + "eval_loss": 0.2553333342075348, + "eval_precision": 0.9709379128137384, + "eval_recall": 0.8380843785632839, + "eval_runtime": 16.8708, + "eval_samples_per_second": 150.734, + "eval_steps_per_second": 0.771, + "step": 13944 + }, + { + "epoch": 84.00602409638554, + "grad_norm": 1.2648922204971313, + "learning_rate": 4.777721999107542e-05, + "loss": 0.1059, + "step": 13945 + }, + { + "epoch": 84.03614457831326, + "grad_norm": 0.9515756964683533, + "learning_rate": 4.777443105756359e-05, + "loss": 0.1389, + "step": 13950 + }, + { + "epoch": 84.06626506024097, + "grad_norm": 1.7787405252456665, + "learning_rate": 4.777164212405176e-05, + "loss": 0.1563, + "step": 13955 + }, + { + "epoch": 84.09638554216868, + "grad_norm": 0.850971519947052, + "learning_rate": 4.776885319053994e-05, + "loss": 0.1349, + "step": 13960 + }, + { + "epoch": 84.12650602409639, + "grad_norm": 1.711816668510437, + "learning_rate": 4.7766064257028114e-05, + "loss": 0.1404, + "step": 13965 + }, + { + "epoch": 84.1566265060241, + "grad_norm": 0.7210081815719604, + "learning_rate": 4.776327532351629e-05, + "loss": 0.1305, + "step": 13970 + }, + { + "epoch": 84.1867469879518, + "grad_norm": 0.9942130446434021, + "learning_rate": 4.776048639000447e-05, + "loss": 0.1155, + "step": 13975 + }, + { + "epoch": 84.21686746987952, + "grad_norm": 0.5861592292785645, + "learning_rate": 4.775769745649264e-05, + "loss": 0.1212, + "step": 13980 + }, + { + "epoch": 84.24698795180723, + "grad_norm": 0.8859348893165588, + "learning_rate": 4.775490852298081e-05, + "loss": 0.1155, + "step": 13985 + }, + { + "epoch": 84.27710843373494, + "grad_norm": 1.1719838380813599, + "learning_rate": 4.775211958946899e-05, + "loss": 0.101, + "step": 13990 + }, + { + "epoch": 84.30722891566265, + "grad_norm": 1.1109471321105957, + "learning_rate": 4.7749330655957164e-05, + "loss": 0.1757, + "step": 13995 + }, + { + "epoch": 84.33734939759036, + "grad_norm": 1.2019339799880981, + "learning_rate": 4.774654172244534e-05, + "loss": 0.133, + "step": 14000 + }, + { + "epoch": 84.36746987951807, + "grad_norm": 2.0538597106933594, + "learning_rate": 4.774375278893352e-05, + "loss": 0.1767, + "step": 14005 + }, + { + "epoch": 84.39759036144578, + "grad_norm": 1.268314242362976, + "learning_rate": 4.774096385542169e-05, + "loss": 0.1562, + "step": 14010 + }, + { + "epoch": 84.42771084337349, + "grad_norm": 1.7017498016357422, + "learning_rate": 4.773817492190986e-05, + "loss": 0.1514, + "step": 14015 + }, + { + "epoch": 84.4578313253012, + "grad_norm": 1.9080393314361572, + "learning_rate": 4.773538598839804e-05, + "loss": 0.1621, + "step": 14020 + }, + { + "epoch": 84.48795180722891, + "grad_norm": 1.2499710321426392, + "learning_rate": 4.773259705488621e-05, + "loss": 0.1595, + "step": 14025 + }, + { + "epoch": 84.51807228915662, + "grad_norm": 1.181410312652588, + "learning_rate": 4.772980812137439e-05, + "loss": 0.1465, + "step": 14030 + }, + { + "epoch": 84.54819277108433, + "grad_norm": 1.1394851207733154, + "learning_rate": 4.772701918786257e-05, + "loss": 0.1263, + "step": 14035 + }, + { + "epoch": 84.57831325301204, + "grad_norm": 1.396920919418335, + "learning_rate": 4.772423025435074e-05, + "loss": 0.1157, + "step": 14040 + }, + { + "epoch": 84.60843373493977, + "grad_norm": 1.530899167060852, + "learning_rate": 4.772144132083891e-05, + "loss": 0.1305, + "step": 14045 + }, + { + "epoch": 84.63855421686748, + "grad_norm": 2.6912682056427, + "learning_rate": 4.771865238732709e-05, + "loss": 0.139, + "step": 14050 + }, + { + "epoch": 84.66867469879519, + "grad_norm": 2.976313829421997, + "learning_rate": 4.771586345381526e-05, + "loss": 0.1747, + "step": 14055 + }, + { + "epoch": 84.6987951807229, + "grad_norm": 1.7152972221374512, + "learning_rate": 4.7713074520303437e-05, + "loss": 0.1337, + "step": 14060 + }, + { + "epoch": 84.7289156626506, + "grad_norm": 1.9049164056777954, + "learning_rate": 4.771028558679162e-05, + "loss": 0.1506, + "step": 14065 + }, + { + "epoch": 84.75903614457832, + "grad_norm": 1.6719861030578613, + "learning_rate": 4.7707496653279784e-05, + "loss": 0.1509, + "step": 14070 + }, + { + "epoch": 84.78915662650603, + "grad_norm": 1.9368972778320312, + "learning_rate": 4.770470771976796e-05, + "loss": 0.1756, + "step": 14075 + }, + { + "epoch": 84.81927710843374, + "grad_norm": 1.2720271348953247, + "learning_rate": 4.770191878625614e-05, + "loss": 0.1275, + "step": 14080 + }, + { + "epoch": 84.84939759036145, + "grad_norm": 1.9887089729309082, + "learning_rate": 4.769912985274431e-05, + "loss": 0.1453, + "step": 14085 + }, + { + "epoch": 84.87951807228916, + "grad_norm": 1.2748991250991821, + "learning_rate": 4.7696340919232486e-05, + "loss": 0.1356, + "step": 14090 + }, + { + "epoch": 84.90963855421687, + "grad_norm": 0.7837698459625244, + "learning_rate": 4.769355198572067e-05, + "loss": 0.1034, + "step": 14095 + }, + { + "epoch": 84.93975903614458, + "grad_norm": 1.91938054561615, + "learning_rate": 4.7690763052208834e-05, + "loss": 0.1837, + "step": 14100 + }, + { + "epoch": 84.96987951807229, + "grad_norm": 1.103245496749878, + "learning_rate": 4.7687974118697014e-05, + "loss": 0.1537, + "step": 14105 + }, + { + "epoch": 85.0, + "grad_norm": 1.532736897468567, + "learning_rate": 4.768518518518519e-05, + "loss": 0.1253, + "step": 14110 + }, + { + "epoch": 85.0, + "eval_accuracy": 0.9433739677546206, + "eval_auc": 0.9841418893669213, + "eval_f1": 0.9143876337693222, + "eval_loss": 0.18828138709068298, + "eval_precision": 0.9552795031055901, + "eval_recall": 0.8768529076396807, + "eval_runtime": 17.0104, + "eval_samples_per_second": 149.497, + "eval_steps_per_second": 0.764, + "step": 14110 + }, + { + "epoch": 85.03012048192771, + "grad_norm": 1.0667166709899902, + "learning_rate": 4.768239625167336e-05, + "loss": 0.1471, + "step": 14115 + }, + { + "epoch": 85.06024096385542, + "grad_norm": 1.475081205368042, + "learning_rate": 4.767960731816154e-05, + "loss": 0.1133, + "step": 14120 + }, + { + "epoch": 85.09036144578313, + "grad_norm": 0.9159479737281799, + "learning_rate": 4.7676818384649716e-05, + "loss": 0.131, + "step": 14125 + }, + { + "epoch": 85.12048192771084, + "grad_norm": 1.6095043420791626, + "learning_rate": 4.767402945113788e-05, + "loss": 0.1483, + "step": 14130 + }, + { + "epoch": 85.15060240963855, + "grad_norm": 1.4973933696746826, + "learning_rate": 4.7671240517626064e-05, + "loss": 0.1482, + "step": 14135 + }, + { + "epoch": 85.18072289156626, + "grad_norm": 2.2780914306640625, + "learning_rate": 4.766845158411424e-05, + "loss": 0.1662, + "step": 14140 + }, + { + "epoch": 85.21084337349397, + "grad_norm": 1.1623525619506836, + "learning_rate": 4.766566265060241e-05, + "loss": 0.1236, + "step": 14145 + }, + { + "epoch": 85.24096385542168, + "grad_norm": 1.9882997274398804, + "learning_rate": 4.766287371709059e-05, + "loss": 0.1287, + "step": 14150 + }, + { + "epoch": 85.2710843373494, + "grad_norm": 0.5409373044967651, + "learning_rate": 4.766008478357876e-05, + "loss": 0.1124, + "step": 14155 + }, + { + "epoch": 85.3012048192771, + "grad_norm": 1.2196239233016968, + "learning_rate": 4.765729585006693e-05, + "loss": 0.1435, + "step": 14160 + }, + { + "epoch": 85.33132530120481, + "grad_norm": 1.3611983060836792, + "learning_rate": 4.765450691655511e-05, + "loss": 0.1293, + "step": 14165 + }, + { + "epoch": 85.36144578313252, + "grad_norm": 1.6768685579299927, + "learning_rate": 4.765171798304329e-05, + "loss": 0.125, + "step": 14170 + }, + { + "epoch": 85.39156626506023, + "grad_norm": 1.4841111898422241, + "learning_rate": 4.764892904953146e-05, + "loss": 0.1151, + "step": 14175 + }, + { + "epoch": 85.42168674698796, + "grad_norm": 0.5706485509872437, + "learning_rate": 4.764614011601964e-05, + "loss": 0.1005, + "step": 14180 + }, + { + "epoch": 85.45180722891567, + "grad_norm": 0.9013178944587708, + "learning_rate": 4.764335118250781e-05, + "loss": 0.1316, + "step": 14185 + }, + { + "epoch": 85.48192771084338, + "grad_norm": 1.557949423789978, + "learning_rate": 4.764056224899598e-05, + "loss": 0.1359, + "step": 14190 + }, + { + "epoch": 85.51204819277109, + "grad_norm": 2.801118850708008, + "learning_rate": 4.763777331548416e-05, + "loss": 0.1569, + "step": 14195 + }, + { + "epoch": 85.5421686746988, + "grad_norm": 1.2411048412322998, + "learning_rate": 4.763498438197234e-05, + "loss": 0.1311, + "step": 14200 + }, + { + "epoch": 85.57228915662651, + "grad_norm": 2.2194406986236572, + "learning_rate": 4.763219544846051e-05, + "loss": 0.1688, + "step": 14205 + }, + { + "epoch": 85.60240963855422, + "grad_norm": 1.5219783782958984, + "learning_rate": 4.762940651494869e-05, + "loss": 0.1452, + "step": 14210 + }, + { + "epoch": 85.63253012048193, + "grad_norm": 1.2245383262634277, + "learning_rate": 4.762661758143686e-05, + "loss": 0.1317, + "step": 14215 + }, + { + "epoch": 85.66265060240964, + "grad_norm": 1.17124342918396, + "learning_rate": 4.762382864792503e-05, + "loss": 0.108, + "step": 14220 + }, + { + "epoch": 85.69277108433735, + "grad_norm": 1.87996244430542, + "learning_rate": 4.762103971441321e-05, + "loss": 0.1348, + "step": 14225 + }, + { + "epoch": 85.72289156626506, + "grad_norm": 1.8117679357528687, + "learning_rate": 4.7618250780901386e-05, + "loss": 0.1515, + "step": 14230 + }, + { + "epoch": 85.75301204819277, + "grad_norm": 1.6517044305801392, + "learning_rate": 4.761546184738956e-05, + "loss": 0.1497, + "step": 14235 + }, + { + "epoch": 85.78313253012048, + "grad_norm": 2.1645545959472656, + "learning_rate": 4.7612672913877734e-05, + "loss": 0.1189, + "step": 14240 + }, + { + "epoch": 85.8132530120482, + "grad_norm": 2.453587770462036, + "learning_rate": 4.760988398036591e-05, + "loss": 0.1548, + "step": 14245 + }, + { + "epoch": 85.8433734939759, + "grad_norm": 2.449235677719116, + "learning_rate": 4.760709504685408e-05, + "loss": 0.1199, + "step": 14250 + }, + { + "epoch": 85.87349397590361, + "grad_norm": 1.6433216333389282, + "learning_rate": 4.760430611334226e-05, + "loss": 0.1388, + "step": 14255 + }, + { + "epoch": 85.90361445783132, + "grad_norm": 1.9673657417297363, + "learning_rate": 4.7601517179830436e-05, + "loss": 0.1739, + "step": 14260 + }, + { + "epoch": 85.93373493975903, + "grad_norm": 0.7465510964393616, + "learning_rate": 4.759872824631861e-05, + "loss": 0.0861, + "step": 14265 + }, + { + "epoch": 85.96385542168674, + "grad_norm": 1.1348292827606201, + "learning_rate": 4.759593931280678e-05, + "loss": 0.0973, + "step": 14270 + }, + { + "epoch": 85.99397590361446, + "grad_norm": 0.902716875076294, + "learning_rate": 4.759315037929496e-05, + "loss": 0.1709, + "step": 14275 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.9480928037750688, + "eval_auc": 0.9864627721099842, + "eval_f1": 0.9217081850533808, + "eval_loss": 0.1892118602991104, + "eval_precision": 0.9604449938195303, + "eval_recall": 0.8859749144811858, + "eval_runtime": 16.9175, + "eval_samples_per_second": 150.318, + "eval_steps_per_second": 0.768, + "step": 14276 + }, + { + "epoch": 86.02409638554217, + "grad_norm": 1.6581169366836548, + "learning_rate": 4.759036144578313e-05, + "loss": 0.1411, + "step": 14280 + }, + { + "epoch": 86.05421686746988, + "grad_norm": 1.0091661214828491, + "learning_rate": 4.758757251227131e-05, + "loss": 0.1101, + "step": 14285 + }, + { + "epoch": 86.08433734939759, + "grad_norm": 1.1630033254623413, + "learning_rate": 4.7584783578759485e-05, + "loss": 0.1147, + "step": 14290 + }, + { + "epoch": 86.1144578313253, + "grad_norm": 0.7185408473014832, + "learning_rate": 4.758199464524766e-05, + "loss": 0.1016, + "step": 14295 + }, + { + "epoch": 86.144578313253, + "grad_norm": 1.512789249420166, + "learning_rate": 4.757920571173583e-05, + "loss": 0.1485, + "step": 14300 + }, + { + "epoch": 86.17469879518072, + "grad_norm": 0.4657615125179291, + "learning_rate": 4.757641677822401e-05, + "loss": 0.1186, + "step": 14305 + }, + { + "epoch": 86.20481927710843, + "grad_norm": 2.3068294525146484, + "learning_rate": 4.757362784471218e-05, + "loss": 0.1433, + "step": 14310 + }, + { + "epoch": 86.23493975903614, + "grad_norm": 1.127864956855774, + "learning_rate": 4.757083891120036e-05, + "loss": 0.1397, + "step": 14315 + }, + { + "epoch": 86.26506024096386, + "grad_norm": 1.1965922117233276, + "learning_rate": 4.7568049977688535e-05, + "loss": 0.1298, + "step": 14320 + }, + { + "epoch": 86.29518072289157, + "grad_norm": 1.5388059616088867, + "learning_rate": 4.756526104417671e-05, + "loss": 0.1224, + "step": 14325 + }, + { + "epoch": 86.32530120481928, + "grad_norm": 2.3614656925201416, + "learning_rate": 4.756247211066488e-05, + "loss": 0.142, + "step": 14330 + }, + { + "epoch": 86.355421686747, + "grad_norm": 1.8381450176239014, + "learning_rate": 4.7559683177153056e-05, + "loss": 0.1116, + "step": 14335 + }, + { + "epoch": 86.3855421686747, + "grad_norm": 1.1881322860717773, + "learning_rate": 4.755689424364123e-05, + "loss": 0.1662, + "step": 14340 + }, + { + "epoch": 86.41566265060241, + "grad_norm": 1.2886515855789185, + "learning_rate": 4.755410531012941e-05, + "loss": 0.1068, + "step": 14345 + }, + { + "epoch": 86.44578313253012, + "grad_norm": 1.9574270248413086, + "learning_rate": 4.7551316376617584e-05, + "loss": 0.1485, + "step": 14350 + }, + { + "epoch": 86.47590361445783, + "grad_norm": 0.9434966444969177, + "learning_rate": 4.754852744310576e-05, + "loss": 0.1241, + "step": 14355 + }, + { + "epoch": 86.50602409638554, + "grad_norm": 0.7882797718048096, + "learning_rate": 4.754573850959393e-05, + "loss": 0.1348, + "step": 14360 + }, + { + "epoch": 86.53614457831326, + "grad_norm": 1.2485636472702026, + "learning_rate": 4.7542949576082106e-05, + "loss": 0.113, + "step": 14365 + }, + { + "epoch": 86.56626506024097, + "grad_norm": 1.1508848667144775, + "learning_rate": 4.7540160642570286e-05, + "loss": 0.127, + "step": 14370 + }, + { + "epoch": 86.59638554216868, + "grad_norm": 1.799537181854248, + "learning_rate": 4.753737170905846e-05, + "loss": 0.1802, + "step": 14375 + }, + { + "epoch": 86.62650602409639, + "grad_norm": 2.453477621078491, + "learning_rate": 4.7534582775546634e-05, + "loss": 0.1481, + "step": 14380 + }, + { + "epoch": 86.6566265060241, + "grad_norm": 0.9357603192329407, + "learning_rate": 4.753179384203481e-05, + "loss": 0.143, + "step": 14385 + }, + { + "epoch": 86.6867469879518, + "grad_norm": 1.1688281297683716, + "learning_rate": 4.752900490852298e-05, + "loss": 0.1439, + "step": 14390 + }, + { + "epoch": 86.71686746987952, + "grad_norm": 0.8570228815078735, + "learning_rate": 4.7526215975011155e-05, + "loss": 0.1239, + "step": 14395 + }, + { + "epoch": 86.74698795180723, + "grad_norm": 1.5286699533462524, + "learning_rate": 4.7523427041499336e-05, + "loss": 0.1491, + "step": 14400 + }, + { + "epoch": 86.77710843373494, + "grad_norm": 1.9417452812194824, + "learning_rate": 4.752063810798751e-05, + "loss": 0.1122, + "step": 14405 + }, + { + "epoch": 86.80722891566265, + "grad_norm": 1.2182284593582153, + "learning_rate": 4.7517849174475684e-05, + "loss": 0.0986, + "step": 14410 + }, + { + "epoch": 86.83734939759036, + "grad_norm": 0.8801213502883911, + "learning_rate": 4.751506024096386e-05, + "loss": 0.0997, + "step": 14415 + }, + { + "epoch": 86.86746987951807, + "grad_norm": 1.2546206712722778, + "learning_rate": 4.751227130745203e-05, + "loss": 0.1667, + "step": 14420 + }, + { + "epoch": 86.89759036144578, + "grad_norm": 1.6996676921844482, + "learning_rate": 4.7509482373940205e-05, + "loss": 0.1432, + "step": 14425 + }, + { + "epoch": 86.92771084337349, + "grad_norm": 0.9992546439170837, + "learning_rate": 4.7506693440428386e-05, + "loss": 0.1392, + "step": 14430 + }, + { + "epoch": 86.9578313253012, + "grad_norm": 1.7589772939682007, + "learning_rate": 4.750390450691656e-05, + "loss": 0.1297, + "step": 14435 + }, + { + "epoch": 86.98795180722891, + "grad_norm": 1.7378164529800415, + "learning_rate": 4.750111557340473e-05, + "loss": 0.1469, + "step": 14440 + }, + { + "epoch": 87.0, + "eval_accuracy": 0.9508454581203303, + "eval_auc": 0.985316703648392, + "eval_f1": 0.9255509231685527, + "eval_loss": 0.1991521120071411, + "eval_precision": 0.9688279301745636, + "eval_recall": 0.8859749144811858, + "eval_runtime": 16.9096, + "eval_samples_per_second": 150.388, + "eval_steps_per_second": 0.769, + "step": 14442 + }, + { + "epoch": 87.01807228915662, + "grad_norm": 1.0375066995620728, + "learning_rate": 4.749832663989291e-05, + "loss": 0.1279, + "step": 14445 + }, + { + "epoch": 87.04819277108433, + "grad_norm": 1.4889252185821533, + "learning_rate": 4.749553770638108e-05, + "loss": 0.1232, + "step": 14450 + }, + { + "epoch": 87.07831325301204, + "grad_norm": 1.7051292657852173, + "learning_rate": 4.7492748772869254e-05, + "loss": 0.091, + "step": 14455 + }, + { + "epoch": 87.10843373493977, + "grad_norm": 1.5671590566635132, + "learning_rate": 4.7489959839357435e-05, + "loss": 0.0976, + "step": 14460 + }, + { + "epoch": 87.13855421686748, + "grad_norm": 1.0408308506011963, + "learning_rate": 4.748717090584561e-05, + "loss": 0.1275, + "step": 14465 + }, + { + "epoch": 87.16867469879519, + "grad_norm": 1.5001578330993652, + "learning_rate": 4.748438197233378e-05, + "loss": 0.1436, + "step": 14470 + }, + { + "epoch": 87.1987951807229, + "grad_norm": 0.7032421827316284, + "learning_rate": 4.7481593038821956e-05, + "loss": 0.1021, + "step": 14475 + }, + { + "epoch": 87.2289156626506, + "grad_norm": 0.7100562453269958, + "learning_rate": 4.747880410531013e-05, + "loss": 0.1344, + "step": 14480 + }, + { + "epoch": 87.25903614457832, + "grad_norm": 1.7033170461654663, + "learning_rate": 4.7476015171798304e-05, + "loss": 0.1775, + "step": 14485 + }, + { + "epoch": 87.28915662650603, + "grad_norm": 1.7784265279769897, + "learning_rate": 4.7473226238286485e-05, + "loss": 0.1229, + "step": 14490 + }, + { + "epoch": 87.31927710843374, + "grad_norm": 1.1719051599502563, + "learning_rate": 4.747043730477466e-05, + "loss": 0.1322, + "step": 14495 + }, + { + "epoch": 87.34939759036145, + "grad_norm": 1.3448373079299927, + "learning_rate": 4.746764837126283e-05, + "loss": 0.1047, + "step": 14500 + }, + { + "epoch": 87.37951807228916, + "grad_norm": 1.515297293663025, + "learning_rate": 4.7464859437751006e-05, + "loss": 0.1331, + "step": 14505 + }, + { + "epoch": 87.40963855421687, + "grad_norm": 1.7046703100204468, + "learning_rate": 4.746207050423918e-05, + "loss": 0.1517, + "step": 14510 + }, + { + "epoch": 87.43975903614458, + "grad_norm": 2.1440868377685547, + "learning_rate": 4.7459281570727354e-05, + "loss": 0.147, + "step": 14515 + }, + { + "epoch": 87.46987951807229, + "grad_norm": 1.4107609987258911, + "learning_rate": 4.7456492637215534e-05, + "loss": 0.1326, + "step": 14520 + }, + { + "epoch": 87.5, + "grad_norm": 2.455655574798584, + "learning_rate": 4.745370370370371e-05, + "loss": 0.1378, + "step": 14525 + }, + { + "epoch": 87.53012048192771, + "grad_norm": 1.4808694124221802, + "learning_rate": 4.745091477019188e-05, + "loss": 0.1255, + "step": 14530 + }, + { + "epoch": 87.56024096385542, + "grad_norm": 0.9613294005393982, + "learning_rate": 4.7448125836680056e-05, + "loss": 0.1225, + "step": 14535 + }, + { + "epoch": 87.59036144578313, + "grad_norm": 1.7466461658477783, + "learning_rate": 4.744533690316823e-05, + "loss": 0.1056, + "step": 14540 + }, + { + "epoch": 87.62048192771084, + "grad_norm": 1.7773330211639404, + "learning_rate": 4.74425479696564e-05, + "loss": 0.1692, + "step": 14545 + }, + { + "epoch": 87.65060240963855, + "grad_norm": 1.3193150758743286, + "learning_rate": 4.7439759036144584e-05, + "loss": 0.1485, + "step": 14550 + }, + { + "epoch": 87.68072289156626, + "grad_norm": 1.9448775053024292, + "learning_rate": 4.743697010263276e-05, + "loss": 0.152, + "step": 14555 + }, + { + "epoch": 87.71084337349397, + "grad_norm": 1.9711596965789795, + "learning_rate": 4.743418116912093e-05, + "loss": 0.1283, + "step": 14560 + }, + { + "epoch": 87.74096385542168, + "grad_norm": 0.5820239186286926, + "learning_rate": 4.7431392235609105e-05, + "loss": 0.1317, + "step": 14565 + }, + { + "epoch": 87.7710843373494, + "grad_norm": 1.3593775033950806, + "learning_rate": 4.742860330209728e-05, + "loss": 0.15, + "step": 14570 + }, + { + "epoch": 87.8012048192771, + "grad_norm": 0.7001518607139587, + "learning_rate": 4.742581436858545e-05, + "loss": 0.1119, + "step": 14575 + }, + { + "epoch": 87.83132530120481, + "grad_norm": 2.0903732776641846, + "learning_rate": 4.742302543507363e-05, + "loss": 0.1496, + "step": 14580 + }, + { + "epoch": 87.86144578313252, + "grad_norm": 1.6374163627624512, + "learning_rate": 4.742023650156181e-05, + "loss": 0.1526, + "step": 14585 + }, + { + "epoch": 87.89156626506023, + "grad_norm": 0.8886626958847046, + "learning_rate": 4.7417447568049974e-05, + "loss": 0.0918, + "step": 14590 + }, + { + "epoch": 87.92168674698796, + "grad_norm": 1.5021910667419434, + "learning_rate": 4.7414658634538155e-05, + "loss": 0.1256, + "step": 14595 + }, + { + "epoch": 87.95180722891567, + "grad_norm": 1.6364808082580566, + "learning_rate": 4.741186970102633e-05, + "loss": 0.1244, + "step": 14600 + }, + { + "epoch": 87.98192771084338, + "grad_norm": 2.0130977630615234, + "learning_rate": 4.74090807675145e-05, + "loss": 0.1283, + "step": 14605 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.9445536767597326, + "eval_auc": 0.983247689041409, + "eval_f1": 0.9153153153153153, + "eval_loss": 0.21100325882434845, + "eval_precision": 0.9670050761421319, + "eval_recall": 0.8688711516533637, + "eval_runtime": 19.5416, + "eval_samples_per_second": 130.133, + "eval_steps_per_second": 0.665, + "step": 14608 + }, + { + "epoch": 88.01204819277109, + "grad_norm": 1.7429838180541992, + "learning_rate": 4.740629183400268e-05, + "loss": 0.1304, + "step": 14610 + }, + { + "epoch": 88.0421686746988, + "grad_norm": 2.927536725997925, + "learning_rate": 4.7403502900490857e-05, + "loss": 0.1399, + "step": 14615 + }, + { + "epoch": 88.07228915662651, + "grad_norm": 0.9462056159973145, + "learning_rate": 4.7400713966979024e-05, + "loss": 0.1014, + "step": 14620 + }, + { + "epoch": 88.10240963855422, + "grad_norm": 1.36577308177948, + "learning_rate": 4.7397925033467204e-05, + "loss": 0.1577, + "step": 14625 + }, + { + "epoch": 88.13253012048193, + "grad_norm": 1.4795022010803223, + "learning_rate": 4.739513609995538e-05, + "loss": 0.1463, + "step": 14630 + }, + { + "epoch": 88.16265060240964, + "grad_norm": 1.4152950048446655, + "learning_rate": 4.739234716644356e-05, + "loss": 0.1526, + "step": 14635 + }, + { + "epoch": 88.19277108433735, + "grad_norm": 1.4010131359100342, + "learning_rate": 4.738955823293173e-05, + "loss": 0.1232, + "step": 14640 + }, + { + "epoch": 88.22289156626506, + "grad_norm": 0.896776556968689, + "learning_rate": 4.7386769299419906e-05, + "loss": 0.1316, + "step": 14645 + }, + { + "epoch": 88.25301204819277, + "grad_norm": 0.8451403379440308, + "learning_rate": 4.738398036590808e-05, + "loss": 0.1152, + "step": 14650 + }, + { + "epoch": 88.28313253012048, + "grad_norm": 1.6329681873321533, + "learning_rate": 4.7381191432396254e-05, + "loss": 0.1036, + "step": 14655 + }, + { + "epoch": 88.3132530120482, + "grad_norm": 0.7393683791160583, + "learning_rate": 4.737840249888443e-05, + "loss": 0.1191, + "step": 14660 + }, + { + "epoch": 88.3433734939759, + "grad_norm": 1.9568076133728027, + "learning_rate": 4.737561356537261e-05, + "loss": 0.1321, + "step": 14665 + }, + { + "epoch": 88.37349397590361, + "grad_norm": 0.9561821222305298, + "learning_rate": 4.737282463186078e-05, + "loss": 0.1272, + "step": 14670 + }, + { + "epoch": 88.40361445783132, + "grad_norm": 2.8008792400360107, + "learning_rate": 4.7370035698348956e-05, + "loss": 0.1335, + "step": 14675 + }, + { + "epoch": 88.43373493975903, + "grad_norm": 1.8802995681762695, + "learning_rate": 4.736724676483713e-05, + "loss": 0.1407, + "step": 14680 + }, + { + "epoch": 88.46385542168674, + "grad_norm": 0.9617180228233337, + "learning_rate": 4.73644578313253e-05, + "loss": 0.1284, + "step": 14685 + }, + { + "epoch": 88.49397590361446, + "grad_norm": 1.9008986949920654, + "learning_rate": 4.736166889781348e-05, + "loss": 0.1761, + "step": 14690 + }, + { + "epoch": 88.52409638554217, + "grad_norm": 1.58428156375885, + "learning_rate": 4.735887996430166e-05, + "loss": 0.1205, + "step": 14695 + }, + { + "epoch": 88.55421686746988, + "grad_norm": 1.0729864835739136, + "learning_rate": 4.735609103078983e-05, + "loss": 0.1183, + "step": 14700 + }, + { + "epoch": 88.58433734939759, + "grad_norm": 1.0596061944961548, + "learning_rate": 4.7353302097278e-05, + "loss": 0.1105, + "step": 14705 + }, + { + "epoch": 88.6144578313253, + "grad_norm": 1.2726976871490479, + "learning_rate": 4.735051316376618e-05, + "loss": 0.1047, + "step": 14710 + }, + { + "epoch": 88.644578313253, + "grad_norm": 1.7473506927490234, + "learning_rate": 4.734772423025435e-05, + "loss": 0.1383, + "step": 14715 + }, + { + "epoch": 88.67469879518072, + "grad_norm": 1.214264154434204, + "learning_rate": 4.734493529674253e-05, + "loss": 0.146, + "step": 14720 + }, + { + "epoch": 88.70481927710843, + "grad_norm": 0.8195292353630066, + "learning_rate": 4.734214636323071e-05, + "loss": 0.1379, + "step": 14725 + }, + { + "epoch": 88.73493975903614, + "grad_norm": 0.8225693106651306, + "learning_rate": 4.733935742971888e-05, + "loss": 0.1321, + "step": 14730 + }, + { + "epoch": 88.76506024096386, + "grad_norm": 1.8106837272644043, + "learning_rate": 4.733656849620705e-05, + "loss": 0.1224, + "step": 14735 + }, + { + "epoch": 88.79518072289157, + "grad_norm": 1.5850567817687988, + "learning_rate": 4.733377956269523e-05, + "loss": 0.1366, + "step": 14740 + }, + { + "epoch": 88.82530120481928, + "grad_norm": 1.3589211702346802, + "learning_rate": 4.73309906291834e-05, + "loss": 0.1274, + "step": 14745 + }, + { + "epoch": 88.855421686747, + "grad_norm": 1.5127644538879395, + "learning_rate": 4.7328201695671576e-05, + "loss": 0.158, + "step": 14750 + }, + { + "epoch": 88.8855421686747, + "grad_norm": 1.4552568197250366, + "learning_rate": 4.732541276215976e-05, + "loss": 0.0927, + "step": 14755 + }, + { + "epoch": 88.91566265060241, + "grad_norm": 1.2925678491592407, + "learning_rate": 4.732262382864793e-05, + "loss": 0.1184, + "step": 14760 + }, + { + "epoch": 88.94578313253012, + "grad_norm": 0.8832511901855469, + "learning_rate": 4.73198348951361e-05, + "loss": 0.1404, + "step": 14765 + }, + { + "epoch": 88.97590361445783, + "grad_norm": 1.4447160959243774, + "learning_rate": 4.731704596162428e-05, + "loss": 0.1684, + "step": 14770 + }, + { + "epoch": 89.0, + "eval_accuracy": 0.9500589854502556, + "eval_auc": 0.9829287473256122, + "eval_f1": 0.9251620506776664, + "eval_loss": 0.1974656581878662, + "eval_precision": 0.9573170731707317, + "eval_recall": 0.895096921322691, + "eval_runtime": 19.6403, + "eval_samples_per_second": 129.479, + "eval_steps_per_second": 0.662, + "step": 14774 + }, + { + "epoch": 89.00602409638554, + "grad_norm": 1.3041869401931763, + "learning_rate": 4.731425702811245e-05, + "loss": 0.1104, + "step": 14775 + }, + { + "epoch": 89.03614457831326, + "grad_norm": 1.4686906337738037, + "learning_rate": 4.7311468094600626e-05, + "loss": 0.1235, + "step": 14780 + }, + { + "epoch": 89.06626506024097, + "grad_norm": 2.0663414001464844, + "learning_rate": 4.7308679161088806e-05, + "loss": 0.1675, + "step": 14785 + }, + { + "epoch": 89.09638554216868, + "grad_norm": 0.9813429713249207, + "learning_rate": 4.730589022757697e-05, + "loss": 0.1309, + "step": 14790 + }, + { + "epoch": 89.12650602409639, + "grad_norm": 1.1441782712936401, + "learning_rate": 4.730310129406515e-05, + "loss": 0.1265, + "step": 14795 + }, + { + "epoch": 89.1566265060241, + "grad_norm": 1.6443047523498535, + "learning_rate": 4.730031236055333e-05, + "loss": 0.1648, + "step": 14800 + }, + { + "epoch": 89.1867469879518, + "grad_norm": 2.0546061992645264, + "learning_rate": 4.72975234270415e-05, + "loss": 0.1715, + "step": 14805 + }, + { + "epoch": 89.21686746987952, + "grad_norm": 1.258359432220459, + "learning_rate": 4.7294734493529675e-05, + "loss": 0.1182, + "step": 14810 + }, + { + "epoch": 89.24698795180723, + "grad_norm": 0.9215362668037415, + "learning_rate": 4.7291945560017856e-05, + "loss": 0.1619, + "step": 14815 + }, + { + "epoch": 89.27710843373494, + "grad_norm": 0.7660539746284485, + "learning_rate": 4.728915662650602e-05, + "loss": 0.1134, + "step": 14820 + }, + { + "epoch": 89.30722891566265, + "grad_norm": 1.2452195882797241, + "learning_rate": 4.72863676929942e-05, + "loss": 0.1275, + "step": 14825 + }, + { + "epoch": 89.33734939759036, + "grad_norm": 0.6670877933502197, + "learning_rate": 4.728357875948238e-05, + "loss": 0.1495, + "step": 14830 + }, + { + "epoch": 89.36746987951807, + "grad_norm": 2.953082323074341, + "learning_rate": 4.728078982597055e-05, + "loss": 0.1247, + "step": 14835 + }, + { + "epoch": 89.39759036144578, + "grad_norm": 1.5403558015823364, + "learning_rate": 4.7278000892458725e-05, + "loss": 0.1465, + "step": 14840 + }, + { + "epoch": 89.42771084337349, + "grad_norm": 1.5101865530014038, + "learning_rate": 4.7275211958946905e-05, + "loss": 0.0789, + "step": 14845 + }, + { + "epoch": 89.4578313253012, + "grad_norm": 1.332971453666687, + "learning_rate": 4.727242302543507e-05, + "loss": 0.1089, + "step": 14850 + }, + { + "epoch": 89.48795180722891, + "grad_norm": 2.0793509483337402, + "learning_rate": 4.7269634091923246e-05, + "loss": 0.1236, + "step": 14855 + }, + { + "epoch": 89.51807228915662, + "grad_norm": 0.8516743779182434, + "learning_rate": 4.726684515841143e-05, + "loss": 0.0934, + "step": 14860 + }, + { + "epoch": 89.54819277108433, + "grad_norm": 1.6131713390350342, + "learning_rate": 4.72640562248996e-05, + "loss": 0.1237, + "step": 14865 + }, + { + "epoch": 89.57831325301204, + "grad_norm": 1.8243027925491333, + "learning_rate": 4.7261267291387774e-05, + "loss": 0.1706, + "step": 14870 + }, + { + "epoch": 89.60843373493977, + "grad_norm": 2.104937791824341, + "learning_rate": 4.725847835787595e-05, + "loss": 0.1364, + "step": 14875 + }, + { + "epoch": 89.63855421686748, + "grad_norm": 2.301976203918457, + "learning_rate": 4.725568942436412e-05, + "loss": 0.145, + "step": 14880 + }, + { + "epoch": 89.66867469879519, + "grad_norm": 1.202064037322998, + "learning_rate": 4.7252900490852296e-05, + "loss": 0.1191, + "step": 14885 + }, + { + "epoch": 89.6987951807229, + "grad_norm": 1.10089111328125, + "learning_rate": 4.7250111557340476e-05, + "loss": 0.1553, + "step": 14890 + }, + { + "epoch": 89.7289156626506, + "grad_norm": 0.7968999743461609, + "learning_rate": 4.724732262382865e-05, + "loss": 0.1369, + "step": 14895 + }, + { + "epoch": 89.75903614457832, + "grad_norm": 1.395979404449463, + "learning_rate": 4.724453369031683e-05, + "loss": 0.1556, + "step": 14900 + }, + { + "epoch": 89.78915662650603, + "grad_norm": 1.076385736465454, + "learning_rate": 4.7241744756805e-05, + "loss": 0.159, + "step": 14905 + }, + { + "epoch": 89.81927710843374, + "grad_norm": 1.197190523147583, + "learning_rate": 4.723895582329317e-05, + "loss": 0.119, + "step": 14910 + }, + { + "epoch": 89.84939759036145, + "grad_norm": 1.8807010650634766, + "learning_rate": 4.723616688978135e-05, + "loss": 0.1558, + "step": 14915 + }, + { + "epoch": 89.87951807228916, + "grad_norm": 0.935214102268219, + "learning_rate": 4.7233377956269526e-05, + "loss": 0.1514, + "step": 14920 + }, + { + "epoch": 89.90963855421687, + "grad_norm": 1.4490586519241333, + "learning_rate": 4.72305890227577e-05, + "loss": 0.1365, + "step": 14925 + }, + { + "epoch": 89.93975903614458, + "grad_norm": 1.6296980381011963, + "learning_rate": 4.722780008924588e-05, + "loss": 0.1283, + "step": 14930 + }, + { + "epoch": 89.96987951807229, + "grad_norm": 2.673417568206787, + "learning_rate": 4.722501115573405e-05, + "loss": 0.134, + "step": 14935 + }, + { + "epoch": 90.0, + "grad_norm": 1.232714295387268, + "learning_rate": 4.722222222222222e-05, + "loss": 0.1445, + "step": 14940 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.9445536767597326, + "eval_auc": 0.9855658340873408, + "eval_f1": 0.9149064574532287, + "eval_loss": 0.2041027843952179, + "eval_precision": 0.9717948717948718, + "eval_recall": 0.8643101482326112, + "eval_runtime": 19.7223, + "eval_samples_per_second": 128.94, + "eval_steps_per_second": 0.659, + "step": 14940 + }, + { + "epoch": 90.03012048192771, + "grad_norm": 1.9144062995910645, + "learning_rate": 4.72194332887104e-05, + "loss": 0.1296, + "step": 14945 + }, + { + "epoch": 90.06024096385542, + "grad_norm": 1.984622597694397, + "learning_rate": 4.7216644355198575e-05, + "loss": 0.1341, + "step": 14950 + }, + { + "epoch": 90.09036144578313, + "grad_norm": 0.7810771465301514, + "learning_rate": 4.721385542168675e-05, + "loss": 0.1028, + "step": 14955 + }, + { + "epoch": 90.12048192771084, + "grad_norm": 1.0553934574127197, + "learning_rate": 4.721106648817493e-05, + "loss": 0.15, + "step": 14960 + }, + { + "epoch": 90.15060240963855, + "grad_norm": 2.661931037902832, + "learning_rate": 4.72082775546631e-05, + "loss": 0.1553, + "step": 14965 + }, + { + "epoch": 90.18072289156626, + "grad_norm": 1.422800898551941, + "learning_rate": 4.720548862115127e-05, + "loss": 0.1241, + "step": 14970 + }, + { + "epoch": 90.21084337349397, + "grad_norm": 1.7622931003570557, + "learning_rate": 4.720269968763945e-05, + "loss": 0.1031, + "step": 14975 + }, + { + "epoch": 90.24096385542168, + "grad_norm": 1.3108450174331665, + "learning_rate": 4.7199910754127625e-05, + "loss": 0.0993, + "step": 14980 + }, + { + "epoch": 90.2710843373494, + "grad_norm": 1.8057940006256104, + "learning_rate": 4.71971218206158e-05, + "loss": 0.1394, + "step": 14985 + }, + { + "epoch": 90.3012048192771, + "grad_norm": 1.7234320640563965, + "learning_rate": 4.719433288710397e-05, + "loss": 0.1404, + "step": 14990 + }, + { + "epoch": 90.33132530120481, + "grad_norm": 1.6687768697738647, + "learning_rate": 4.7191543953592146e-05, + "loss": 0.1298, + "step": 14995 + }, + { + "epoch": 90.36144578313252, + "grad_norm": 1.7635596990585327, + "learning_rate": 4.718875502008032e-05, + "loss": 0.1032, + "step": 15000 + }, + { + "epoch": 90.39156626506023, + "grad_norm": 1.9103935956954956, + "learning_rate": 4.71859660865685e-05, + "loss": 0.1, + "step": 15005 + }, + { + "epoch": 90.42168674698796, + "grad_norm": 1.3442938327789307, + "learning_rate": 4.7183177153056675e-05, + "loss": 0.098, + "step": 15010 + }, + { + "epoch": 90.45180722891567, + "grad_norm": 1.2276017665863037, + "learning_rate": 4.718038821954485e-05, + "loss": 0.105, + "step": 15015 + }, + { + "epoch": 90.48192771084338, + "grad_norm": 2.2045164108276367, + "learning_rate": 4.717759928603302e-05, + "loss": 0.1812, + "step": 15020 + }, + { + "epoch": 90.51204819277109, + "grad_norm": 3.1401782035827637, + "learning_rate": 4.7174810352521196e-05, + "loss": 0.1533, + "step": 15025 + }, + { + "epoch": 90.5421686746988, + "grad_norm": 2.1406538486480713, + "learning_rate": 4.717202141900937e-05, + "loss": 0.1508, + "step": 15030 + }, + { + "epoch": 90.57228915662651, + "grad_norm": 2.2567074298858643, + "learning_rate": 4.716923248549755e-05, + "loss": 0.1367, + "step": 15035 + }, + { + "epoch": 90.60240963855422, + "grad_norm": 1.9470092058181763, + "learning_rate": 4.7166443551985724e-05, + "loss": 0.1665, + "step": 15040 + }, + { + "epoch": 90.63253012048193, + "grad_norm": 0.9224712252616882, + "learning_rate": 4.71636546184739e-05, + "loss": 0.142, + "step": 15045 + }, + { + "epoch": 90.66265060240964, + "grad_norm": 1.0731422901153564, + "learning_rate": 4.716086568496207e-05, + "loss": 0.1301, + "step": 15050 + }, + { + "epoch": 90.69277108433735, + "grad_norm": 1.3973654508590698, + "learning_rate": 4.7158076751450246e-05, + "loss": 0.1579, + "step": 15055 + }, + { + "epoch": 90.72289156626506, + "grad_norm": 1.42367684841156, + "learning_rate": 4.715528781793842e-05, + "loss": 0.1171, + "step": 15060 + }, + { + "epoch": 90.75301204819277, + "grad_norm": 1.3728097677230835, + "learning_rate": 4.71524988844266e-05, + "loss": 0.1156, + "step": 15065 + }, + { + "epoch": 90.78313253012048, + "grad_norm": 2.5987324714660645, + "learning_rate": 4.7149709950914774e-05, + "loss": 0.1293, + "step": 15070 + }, + { + "epoch": 90.8132530120482, + "grad_norm": 1.7050949335098267, + "learning_rate": 4.714692101740295e-05, + "loss": 0.1283, + "step": 15075 + }, + { + "epoch": 90.8433734939759, + "grad_norm": 2.5224061012268066, + "learning_rate": 4.714413208389112e-05, + "loss": 0.126, + "step": 15080 + }, + { + "epoch": 90.87349397590361, + "grad_norm": 1.523181676864624, + "learning_rate": 4.7141343150379295e-05, + "loss": 0.1507, + "step": 15085 + }, + { + "epoch": 90.90361445783132, + "grad_norm": 1.1003533601760864, + "learning_rate": 4.713855421686747e-05, + "loss": 0.1503, + "step": 15090 + }, + { + "epoch": 90.93373493975903, + "grad_norm": 1.1355431079864502, + "learning_rate": 4.713576528335565e-05, + "loss": 0.1301, + "step": 15095 + }, + { + "epoch": 90.96385542168674, + "grad_norm": 0.584497332572937, + "learning_rate": 4.713297634984382e-05, + "loss": 0.0904, + "step": 15100 + }, + { + "epoch": 90.99397590361446, + "grad_norm": 0.8010081052780151, + "learning_rate": 4.7130187416332e-05, + "loss": 0.1301, + "step": 15105 + }, + { + "epoch": 91.0, + "eval_accuracy": 0.9441604404246953, + "eval_auc": 0.9860302159632383, + "eval_f1": 0.9143546441495778, + "eval_loss": 0.20790936052799225, + "eval_precision": 0.970550576184379, + "eval_recall": 0.8643101482326112, + "eval_runtime": 16.9068, + "eval_samples_per_second": 150.413, + "eval_steps_per_second": 0.769, + "step": 15106 + }, + { + "epoch": 91.02409638554217, + "grad_norm": 1.8080776929855347, + "learning_rate": 4.712739848282017e-05, + "loss": 0.1072, + "step": 15110 + }, + { + "epoch": 91.05421686746988, + "grad_norm": 1.793641448020935, + "learning_rate": 4.7124609549308345e-05, + "loss": 0.1225, + "step": 15115 + }, + { + "epoch": 91.08433734939759, + "grad_norm": 1.1437933444976807, + "learning_rate": 4.712182061579652e-05, + "loss": 0.1045, + "step": 15120 + }, + { + "epoch": 91.1144578313253, + "grad_norm": 1.4480006694793701, + "learning_rate": 4.71190316822847e-05, + "loss": 0.1177, + "step": 15125 + }, + { + "epoch": 91.144578313253, + "grad_norm": 0.5498224496841431, + "learning_rate": 4.711624274877287e-05, + "loss": 0.0925, + "step": 15130 + }, + { + "epoch": 91.17469879518072, + "grad_norm": 1.589474081993103, + "learning_rate": 4.7113453815261047e-05, + "loss": 0.1477, + "step": 15135 + }, + { + "epoch": 91.20481927710843, + "grad_norm": 1.3703738451004028, + "learning_rate": 4.711066488174922e-05, + "loss": 0.1123, + "step": 15140 + }, + { + "epoch": 91.23493975903614, + "grad_norm": 2.3812754154205322, + "learning_rate": 4.7107875948237394e-05, + "loss": 0.1176, + "step": 15145 + }, + { + "epoch": 91.26506024096386, + "grad_norm": 1.2320115566253662, + "learning_rate": 4.710508701472557e-05, + "loss": 0.123, + "step": 15150 + }, + { + "epoch": 91.29518072289157, + "grad_norm": 1.0371230840682983, + "learning_rate": 4.710229808121375e-05, + "loss": 0.1162, + "step": 15155 + }, + { + "epoch": 91.32530120481928, + "grad_norm": 2.1828746795654297, + "learning_rate": 4.709950914770192e-05, + "loss": 0.116, + "step": 15160 + }, + { + "epoch": 91.355421686747, + "grad_norm": 1.36992609500885, + "learning_rate": 4.7096720214190096e-05, + "loss": 0.1102, + "step": 15165 + }, + { + "epoch": 91.3855421686747, + "grad_norm": 0.7277379035949707, + "learning_rate": 4.709393128067827e-05, + "loss": 0.1223, + "step": 15170 + }, + { + "epoch": 91.41566265060241, + "grad_norm": 1.4891860485076904, + "learning_rate": 4.7091142347166444e-05, + "loss": 0.1366, + "step": 15175 + }, + { + "epoch": 91.44578313253012, + "grad_norm": 1.5657923221588135, + "learning_rate": 4.7088353413654624e-05, + "loss": 0.1714, + "step": 15180 + }, + { + "epoch": 91.47590361445783, + "grad_norm": 0.8934221863746643, + "learning_rate": 4.70855644801428e-05, + "loss": 0.1274, + "step": 15185 + }, + { + "epoch": 91.50602409638554, + "grad_norm": 2.132737636566162, + "learning_rate": 4.708277554663097e-05, + "loss": 0.1274, + "step": 15190 + }, + { + "epoch": 91.53614457831326, + "grad_norm": 1.4237350225448608, + "learning_rate": 4.7079986613119146e-05, + "loss": 0.1337, + "step": 15195 + }, + { + "epoch": 91.56626506024097, + "grad_norm": 0.7660580277442932, + "learning_rate": 4.707719767960732e-05, + "loss": 0.1011, + "step": 15200 + }, + { + "epoch": 91.59638554216868, + "grad_norm": 1.0952409505844116, + "learning_rate": 4.707440874609549e-05, + "loss": 0.1232, + "step": 15205 + }, + { + "epoch": 91.62650602409639, + "grad_norm": 2.080371856689453, + "learning_rate": 4.7071619812583674e-05, + "loss": 0.1521, + "step": 15210 + }, + { + "epoch": 91.6566265060241, + "grad_norm": 1.226672649383545, + "learning_rate": 4.706883087907185e-05, + "loss": 0.112, + "step": 15215 + }, + { + "epoch": 91.6867469879518, + "grad_norm": 1.8429591655731201, + "learning_rate": 4.706604194556002e-05, + "loss": 0.1257, + "step": 15220 + }, + { + "epoch": 91.71686746987952, + "grad_norm": 2.1857190132141113, + "learning_rate": 4.7063253012048195e-05, + "loss": 0.1498, + "step": 15225 + }, + { + "epoch": 91.74698795180723, + "grad_norm": 1.732683777809143, + "learning_rate": 4.706046407853637e-05, + "loss": 0.1414, + "step": 15230 + }, + { + "epoch": 91.77710843373494, + "grad_norm": 2.2705845832824707, + "learning_rate": 4.705767514502454e-05, + "loss": 0.1272, + "step": 15235 + }, + { + "epoch": 91.80722891566265, + "grad_norm": 1.2718278169631958, + "learning_rate": 4.705488621151272e-05, + "loss": 0.1495, + "step": 15240 + }, + { + "epoch": 91.83734939759036, + "grad_norm": 1.7926607131958008, + "learning_rate": 4.70520972780009e-05, + "loss": 0.1427, + "step": 15245 + }, + { + "epoch": 91.86746987951807, + "grad_norm": 0.5878552794456482, + "learning_rate": 4.704930834448907e-05, + "loss": 0.0921, + "step": 15250 + }, + { + "epoch": 91.89759036144578, + "grad_norm": 1.603685975074768, + "learning_rate": 4.7046519410977245e-05, + "loss": 0.1339, + "step": 15255 + }, + { + "epoch": 91.92771084337349, + "grad_norm": 1.0628222227096558, + "learning_rate": 4.704373047746542e-05, + "loss": 0.1122, + "step": 15260 + }, + { + "epoch": 91.9578313253012, + "grad_norm": 1.1744061708450317, + "learning_rate": 4.704094154395359e-05, + "loss": 0.1312, + "step": 15265 + }, + { + "epoch": 91.98795180722891, + "grad_norm": 1.7355620861053467, + "learning_rate": 4.703815261044177e-05, + "loss": 0.1141, + "step": 15270 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.9343295320487613, + "eval_auc": 0.9789443713631405, + "eval_f1": 0.8986035215543412, + "eval_loss": 0.2643064260482788, + "eval_precision": 0.961038961038961, + "eval_recall": 0.8437856328392246, + "eval_runtime": 16.749, + "eval_samples_per_second": 151.83, + "eval_steps_per_second": 0.776, + "step": 15272 + }, + { + "epoch": 92.01807228915662, + "grad_norm": 0.8560194373130798, + "learning_rate": 4.703536367692995e-05, + "loss": 0.1348, + "step": 15275 + }, + { + "epoch": 92.04819277108433, + "grad_norm": 0.6043124198913574, + "learning_rate": 4.703257474341812e-05, + "loss": 0.1624, + "step": 15280 + }, + { + "epoch": 92.07831325301204, + "grad_norm": 1.2813502550125122, + "learning_rate": 4.7029785809906294e-05, + "loss": 0.1363, + "step": 15285 + }, + { + "epoch": 92.10843373493977, + "grad_norm": 1.1883949041366577, + "learning_rate": 4.702699687639447e-05, + "loss": 0.1241, + "step": 15290 + }, + { + "epoch": 92.13855421686748, + "grad_norm": 2.056490421295166, + "learning_rate": 4.702420794288264e-05, + "loss": 0.1383, + "step": 15295 + }, + { + "epoch": 92.16867469879519, + "grad_norm": 1.779117226600647, + "learning_rate": 4.702141900937082e-05, + "loss": 0.1315, + "step": 15300 + }, + { + "epoch": 92.1987951807229, + "grad_norm": 2.0701324939727783, + "learning_rate": 4.7018630075858996e-05, + "loss": 0.1325, + "step": 15305 + }, + { + "epoch": 92.2289156626506, + "grad_norm": 0.8832273483276367, + "learning_rate": 4.701584114234717e-05, + "loss": 0.1064, + "step": 15310 + }, + { + "epoch": 92.25903614457832, + "grad_norm": 1.8025341033935547, + "learning_rate": 4.7013052208835344e-05, + "loss": 0.1213, + "step": 15315 + }, + { + "epoch": 92.28915662650603, + "grad_norm": 2.2082834243774414, + "learning_rate": 4.701026327532352e-05, + "loss": 0.1331, + "step": 15320 + }, + { + "epoch": 92.31927710843374, + "grad_norm": 1.1036571264266968, + "learning_rate": 4.700747434181169e-05, + "loss": 0.1427, + "step": 15325 + }, + { + "epoch": 92.34939759036145, + "grad_norm": 1.1703567504882812, + "learning_rate": 4.700468540829987e-05, + "loss": 0.1182, + "step": 15330 + }, + { + "epoch": 92.37951807228916, + "grad_norm": 2.180934429168701, + "learning_rate": 4.7001896474788046e-05, + "loss": 0.166, + "step": 15335 + }, + { + "epoch": 92.40963855421687, + "grad_norm": 0.8977643847465515, + "learning_rate": 4.699910754127621e-05, + "loss": 0.1225, + "step": 15340 + }, + { + "epoch": 92.43975903614458, + "grad_norm": 1.1891638040542603, + "learning_rate": 4.6996318607764393e-05, + "loss": 0.1297, + "step": 15345 + }, + { + "epoch": 92.46987951807229, + "grad_norm": 1.1613346338272095, + "learning_rate": 4.699352967425257e-05, + "loss": 0.1483, + "step": 15350 + }, + { + "epoch": 92.5, + "grad_norm": 1.102997899055481, + "learning_rate": 4.699074074074074e-05, + "loss": 0.1559, + "step": 15355 + }, + { + "epoch": 92.53012048192771, + "grad_norm": 0.9282310605049133, + "learning_rate": 4.698795180722892e-05, + "loss": 0.1242, + "step": 15360 + }, + { + "epoch": 92.56024096385542, + "grad_norm": 1.2180817127227783, + "learning_rate": 4.6985162873717095e-05, + "loss": 0.1544, + "step": 15365 + }, + { + "epoch": 92.59036144578313, + "grad_norm": 2.0906457901000977, + "learning_rate": 4.698237394020526e-05, + "loss": 0.1391, + "step": 15370 + }, + { + "epoch": 92.62048192771084, + "grad_norm": 1.5469666719436646, + "learning_rate": 4.697958500669344e-05, + "loss": 0.1291, + "step": 15375 + }, + { + "epoch": 92.65060240963855, + "grad_norm": 1.2399280071258545, + "learning_rate": 4.697679607318162e-05, + "loss": 0.1557, + "step": 15380 + }, + { + "epoch": 92.68072289156626, + "grad_norm": 1.096070408821106, + "learning_rate": 4.697400713966979e-05, + "loss": 0.1335, + "step": 15385 + }, + { + "epoch": 92.71084337349397, + "grad_norm": 1.0564683675765991, + "learning_rate": 4.697121820615797e-05, + "loss": 0.129, + "step": 15390 + }, + { + "epoch": 92.74096385542168, + "grad_norm": 0.9859278202056885, + "learning_rate": 4.6968429272646145e-05, + "loss": 0.1225, + "step": 15395 + }, + { + "epoch": 92.7710843373494, + "grad_norm": 1.4117382764816284, + "learning_rate": 4.696564033913431e-05, + "loss": 0.1513, + "step": 15400 + }, + { + "epoch": 92.8012048192771, + "grad_norm": 1.4786218404769897, + "learning_rate": 4.696285140562249e-05, + "loss": 0.1329, + "step": 15405 + }, + { + "epoch": 92.83132530120481, + "grad_norm": 0.9054820537567139, + "learning_rate": 4.6960062472110666e-05, + "loss": 0.079, + "step": 15410 + }, + { + "epoch": 92.86144578313252, + "grad_norm": 1.3725407123565674, + "learning_rate": 4.695727353859884e-05, + "loss": 0.13, + "step": 15415 + }, + { + "epoch": 92.89156626506023, + "grad_norm": 1.6865205764770508, + "learning_rate": 4.695448460508702e-05, + "loss": 0.1606, + "step": 15420 + }, + { + "epoch": 92.92168674698796, + "grad_norm": 1.0956621170043945, + "learning_rate": 4.695169567157519e-05, + "loss": 0.1176, + "step": 15425 + }, + { + "epoch": 92.95180722891567, + "grad_norm": 1.4258737564086914, + "learning_rate": 4.694890673806337e-05, + "loss": 0.0975, + "step": 15430 + }, + { + "epoch": 92.98192771084338, + "grad_norm": 1.2521849870681763, + "learning_rate": 4.694611780455154e-05, + "loss": 0.1715, + "step": 15435 + }, + { + "epoch": 93.0, + "eval_accuracy": 0.9366889500589854, + "eval_auc": 0.9824588900554522, + "eval_f1": 0.9022465088038859, + "eval_loss": 0.23510372638702393, + "eval_precision": 0.964935064935065, + "eval_recall": 0.8472063854047891, + "eval_runtime": 19.5607, + "eval_samples_per_second": 130.006, + "eval_steps_per_second": 0.665, + "step": 15438 + }, + { + "epoch": 93.01204819277109, + "grad_norm": 0.8004704117774963, + "learning_rate": 4.6943328871039716e-05, + "loss": 0.0874, + "step": 15440 + }, + { + "epoch": 93.0421686746988, + "grad_norm": 0.9212279319763184, + "learning_rate": 4.6940539937527896e-05, + "loss": 0.1018, + "step": 15445 + }, + { + "epoch": 93.07228915662651, + "grad_norm": 1.2655545473098755, + "learning_rate": 4.693775100401607e-05, + "loss": 0.1464, + "step": 15450 + }, + { + "epoch": 93.10240963855422, + "grad_norm": 1.1404651403427124, + "learning_rate": 4.693496207050424e-05, + "loss": 0.1161, + "step": 15455 + }, + { + "epoch": 93.13253012048193, + "grad_norm": 1.7204524278640747, + "learning_rate": 4.693217313699242e-05, + "loss": 0.1166, + "step": 15460 + }, + { + "epoch": 93.16265060240964, + "grad_norm": 2.0438854694366455, + "learning_rate": 4.692938420348059e-05, + "loss": 0.1214, + "step": 15465 + }, + { + "epoch": 93.19277108433735, + "grad_norm": 2.632890224456787, + "learning_rate": 4.6926595269968765e-05, + "loss": 0.1182, + "step": 15470 + }, + { + "epoch": 93.22289156626506, + "grad_norm": 1.2251503467559814, + "learning_rate": 4.6923806336456946e-05, + "loss": 0.1437, + "step": 15475 + }, + { + "epoch": 93.25301204819277, + "grad_norm": 1.5710704326629639, + "learning_rate": 4.692101740294512e-05, + "loss": 0.0881, + "step": 15480 + }, + { + "epoch": 93.28313253012048, + "grad_norm": 0.9319076538085938, + "learning_rate": 4.691822846943329e-05, + "loss": 0.1176, + "step": 15485 + }, + { + "epoch": 93.3132530120482, + "grad_norm": 1.3066734075546265, + "learning_rate": 4.691543953592147e-05, + "loss": 0.1278, + "step": 15490 + }, + { + "epoch": 93.3433734939759, + "grad_norm": 1.5395375490188599, + "learning_rate": 4.691265060240964e-05, + "loss": 0.1177, + "step": 15495 + }, + { + "epoch": 93.37349397590361, + "grad_norm": 0.9871209263801575, + "learning_rate": 4.6909861668897815e-05, + "loss": 0.1598, + "step": 15500 + }, + { + "epoch": 93.40361445783132, + "grad_norm": 0.8233869671821594, + "learning_rate": 4.6907072735385996e-05, + "loss": 0.1343, + "step": 15505 + }, + { + "epoch": 93.43373493975903, + "grad_norm": 1.3258026838302612, + "learning_rate": 4.690428380187416e-05, + "loss": 0.126, + "step": 15510 + }, + { + "epoch": 93.46385542168674, + "grad_norm": 1.9142283201217651, + "learning_rate": 4.6901494868362336e-05, + "loss": 0.1588, + "step": 15515 + }, + { + "epoch": 93.49397590361446, + "grad_norm": 0.9098950624465942, + "learning_rate": 4.689870593485052e-05, + "loss": 0.1162, + "step": 15520 + }, + { + "epoch": 93.52409638554217, + "grad_norm": 1.0858807563781738, + "learning_rate": 4.689591700133869e-05, + "loss": 0.1241, + "step": 15525 + }, + { + "epoch": 93.55421686746988, + "grad_norm": 1.2349001169204712, + "learning_rate": 4.6893128067826865e-05, + "loss": 0.1169, + "step": 15530 + }, + { + "epoch": 93.58433734939759, + "grad_norm": 0.9307500123977661, + "learning_rate": 4.6890339134315045e-05, + "loss": 0.0899, + "step": 15535 + }, + { + "epoch": 93.6144578313253, + "grad_norm": 1.7344714403152466, + "learning_rate": 4.688755020080321e-05, + "loss": 0.1213, + "step": 15540 + }, + { + "epoch": 93.644578313253, + "grad_norm": 0.9669047594070435, + "learning_rate": 4.6884761267291386e-05, + "loss": 0.0895, + "step": 15545 + }, + { + "epoch": 93.67469879518072, + "grad_norm": 2.3350412845611572, + "learning_rate": 4.6881972333779566e-05, + "loss": 0.1303, + "step": 15550 + }, + { + "epoch": 93.70481927710843, + "grad_norm": 1.516924500465393, + "learning_rate": 4.687918340026774e-05, + "loss": 0.1043, + "step": 15555 + }, + { + "epoch": 93.73493975903614, + "grad_norm": 3.6865155696868896, + "learning_rate": 4.6876394466755914e-05, + "loss": 0.1422, + "step": 15560 + }, + { + "epoch": 93.76506024096386, + "grad_norm": 1.2372610569000244, + "learning_rate": 4.6873605533244095e-05, + "loss": 0.1153, + "step": 15565 + }, + { + "epoch": 93.79518072289157, + "grad_norm": 2.35564923286438, + "learning_rate": 4.687081659973226e-05, + "loss": 0.1343, + "step": 15570 + }, + { + "epoch": 93.82530120481928, + "grad_norm": 0.6407546997070312, + "learning_rate": 4.6868027666220435e-05, + "loss": 0.0994, + "step": 15575 + }, + { + "epoch": 93.855421686747, + "grad_norm": 0.9919431805610657, + "learning_rate": 4.6865238732708616e-05, + "loss": 0.1158, + "step": 15580 + }, + { + "epoch": 93.8855421686747, + "grad_norm": 1.9676462411880493, + "learning_rate": 4.686244979919679e-05, + "loss": 0.1579, + "step": 15585 + }, + { + "epoch": 93.91566265060241, + "grad_norm": 0.8513187170028687, + "learning_rate": 4.6859660865684964e-05, + "loss": 0.1263, + "step": 15590 + }, + { + "epoch": 93.94578313253012, + "grad_norm": 0.6019960045814514, + "learning_rate": 4.6856871932173144e-05, + "loss": 0.14, + "step": 15595 + }, + { + "epoch": 93.97590361445783, + "grad_norm": 0.6069483160972595, + "learning_rate": 4.685408299866131e-05, + "loss": 0.1197, + "step": 15600 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.936295713723948, + "eval_auc": 0.9818925289614135, + "eval_f1": 0.9010989010989011, + "eval_loss": 0.252782940864563, + "eval_precision": 0.9697766097240473, + "eval_recall": 0.8415051311288484, + "eval_runtime": 17.0499, + "eval_samples_per_second": 149.15, + "eval_steps_per_second": 0.762, + "step": 15604 + }, + { + "epoch": 94.00602409638554, + "grad_norm": 0.9746034145355225, + "learning_rate": 4.6851294065149485e-05, + "loss": 0.1174, + "step": 15605 + }, + { + "epoch": 94.03614457831326, + "grad_norm": 2.57694411277771, + "learning_rate": 4.6848505131637666e-05, + "loss": 0.1268, + "step": 15610 + }, + { + "epoch": 94.06626506024097, + "grad_norm": 0.77326899766922, + "learning_rate": 4.684571619812584e-05, + "loss": 0.1191, + "step": 15615 + }, + { + "epoch": 94.09638554216868, + "grad_norm": 1.114821195602417, + "learning_rate": 4.684292726461401e-05, + "loss": 0.093, + "step": 15620 + }, + { + "epoch": 94.12650602409639, + "grad_norm": 2.533151149749756, + "learning_rate": 4.684013833110219e-05, + "loss": 0.1474, + "step": 15625 + }, + { + "epoch": 94.1566265060241, + "grad_norm": 1.6700258255004883, + "learning_rate": 4.683734939759036e-05, + "loss": 0.1325, + "step": 15630 + }, + { + "epoch": 94.1867469879518, + "grad_norm": 1.7010748386383057, + "learning_rate": 4.6834560464078535e-05, + "loss": 0.0969, + "step": 15635 + }, + { + "epoch": 94.21686746987952, + "grad_norm": 1.4638324975967407, + "learning_rate": 4.6831771530566715e-05, + "loss": 0.1433, + "step": 15640 + }, + { + "epoch": 94.24698795180723, + "grad_norm": 1.4791585206985474, + "learning_rate": 4.682898259705489e-05, + "loss": 0.1462, + "step": 15645 + }, + { + "epoch": 94.27710843373494, + "grad_norm": 0.7184770703315735, + "learning_rate": 4.682619366354306e-05, + "loss": 0.1424, + "step": 15650 + }, + { + "epoch": 94.30722891566265, + "grad_norm": 1.324702262878418, + "learning_rate": 4.6823404730031237e-05, + "loss": 0.146, + "step": 15655 + }, + { + "epoch": 94.33734939759036, + "grad_norm": 1.2018970251083374, + "learning_rate": 4.682061579651941e-05, + "loss": 0.1174, + "step": 15660 + }, + { + "epoch": 94.36746987951807, + "grad_norm": 1.1097744703292847, + "learning_rate": 4.6817826863007584e-05, + "loss": 0.121, + "step": 15665 + }, + { + "epoch": 94.39759036144578, + "grad_norm": 1.4790349006652832, + "learning_rate": 4.6815037929495765e-05, + "loss": 0.1149, + "step": 15670 + }, + { + "epoch": 94.42771084337349, + "grad_norm": 1.5985567569732666, + "learning_rate": 4.681224899598394e-05, + "loss": 0.1182, + "step": 15675 + }, + { + "epoch": 94.4578313253012, + "grad_norm": 2.173953056335449, + "learning_rate": 4.680946006247211e-05, + "loss": 0.1467, + "step": 15680 + }, + { + "epoch": 94.48795180722891, + "grad_norm": 1.268538475036621, + "learning_rate": 4.6806671128960286e-05, + "loss": 0.1264, + "step": 15685 + }, + { + "epoch": 94.51807228915662, + "grad_norm": 1.8958779573440552, + "learning_rate": 4.680388219544846e-05, + "loss": 0.1349, + "step": 15690 + }, + { + "epoch": 94.54819277108433, + "grad_norm": 0.5450842976570129, + "learning_rate": 4.6801093261936634e-05, + "loss": 0.119, + "step": 15695 + }, + { + "epoch": 94.57831325301204, + "grad_norm": 1.3927003145217896, + "learning_rate": 4.6798304328424814e-05, + "loss": 0.089, + "step": 15700 + }, + { + "epoch": 94.60843373493977, + "grad_norm": 1.4803962707519531, + "learning_rate": 4.679551539491299e-05, + "loss": 0.159, + "step": 15705 + }, + { + "epoch": 94.63855421686748, + "grad_norm": 1.481036901473999, + "learning_rate": 4.679272646140116e-05, + "loss": 0.1033, + "step": 15710 + }, + { + "epoch": 94.66867469879519, + "grad_norm": 2.072739601135254, + "learning_rate": 4.6789937527889336e-05, + "loss": 0.1313, + "step": 15715 + }, + { + "epoch": 94.6987951807229, + "grad_norm": 0.9376345872879028, + "learning_rate": 4.678714859437751e-05, + "loss": 0.116, + "step": 15720 + }, + { + "epoch": 94.7289156626506, + "grad_norm": 0.8326073288917542, + "learning_rate": 4.678435966086569e-05, + "loss": 0.1274, + "step": 15725 + }, + { + "epoch": 94.75903614457832, + "grad_norm": 1.4961594343185425, + "learning_rate": 4.6781570727353864e-05, + "loss": 0.1398, + "step": 15730 + }, + { + "epoch": 94.78915662650603, + "grad_norm": 2.2496695518493652, + "learning_rate": 4.677878179384204e-05, + "loss": 0.1562, + "step": 15735 + }, + { + "epoch": 94.81927710843374, + "grad_norm": 0.6008087396621704, + "learning_rate": 4.677599286033021e-05, + "loss": 0.1167, + "step": 15740 + }, + { + "epoch": 94.84939759036145, + "grad_norm": 1.6028498411178589, + "learning_rate": 4.6773203926818385e-05, + "loss": 0.1245, + "step": 15745 + }, + { + "epoch": 94.87951807228916, + "grad_norm": 1.37605881690979, + "learning_rate": 4.677041499330656e-05, + "loss": 0.1065, + "step": 15750 + }, + { + "epoch": 94.90963855421687, + "grad_norm": 0.9388094544410706, + "learning_rate": 4.676762605979474e-05, + "loss": 0.1322, + "step": 15755 + }, + { + "epoch": 94.93975903614458, + "grad_norm": 0.8253313899040222, + "learning_rate": 4.676483712628291e-05, + "loss": 0.1097, + "step": 15760 + }, + { + "epoch": 94.96987951807229, + "grad_norm": 0.8523429036140442, + "learning_rate": 4.676204819277109e-05, + "loss": 0.1099, + "step": 15765 + }, + { + "epoch": 95.0, + "grad_norm": 1.5498061180114746, + "learning_rate": 4.675925925925926e-05, + "loss": 0.1522, + "step": 15770 + }, + { + "epoch": 95.0, + "eval_accuracy": 0.9500589854502556, + "eval_auc": 0.9869014880752757, + "eval_f1": 0.9252501471453797, + "eval_loss": 0.19789645075798035, + "eval_precision": 0.9562043795620438, + "eval_recall": 0.8962371721778791, + "eval_runtime": 16.8002, + "eval_samples_per_second": 151.367, + "eval_steps_per_second": 0.774, + "step": 15770 + }, + { + "epoch": 95.03012048192771, + "grad_norm": 1.4736396074295044, + "learning_rate": 4.6756470325747435e-05, + "loss": 0.1415, + "step": 15775 + }, + { + "epoch": 95.06024096385542, + "grad_norm": 2.076514959335327, + "learning_rate": 4.675368139223561e-05, + "loss": 0.1535, + "step": 15780 + }, + { + "epoch": 95.09036144578313, + "grad_norm": 1.2584677934646606, + "learning_rate": 4.675089245872379e-05, + "loss": 0.1, + "step": 15785 + }, + { + "epoch": 95.12048192771084, + "grad_norm": 1.3247615098953247, + "learning_rate": 4.674810352521196e-05, + "loss": 0.1273, + "step": 15790 + }, + { + "epoch": 95.15060240963855, + "grad_norm": 1.2890088558197021, + "learning_rate": 4.674531459170014e-05, + "loss": 0.1322, + "step": 15795 + }, + { + "epoch": 95.18072289156626, + "grad_norm": 1.6674821376800537, + "learning_rate": 4.674252565818831e-05, + "loss": 0.1026, + "step": 15800 + }, + { + "epoch": 95.21084337349397, + "grad_norm": 0.9363815784454346, + "learning_rate": 4.6739736724676484e-05, + "loss": 0.1242, + "step": 15805 + }, + { + "epoch": 95.24096385542168, + "grad_norm": 1.679032564163208, + "learning_rate": 4.673694779116466e-05, + "loss": 0.1314, + "step": 15810 + }, + { + "epoch": 95.2710843373494, + "grad_norm": 1.0566807985305786, + "learning_rate": 4.673415885765284e-05, + "loss": 0.159, + "step": 15815 + }, + { + "epoch": 95.3012048192771, + "grad_norm": 1.3573179244995117, + "learning_rate": 4.673136992414101e-05, + "loss": 0.1406, + "step": 15820 + }, + { + "epoch": 95.33132530120481, + "grad_norm": 0.8744345307350159, + "learning_rate": 4.6728580990629186e-05, + "loss": 0.1365, + "step": 15825 + }, + { + "epoch": 95.36144578313252, + "grad_norm": 1.0821994543075562, + "learning_rate": 4.672579205711736e-05, + "loss": 0.1234, + "step": 15830 + }, + { + "epoch": 95.39156626506023, + "grad_norm": 1.922343134880066, + "learning_rate": 4.6723003123605534e-05, + "loss": 0.1118, + "step": 15835 + }, + { + "epoch": 95.42168674698796, + "grad_norm": 0.9799426794052124, + "learning_rate": 4.672021419009371e-05, + "loss": 0.0908, + "step": 15840 + }, + { + "epoch": 95.45180722891567, + "grad_norm": 2.637895345687866, + "learning_rate": 4.671742525658189e-05, + "loss": 0.105, + "step": 15845 + }, + { + "epoch": 95.48192771084338, + "grad_norm": 1.9730476140975952, + "learning_rate": 4.671463632307006e-05, + "loss": 0.1127, + "step": 15850 + }, + { + "epoch": 95.51204819277109, + "grad_norm": 3.6851091384887695, + "learning_rate": 4.6711847389558236e-05, + "loss": 0.1292, + "step": 15855 + }, + { + "epoch": 95.5421686746988, + "grad_norm": 1.5435463190078735, + "learning_rate": 4.670905845604641e-05, + "loss": 0.162, + "step": 15860 + }, + { + "epoch": 95.57228915662651, + "grad_norm": 1.4445061683654785, + "learning_rate": 4.670626952253458e-05, + "loss": 0.1507, + "step": 15865 + }, + { + "epoch": 95.60240963855422, + "grad_norm": 1.2720803022384644, + "learning_rate": 4.670348058902276e-05, + "loss": 0.1351, + "step": 15870 + }, + { + "epoch": 95.63253012048193, + "grad_norm": 1.1591479778289795, + "learning_rate": 4.670069165551094e-05, + "loss": 0.115, + "step": 15875 + }, + { + "epoch": 95.66265060240964, + "grad_norm": 1.4791457653045654, + "learning_rate": 4.669790272199911e-05, + "loss": 0.1304, + "step": 15880 + }, + { + "epoch": 95.69277108433735, + "grad_norm": 1.037960410118103, + "learning_rate": 4.6695113788487285e-05, + "loss": 0.1382, + "step": 15885 + }, + { + "epoch": 95.72289156626506, + "grad_norm": 0.9557424187660217, + "learning_rate": 4.669232485497546e-05, + "loss": 0.1161, + "step": 15890 + }, + { + "epoch": 95.75301204819277, + "grad_norm": 1.8750642538070679, + "learning_rate": 4.668953592146363e-05, + "loss": 0.1205, + "step": 15895 + }, + { + "epoch": 95.78313253012048, + "grad_norm": 1.0831913948059082, + "learning_rate": 4.668674698795181e-05, + "loss": 0.0839, + "step": 15900 + }, + { + "epoch": 95.8132530120482, + "grad_norm": 0.7221660017967224, + "learning_rate": 4.668395805443999e-05, + "loss": 0.1134, + "step": 15905 + }, + { + "epoch": 95.8433734939759, + "grad_norm": 1.5886327028274536, + "learning_rate": 4.668116912092816e-05, + "loss": 0.1243, + "step": 15910 + }, + { + "epoch": 95.87349397590361, + "grad_norm": 1.270094394683838, + "learning_rate": 4.6678380187416335e-05, + "loss": 0.1272, + "step": 15915 + }, + { + "epoch": 95.90361445783132, + "grad_norm": 2.4260833263397217, + "learning_rate": 4.667559125390451e-05, + "loss": 0.1571, + "step": 15920 + }, + { + "epoch": 95.93373493975903, + "grad_norm": 0.9783493876457214, + "learning_rate": 4.667280232039268e-05, + "loss": 0.1062, + "step": 15925 + }, + { + "epoch": 95.96385542168674, + "grad_norm": 1.5558791160583496, + "learning_rate": 4.6670013386880856e-05, + "loss": 0.1235, + "step": 15930 + }, + { + "epoch": 95.99397590361446, + "grad_norm": 1.8295536041259766, + "learning_rate": 4.666722445336904e-05, + "loss": 0.1616, + "step": 15935 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.9418010224144711, + "eval_auc": 0.9832754082248636, + "eval_f1": 0.9121140142517815, + "eval_loss": 0.2412232756614685, + "eval_precision": 0.9516728624535316, + "eval_recall": 0.8757126567844926, + "eval_runtime": 17.0487, + "eval_samples_per_second": 149.161, + "eval_steps_per_second": 0.763, + "step": 15936 + }, + { + "epoch": 96.02409638554217, + "grad_norm": 1.7150579690933228, + "learning_rate": 4.666443551985721e-05, + "loss": 0.1433, + "step": 15940 + }, + { + "epoch": 96.05421686746988, + "grad_norm": 1.854522705078125, + "learning_rate": 4.6661646586345384e-05, + "loss": 0.1344, + "step": 15945 + }, + { + "epoch": 96.08433734939759, + "grad_norm": 1.563353180885315, + "learning_rate": 4.665885765283356e-05, + "loss": 0.1068, + "step": 15950 + }, + { + "epoch": 96.1144578313253, + "grad_norm": 0.6915176510810852, + "learning_rate": 4.665606871932173e-05, + "loss": 0.1171, + "step": 15955 + }, + { + "epoch": 96.144578313253, + "grad_norm": 0.9819210171699524, + "learning_rate": 4.6653279785809906e-05, + "loss": 0.1068, + "step": 15960 + }, + { + "epoch": 96.17469879518072, + "grad_norm": 1.0088191032409668, + "learning_rate": 4.6650490852298086e-05, + "loss": 0.0913, + "step": 15965 + }, + { + "epoch": 96.20481927710843, + "grad_norm": 1.2183645963668823, + "learning_rate": 4.664770191878626e-05, + "loss": 0.129, + "step": 15970 + }, + { + "epoch": 96.23493975903614, + "grad_norm": 0.746493399143219, + "learning_rate": 4.6644912985274434e-05, + "loss": 0.1636, + "step": 15975 + }, + { + "epoch": 96.26506024096386, + "grad_norm": 2.4620680809020996, + "learning_rate": 4.664212405176261e-05, + "loss": 0.1255, + "step": 15980 + }, + { + "epoch": 96.29518072289157, + "grad_norm": 1.318218469619751, + "learning_rate": 4.663933511825078e-05, + "loss": 0.1316, + "step": 15985 + }, + { + "epoch": 96.32530120481928, + "grad_norm": 1.3522661924362183, + "learning_rate": 4.663654618473896e-05, + "loss": 0.1186, + "step": 15990 + }, + { + "epoch": 96.355421686747, + "grad_norm": 1.5552898645401, + "learning_rate": 4.6633757251227136e-05, + "loss": 0.1408, + "step": 15995 + }, + { + "epoch": 96.3855421686747, + "grad_norm": 0.9430577754974365, + "learning_rate": 4.663096831771531e-05, + "loss": 0.122, + "step": 16000 + }, + { + "epoch": 96.41566265060241, + "grad_norm": 0.7161679267883301, + "learning_rate": 4.6628179384203484e-05, + "loss": 0.1138, + "step": 16005 + }, + { + "epoch": 96.44578313253012, + "grad_norm": 2.2095699310302734, + "learning_rate": 4.662539045069166e-05, + "loss": 0.1104, + "step": 16010 + }, + { + "epoch": 96.47590361445783, + "grad_norm": 1.1146612167358398, + "learning_rate": 4.662260151717983e-05, + "loss": 0.0945, + "step": 16015 + }, + { + "epoch": 96.50602409638554, + "grad_norm": 0.7639297246932983, + "learning_rate": 4.661981258366801e-05, + "loss": 0.0998, + "step": 16020 + }, + { + "epoch": 96.53614457831326, + "grad_norm": 1.307127594947815, + "learning_rate": 4.6617023650156186e-05, + "loss": 0.1117, + "step": 16025 + }, + { + "epoch": 96.56626506024097, + "grad_norm": 1.696721076965332, + "learning_rate": 4.661423471664436e-05, + "loss": 0.1107, + "step": 16030 + }, + { + "epoch": 96.59638554216868, + "grad_norm": 2.453157663345337, + "learning_rate": 4.661144578313253e-05, + "loss": 0.1544, + "step": 16035 + }, + { + "epoch": 96.62650602409639, + "grad_norm": 1.5119658708572388, + "learning_rate": 4.660865684962071e-05, + "loss": 0.1055, + "step": 16040 + }, + { + "epoch": 96.6566265060241, + "grad_norm": 1.1876705884933472, + "learning_rate": 4.660586791610888e-05, + "loss": 0.1325, + "step": 16045 + }, + { + "epoch": 96.6867469879518, + "grad_norm": 0.7079341411590576, + "learning_rate": 4.660307898259706e-05, + "loss": 0.1109, + "step": 16050 + }, + { + "epoch": 96.71686746987952, + "grad_norm": 2.3505184650421143, + "learning_rate": 4.6600290049085235e-05, + "loss": 0.0938, + "step": 16055 + }, + { + "epoch": 96.74698795180723, + "grad_norm": 1.5596070289611816, + "learning_rate": 4.65975011155734e-05, + "loss": 0.1855, + "step": 16060 + }, + { + "epoch": 96.77710843373494, + "grad_norm": 0.8612086772918701, + "learning_rate": 4.659471218206158e-05, + "loss": 0.1204, + "step": 16065 + }, + { + "epoch": 96.80722891566265, + "grad_norm": 1.3117247819900513, + "learning_rate": 4.6591923248549756e-05, + "loss": 0.1037, + "step": 16070 + }, + { + "epoch": 96.83734939759036, + "grad_norm": 1.7731101512908936, + "learning_rate": 4.658913431503793e-05, + "loss": 0.1445, + "step": 16075 + }, + { + "epoch": 96.86746987951807, + "grad_norm": 1.4806108474731445, + "learning_rate": 4.658634538152611e-05, + "loss": 0.1438, + "step": 16080 + }, + { + "epoch": 96.89759036144578, + "grad_norm": 1.2414298057556152, + "learning_rate": 4.6583556448014285e-05, + "loss": 0.1147, + "step": 16085 + }, + { + "epoch": 96.92771084337349, + "grad_norm": 1.7466983795166016, + "learning_rate": 4.658076751450245e-05, + "loss": 0.1361, + "step": 16090 + }, + { + "epoch": 96.9578313253012, + "grad_norm": 1.1356420516967773, + "learning_rate": 4.657797858099063e-05, + "loss": 0.1157, + "step": 16095 + }, + { + "epoch": 96.98795180722891, + "grad_norm": 2.459758758544922, + "learning_rate": 4.6575189647478806e-05, + "loss": 0.1374, + "step": 16100 + }, + { + "epoch": 97.0, + "eval_accuracy": 0.9457333857648447, + "eval_auc": 0.9851661303061704, + "eval_f1": 0.9173652694610779, + "eval_loss": 0.22718866169452667, + "eval_precision": 0.9659520807061791, + "eval_recall": 0.8734321550741163, + "eval_runtime": 16.5732, + "eval_samples_per_second": 153.441, + "eval_steps_per_second": 0.784, + "step": 16102 + }, + { + "epoch": 97.01807228915662, + "grad_norm": 1.7173633575439453, + "learning_rate": 4.657240071396698e-05, + "loss": 0.1034, + "step": 16105 + }, + { + "epoch": 97.04819277108433, + "grad_norm": 0.6392005681991577, + "learning_rate": 4.656961178045516e-05, + "loss": 0.1016, + "step": 16110 + }, + { + "epoch": 97.07831325301204, + "grad_norm": 1.4957361221313477, + "learning_rate": 4.6566822846943334e-05, + "loss": 0.1364, + "step": 16115 + }, + { + "epoch": 97.10843373493977, + "grad_norm": 2.0598764419555664, + "learning_rate": 4.65640339134315e-05, + "loss": 0.1122, + "step": 16120 + }, + { + "epoch": 97.13855421686748, + "grad_norm": 1.5152614116668701, + "learning_rate": 4.656124497991968e-05, + "loss": 0.1032, + "step": 16125 + }, + { + "epoch": 97.16867469879519, + "grad_norm": 2.9211199283599854, + "learning_rate": 4.6558456046407856e-05, + "loss": 0.1917, + "step": 16130 + }, + { + "epoch": 97.1987951807229, + "grad_norm": 2.7019996643066406, + "learning_rate": 4.655566711289603e-05, + "loss": 0.1223, + "step": 16135 + }, + { + "epoch": 97.2289156626506, + "grad_norm": 1.0914349555969238, + "learning_rate": 4.655287817938421e-05, + "loss": 0.1117, + "step": 16140 + }, + { + "epoch": 97.25903614457832, + "grad_norm": 0.7972518801689148, + "learning_rate": 4.655008924587238e-05, + "loss": 0.1037, + "step": 16145 + }, + { + "epoch": 97.28915662650603, + "grad_norm": 0.6472540497779846, + "learning_rate": 4.654730031236055e-05, + "loss": 0.1293, + "step": 16150 + }, + { + "epoch": 97.31927710843374, + "grad_norm": 0.8009729981422424, + "learning_rate": 4.654451137884873e-05, + "loss": 0.1242, + "step": 16155 + }, + { + "epoch": 97.34939759036145, + "grad_norm": 1.0288755893707275, + "learning_rate": 4.6541722445336905e-05, + "loss": 0.1358, + "step": 16160 + }, + { + "epoch": 97.37951807228916, + "grad_norm": 0.8072810173034668, + "learning_rate": 4.653893351182508e-05, + "loss": 0.1229, + "step": 16165 + }, + { + "epoch": 97.40963855421687, + "grad_norm": 2.337909460067749, + "learning_rate": 4.653614457831326e-05, + "loss": 0.0932, + "step": 16170 + }, + { + "epoch": 97.43975903614458, + "grad_norm": 0.561859130859375, + "learning_rate": 4.6533355644801426e-05, + "loss": 0.0918, + "step": 16175 + }, + { + "epoch": 97.46987951807229, + "grad_norm": 1.3851932287216187, + "learning_rate": 4.65305667112896e-05, + "loss": 0.1177, + "step": 16180 + }, + { + "epoch": 97.5, + "grad_norm": 1.1070224046707153, + "learning_rate": 4.652777777777778e-05, + "loss": 0.1441, + "step": 16185 + }, + { + "epoch": 97.53012048192771, + "grad_norm": 1.4459174871444702, + "learning_rate": 4.6524988844265955e-05, + "loss": 0.1391, + "step": 16190 + }, + { + "epoch": 97.56024096385542, + "grad_norm": 0.9987706542015076, + "learning_rate": 4.652219991075413e-05, + "loss": 0.1393, + "step": 16195 + }, + { + "epoch": 97.59036144578313, + "grad_norm": 1.2517822980880737, + "learning_rate": 4.651941097724231e-05, + "loss": 0.1147, + "step": 16200 + }, + { + "epoch": 97.62048192771084, + "grad_norm": 0.6012493968009949, + "learning_rate": 4.6516622043730476e-05, + "loss": 0.1125, + "step": 16205 + }, + { + "epoch": 97.65060240963855, + "grad_norm": 0.6572882533073425, + "learning_rate": 4.651383311021865e-05, + "loss": 0.1406, + "step": 16210 + }, + { + "epoch": 97.68072289156626, + "grad_norm": 1.877267837524414, + "learning_rate": 4.651104417670683e-05, + "loss": 0.1172, + "step": 16215 + }, + { + "epoch": 97.71084337349397, + "grad_norm": 1.3656926155090332, + "learning_rate": 4.6508255243195004e-05, + "loss": 0.1493, + "step": 16220 + }, + { + "epoch": 97.74096385542168, + "grad_norm": 2.071478843688965, + "learning_rate": 4.650546630968318e-05, + "loss": 0.1464, + "step": 16225 + }, + { + "epoch": 97.7710843373494, + "grad_norm": 1.0450981855392456, + "learning_rate": 4.650267737617136e-05, + "loss": 0.123, + "step": 16230 + }, + { + "epoch": 97.8012048192771, + "grad_norm": 1.2422423362731934, + "learning_rate": 4.6499888442659526e-05, + "loss": 0.1201, + "step": 16235 + }, + { + "epoch": 97.83132530120481, + "grad_norm": 1.80039381980896, + "learning_rate": 4.6497099509147706e-05, + "loss": 0.1066, + "step": 16240 + }, + { + "epoch": 97.86144578313252, + "grad_norm": 2.6740550994873047, + "learning_rate": 4.649431057563588e-05, + "loss": 0.145, + "step": 16245 + }, + { + "epoch": 97.89156626506023, + "grad_norm": 1.6974881887435913, + "learning_rate": 4.6491521642124054e-05, + "loss": 0.1251, + "step": 16250 + }, + { + "epoch": 97.92168674698796, + "grad_norm": 0.8452017307281494, + "learning_rate": 4.6488732708612234e-05, + "loss": 0.1148, + "step": 16255 + }, + { + "epoch": 97.95180722891567, + "grad_norm": 0.8741090893745422, + "learning_rate": 4.64859437751004e-05, + "loss": 0.1366, + "step": 16260 + }, + { + "epoch": 97.98192771084338, + "grad_norm": 1.6355156898498535, + "learning_rate": 4.6483154841588575e-05, + "loss": 0.1239, + "step": 16265 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.9547778214707039, + "eval_auc": 0.9865613292067112, + "eval_f1": 0.9329446064139941, + "eval_loss": 0.1774035394191742, + "eval_precision": 0.954653937947494, + "eval_recall": 0.9122006841505131, + "eval_runtime": 19.7506, + "eval_samples_per_second": 128.756, + "eval_steps_per_second": 0.658, + "step": 16268 + } + ], + "logging_steps": 5, + "max_steps": 99600, + "num_input_tokens_seen": 0, + "num_train_epochs": 600, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 100, + "early_stopping_threshold": 0.01 + }, + "attributes": { + "early_stopping_patience_counter": 44 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.319025282240568e+21, + "train_batch_size": 100, + "trial_name": null, + "trial_params": null +}