{ "best_metric": 0.1774035394191742, "best_model_checkpoint": "Prostate158-PI-CAI-MRI-Tumor-T2W-ADC-HBV-DWI-v01/checkpoint-16268", "epoch": 98.0, "eval_steps": 500, "global_step": 16268, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.030120481927710843, "grad_norm": 5.825586318969727, "learning_rate": 2.5100401606425703e-08, "loss": 0.7396, "step": 5 }, { "epoch": 0.060240963855421686, "grad_norm": 2.165442705154419, "learning_rate": 5.0200803212851406e-08, "loss": 0.7379, "step": 10 }, { "epoch": 0.09036144578313253, "grad_norm": 2.5576512813568115, "learning_rate": 7.530120481927711e-08, "loss": 0.7364, "step": 15 }, { "epoch": 0.12048192771084337, "grad_norm": 3.243180751800537, "learning_rate": 1.0040160642570281e-07, "loss": 0.7588, "step": 20 }, { "epoch": 0.15060240963855423, "grad_norm": 2.1289498805999756, "learning_rate": 1.2550200803212853e-07, "loss": 0.7046, "step": 25 }, { "epoch": 0.18072289156626506, "grad_norm": 3.678800582885742, "learning_rate": 1.5060240963855423e-07, "loss": 0.7478, "step": 30 }, { "epoch": 0.21084337349397592, "grad_norm": 2.020659923553467, "learning_rate": 1.7570281124497993e-07, "loss": 0.7372, "step": 35 }, { "epoch": 0.24096385542168675, "grad_norm": 3.0805442333221436, "learning_rate": 2.0080321285140563e-07, "loss": 0.7562, "step": 40 }, { "epoch": 0.2710843373493976, "grad_norm": 2.0384714603424072, "learning_rate": 2.2590361445783133e-07, "loss": 0.728, "step": 45 }, { "epoch": 0.30120481927710846, "grad_norm": 2.3842644691467285, "learning_rate": 2.5100401606425705e-07, "loss": 0.7374, "step": 50 }, { "epoch": 0.3313253012048193, "grad_norm": 2.663119316101074, "learning_rate": 2.761044176706827e-07, "loss": 0.7349, "step": 55 }, { "epoch": 0.3614457831325301, "grad_norm": 2.26052188873291, "learning_rate": 3.0120481927710845e-07, "loss": 0.7327, "step": 60 }, { "epoch": 0.39156626506024095, "grad_norm": 2.991396188735962, "learning_rate": 3.263052208835341e-07, "loss": 0.7289, "step": 65 }, { "epoch": 0.42168674698795183, "grad_norm": 2.195991277694702, "learning_rate": 3.5140562248995985e-07, "loss": 0.7252, "step": 70 }, { "epoch": 0.45180722891566266, "grad_norm": 2.390545129776001, "learning_rate": 3.765060240963856e-07, "loss": 0.7333, "step": 75 }, { "epoch": 0.4819277108433735, "grad_norm": 3.8800010681152344, "learning_rate": 4.0160642570281125e-07, "loss": 0.7129, "step": 80 }, { "epoch": 0.5120481927710844, "grad_norm": 2.3233087062835693, "learning_rate": 4.26706827309237e-07, "loss": 0.7251, "step": 85 }, { "epoch": 0.5421686746987951, "grad_norm": 2.914829730987549, "learning_rate": 4.5180722891566265e-07, "loss": 0.7321, "step": 90 }, { "epoch": 0.572289156626506, "grad_norm": 2.2649123668670654, "learning_rate": 4.769076305220884e-07, "loss": 0.7101, "step": 95 }, { "epoch": 0.6024096385542169, "grad_norm": 2.63425612449646, "learning_rate": 5.020080321285141e-07, "loss": 0.7302, "step": 100 }, { "epoch": 0.6325301204819277, "grad_norm": 2.2094509601593018, "learning_rate": 5.271084337349398e-07, "loss": 0.7194, "step": 105 }, { "epoch": 0.6626506024096386, "grad_norm": 2.336904287338257, "learning_rate": 5.522088353413655e-07, "loss": 0.7301, "step": 110 }, { "epoch": 0.6927710843373494, "grad_norm": 3.619689464569092, "learning_rate": 5.773092369477911e-07, "loss": 0.6986, "step": 115 }, { "epoch": 0.7228915662650602, "grad_norm": 2.2559053897857666, "learning_rate": 6.024096385542169e-07, "loss": 0.7399, "step": 120 }, { "epoch": 0.7530120481927711, "grad_norm": 2.235473871231079, "learning_rate": 6.275100401606426e-07, "loss": 0.7268, "step": 125 }, { "epoch": 0.7831325301204819, "grad_norm": 2.779989242553711, "learning_rate": 6.526104417670682e-07, "loss": 0.7044, "step": 130 }, { "epoch": 0.8132530120481928, "grad_norm": 2.1756601333618164, "learning_rate": 6.77710843373494e-07, "loss": 0.719, "step": 135 }, { "epoch": 0.8433734939759037, "grad_norm": 10.11095142364502, "learning_rate": 7.028112449799197e-07, "loss": 0.7135, "step": 140 }, { "epoch": 0.8734939759036144, "grad_norm": 2.282147169113159, "learning_rate": 7.279116465863455e-07, "loss": 0.7205, "step": 145 }, { "epoch": 0.9036144578313253, "grad_norm": 2.8020949363708496, "learning_rate": 7.530120481927712e-07, "loss": 0.7529, "step": 150 }, { "epoch": 0.9337349397590361, "grad_norm": 2.5485680103302, "learning_rate": 7.781124497991968e-07, "loss": 0.7155, "step": 155 }, { "epoch": 0.963855421686747, "grad_norm": 1.8938534259796143, "learning_rate": 8.032128514056225e-07, "loss": 0.7242, "step": 160 }, { "epoch": 0.9939759036144579, "grad_norm": 2.8763959407806396, "learning_rate": 8.283132530120483e-07, "loss": 0.7217, "step": 165 }, { "epoch": 1.0, "eval_accuracy": 0.5422729060165159, "eval_auc": 0.5260933335706004, "eval_f1": 0.40551583248212464, "eval_loss": 0.7027444243431091, "eval_precision": 0.36725254394079554, "eval_recall": 0.4526795895096921, "eval_runtime": 18.6786, "eval_samples_per_second": 136.145, "eval_steps_per_second": 0.696, "step": 166 }, { "epoch": 1.0240963855421688, "grad_norm": 2.2965047359466553, "learning_rate": 8.53413654618474e-07, "loss": 0.7055, "step": 170 }, { "epoch": 1.0542168674698795, "grad_norm": 3.0637803077697754, "learning_rate": 8.785140562248996e-07, "loss": 0.7046, "step": 175 }, { "epoch": 1.0843373493975903, "grad_norm": 2.772043466567993, "learning_rate": 9.036144578313253e-07, "loss": 0.6925, "step": 180 }, { "epoch": 1.1144578313253013, "grad_norm": 2.482011556625366, "learning_rate": 9.28714859437751e-07, "loss": 0.7053, "step": 185 }, { "epoch": 1.144578313253012, "grad_norm": 2.894693374633789, "learning_rate": 9.538152610441769e-07, "loss": 0.7386, "step": 190 }, { "epoch": 1.1746987951807228, "grad_norm": 2.042769432067871, "learning_rate": 9.789156626506025e-07, "loss": 0.7142, "step": 195 }, { "epoch": 1.2048192771084336, "grad_norm": 2.739431142807007, "learning_rate": 1.0040160642570282e-06, "loss": 0.7128, "step": 200 }, { "epoch": 1.2349397590361446, "grad_norm": 2.2312636375427246, "learning_rate": 1.0291164658634539e-06, "loss": 0.7218, "step": 205 }, { "epoch": 1.2650602409638554, "grad_norm": 2.0345458984375, "learning_rate": 1.0542168674698796e-06, "loss": 0.6982, "step": 210 }, { "epoch": 1.2951807228915664, "grad_norm": 2.9002037048339844, "learning_rate": 1.0793172690763054e-06, "loss": 0.7212, "step": 215 }, { "epoch": 1.3253012048192772, "grad_norm": 2.2102439403533936, "learning_rate": 1.104417670682731e-06, "loss": 0.681, "step": 220 }, { "epoch": 1.355421686746988, "grad_norm": 2.3994767665863037, "learning_rate": 1.1295180722891566e-06, "loss": 0.707, "step": 225 }, { "epoch": 1.3855421686746987, "grad_norm": 2.515076160430908, "learning_rate": 1.1546184738955822e-06, "loss": 0.6887, "step": 230 }, { "epoch": 1.4156626506024097, "grad_norm": 2.2097108364105225, "learning_rate": 1.179718875502008e-06, "loss": 0.6966, "step": 235 }, { "epoch": 1.4457831325301205, "grad_norm": 2.8546788692474365, "learning_rate": 1.2048192771084338e-06, "loss": 0.6811, "step": 240 }, { "epoch": 1.4759036144578312, "grad_norm": 3.2900049686431885, "learning_rate": 1.2299196787148595e-06, "loss": 0.7053, "step": 245 }, { "epoch": 1.5060240963855422, "grad_norm": 2.3631157875061035, "learning_rate": 1.2550200803212852e-06, "loss": 0.6831, "step": 250 }, { "epoch": 1.536144578313253, "grad_norm": 1.8485504388809204, "learning_rate": 1.2801204819277108e-06, "loss": 0.6957, "step": 255 }, { "epoch": 1.5662650602409638, "grad_norm": 2.748507261276245, "learning_rate": 1.3052208835341365e-06, "loss": 0.69, "step": 260 }, { "epoch": 1.5963855421686746, "grad_norm": 1.7519440650939941, "learning_rate": 1.3303212851405624e-06, "loss": 0.6928, "step": 265 }, { "epoch": 1.6265060240963856, "grad_norm": 1.7398183345794678, "learning_rate": 1.355421686746988e-06, "loss": 0.6695, "step": 270 }, { "epoch": 1.6566265060240963, "grad_norm": 2.2140514850616455, "learning_rate": 1.3805220883534137e-06, "loss": 0.696, "step": 275 }, { "epoch": 1.6867469879518073, "grad_norm": 2.0776941776275635, "learning_rate": 1.4056224899598394e-06, "loss": 0.6636, "step": 280 }, { "epoch": 1.716867469879518, "grad_norm": 2.1594786643981934, "learning_rate": 1.430722891566265e-06, "loss": 0.7105, "step": 285 }, { "epoch": 1.7469879518072289, "grad_norm": 2.252272844314575, "learning_rate": 1.455823293172691e-06, "loss": 0.6678, "step": 290 }, { "epoch": 1.7771084337349397, "grad_norm": 2.4131171703338623, "learning_rate": 1.4809236947791166e-06, "loss": 0.6576, "step": 295 }, { "epoch": 1.8072289156626506, "grad_norm": 2.6049649715423584, "learning_rate": 1.5060240963855423e-06, "loss": 0.6773, "step": 300 }, { "epoch": 1.8373493975903614, "grad_norm": 2.995439052581787, "learning_rate": 1.531124497991968e-06, "loss": 0.6792, "step": 305 }, { "epoch": 1.8674698795180724, "grad_norm": 1.8594603538513184, "learning_rate": 1.5562248995983937e-06, "loss": 0.6584, "step": 310 }, { "epoch": 1.8975903614457832, "grad_norm": 1.6492047309875488, "learning_rate": 1.5813253012048193e-06, "loss": 0.669, "step": 315 }, { "epoch": 1.927710843373494, "grad_norm": 2.0496723651885986, "learning_rate": 1.606425702811245e-06, "loss": 0.6582, "step": 320 }, { "epoch": 1.9578313253012047, "grad_norm": 2.19722056388855, "learning_rate": 1.6315261044176709e-06, "loss": 0.6789, "step": 325 }, { "epoch": 1.9879518072289155, "grad_norm": 2.4436256885528564, "learning_rate": 1.6566265060240966e-06, "loss": 0.6554, "step": 330 }, { "epoch": 2.0, "eval_accuracy": 0.6217066456940621, "eval_auc": 0.6144076786929138, "eval_f1": 0.5036119711042312, "eval_loss": 0.653194010257721, "eval_precision": 0.4599434495758718, "eval_recall": 0.556442417331813, "eval_runtime": 20.269, "eval_samples_per_second": 125.462, "eval_steps_per_second": 0.641, "step": 332 }, { "epoch": 2.0180722891566263, "grad_norm": 1.9626730680465698, "learning_rate": 1.6817269076305222e-06, "loss": 0.6859, "step": 335 }, { "epoch": 2.0481927710843375, "grad_norm": 3.7429897785186768, "learning_rate": 1.706827309236948e-06, "loss": 0.6644, "step": 340 }, { "epoch": 2.0783132530120483, "grad_norm": 3.046361207962036, "learning_rate": 1.7319277108433736e-06, "loss": 0.6635, "step": 345 }, { "epoch": 2.108433734939759, "grad_norm": 2.4592392444610596, "learning_rate": 1.7570281124497993e-06, "loss": 0.6551, "step": 350 }, { "epoch": 2.13855421686747, "grad_norm": 2.031898260116577, "learning_rate": 1.782128514056225e-06, "loss": 0.6567, "step": 355 }, { "epoch": 2.1686746987951806, "grad_norm": 2.472118616104126, "learning_rate": 1.8072289156626506e-06, "loss": 0.6759, "step": 360 }, { "epoch": 2.1987951807228914, "grad_norm": 2.3769283294677734, "learning_rate": 1.8323293172690763e-06, "loss": 0.6514, "step": 365 }, { "epoch": 2.2289156626506026, "grad_norm": 1.799721121788025, "learning_rate": 1.857429718875502e-06, "loss": 0.669, "step": 370 }, { "epoch": 2.2590361445783134, "grad_norm": 1.7890353202819824, "learning_rate": 1.882530120481928e-06, "loss": 0.6488, "step": 375 }, { "epoch": 2.289156626506024, "grad_norm": 2.3873209953308105, "learning_rate": 1.9076305220883537e-06, "loss": 0.6731, "step": 380 }, { "epoch": 2.319277108433735, "grad_norm": 2.3185174465179443, "learning_rate": 1.9327309236947794e-06, "loss": 0.6583, "step": 385 }, { "epoch": 2.3493975903614457, "grad_norm": 2.4070496559143066, "learning_rate": 1.957831325301205e-06, "loss": 0.6551, "step": 390 }, { "epoch": 2.3795180722891565, "grad_norm": 2.2057552337646484, "learning_rate": 1.9829317269076307e-06, "loss": 0.6313, "step": 395 }, { "epoch": 2.4096385542168672, "grad_norm": 1.937827467918396, "learning_rate": 2.0080321285140564e-06, "loss": 0.6206, "step": 400 }, { "epoch": 2.4397590361445785, "grad_norm": 4.218434810638428, "learning_rate": 2.033132530120482e-06, "loss": 0.679, "step": 405 }, { "epoch": 2.4698795180722892, "grad_norm": 1.721917748451233, "learning_rate": 2.0582329317269078e-06, "loss": 0.6457, "step": 410 }, { "epoch": 2.5, "grad_norm": 1.8327134847640991, "learning_rate": 2.0833333333333334e-06, "loss": 0.6613, "step": 415 }, { "epoch": 2.5301204819277108, "grad_norm": 2.555305242538452, "learning_rate": 2.108433734939759e-06, "loss": 0.6485, "step": 420 }, { "epoch": 2.5602409638554215, "grad_norm": 2.2785534858703613, "learning_rate": 2.133534136546185e-06, "loss": 0.6364, "step": 425 }, { "epoch": 2.5903614457831328, "grad_norm": 1.9908968210220337, "learning_rate": 2.158634538152611e-06, "loss": 0.6301, "step": 430 }, { "epoch": 2.6204819277108435, "grad_norm": 2.1207611560821533, "learning_rate": 2.1837349397590366e-06, "loss": 0.6492, "step": 435 }, { "epoch": 2.6506024096385543, "grad_norm": 2.1720712184906006, "learning_rate": 2.208835341365462e-06, "loss": 0.6638, "step": 440 }, { "epoch": 2.680722891566265, "grad_norm": 2.6269476413726807, "learning_rate": 2.2339357429718875e-06, "loss": 0.6382, "step": 445 }, { "epoch": 2.710843373493976, "grad_norm": 1.9714411497116089, "learning_rate": 2.259036144578313e-06, "loss": 0.6467, "step": 450 }, { "epoch": 2.7409638554216866, "grad_norm": 1.945236086845398, "learning_rate": 2.284136546184739e-06, "loss": 0.6273, "step": 455 }, { "epoch": 2.7710843373493974, "grad_norm": 2.1924173831939697, "learning_rate": 2.3092369477911645e-06, "loss": 0.6354, "step": 460 }, { "epoch": 2.8012048192771086, "grad_norm": 2.991903066635132, "learning_rate": 2.33433734939759e-06, "loss": 0.655, "step": 465 }, { "epoch": 2.8313253012048194, "grad_norm": 2.096970796585083, "learning_rate": 2.359437751004016e-06, "loss": 0.6365, "step": 470 }, { "epoch": 2.86144578313253, "grad_norm": 2.686661958694458, "learning_rate": 2.384538152610442e-06, "loss": 0.6234, "step": 475 }, { "epoch": 2.891566265060241, "grad_norm": 1.8316818475723267, "learning_rate": 2.4096385542168676e-06, "loss": 0.6229, "step": 480 }, { "epoch": 2.9216867469879517, "grad_norm": 2.257622241973877, "learning_rate": 2.4347389558232933e-06, "loss": 0.6702, "step": 485 }, { "epoch": 2.9518072289156625, "grad_norm": 2.138204574584961, "learning_rate": 2.459839357429719e-06, "loss": 0.6396, "step": 490 }, { "epoch": 2.9819277108433733, "grad_norm": 1.9312093257904053, "learning_rate": 2.4849397590361446e-06, "loss": 0.6088, "step": 495 }, { "epoch": 3.0, "eval_accuracy": 0.6594573338576485, "eval_auc": 0.6838651081869463, "eval_f1": 0.5725567620927937, "eval_loss": 0.6181133985519409, "eval_precision": 0.504786771105309, "eval_recall": 0.661345496009122, "eval_runtime": 19.5745, "eval_samples_per_second": 129.914, "eval_steps_per_second": 0.664, "step": 498 }, { "epoch": 3.0120481927710845, "grad_norm": 2.1444907188415527, "learning_rate": 2.5100401606425703e-06, "loss": 0.6148, "step": 500 }, { "epoch": 3.0421686746987953, "grad_norm": 2.5095620155334473, "learning_rate": 2.535140562248996e-06, "loss": 0.6332, "step": 505 }, { "epoch": 3.072289156626506, "grad_norm": 1.817351222038269, "learning_rate": 2.5602409638554217e-06, "loss": 0.6155, "step": 510 }, { "epoch": 3.102409638554217, "grad_norm": 2.1711740493774414, "learning_rate": 2.5853413654618473e-06, "loss": 0.6248, "step": 515 }, { "epoch": 3.1325301204819276, "grad_norm": 2.5385513305664062, "learning_rate": 2.610441767068273e-06, "loss": 0.5962, "step": 520 }, { "epoch": 3.1626506024096384, "grad_norm": 3.1502151489257812, "learning_rate": 2.635542168674699e-06, "loss": 0.6474, "step": 525 }, { "epoch": 3.1927710843373496, "grad_norm": 2.8188350200653076, "learning_rate": 2.6606425702811248e-06, "loss": 0.6637, "step": 530 }, { "epoch": 3.2228915662650603, "grad_norm": 4.671653747558594, "learning_rate": 2.6857429718875504e-06, "loss": 0.6052, "step": 535 }, { "epoch": 3.253012048192771, "grad_norm": 2.188119649887085, "learning_rate": 2.710843373493976e-06, "loss": 0.6422, "step": 540 }, { "epoch": 3.283132530120482, "grad_norm": 3.3826537132263184, "learning_rate": 2.735943775100402e-06, "loss": 0.619, "step": 545 }, { "epoch": 3.3132530120481927, "grad_norm": 2.027888298034668, "learning_rate": 2.7610441767068275e-06, "loss": 0.6217, "step": 550 }, { "epoch": 3.3433734939759034, "grad_norm": 1.8784477710723877, "learning_rate": 2.786144578313253e-06, "loss": 0.6295, "step": 555 }, { "epoch": 3.3734939759036147, "grad_norm": 2.3932571411132812, "learning_rate": 2.811244979919679e-06, "loss": 0.6064, "step": 560 }, { "epoch": 3.4036144578313254, "grad_norm": 2.1809239387512207, "learning_rate": 2.8363453815261045e-06, "loss": 0.6289, "step": 565 }, { "epoch": 3.433734939759036, "grad_norm": 2.666003704071045, "learning_rate": 2.86144578313253e-06, "loss": 0.6191, "step": 570 }, { "epoch": 3.463855421686747, "grad_norm": 3.224411725997925, "learning_rate": 2.8865461847389563e-06, "loss": 0.5992, "step": 575 }, { "epoch": 3.4939759036144578, "grad_norm": 1.847118616104126, "learning_rate": 2.911646586345382e-06, "loss": 0.6176, "step": 580 }, { "epoch": 3.5240963855421685, "grad_norm": 2.127950429916382, "learning_rate": 2.9367469879518076e-06, "loss": 0.6184, "step": 585 }, { "epoch": 3.5542168674698793, "grad_norm": 1.9930005073547363, "learning_rate": 2.9618473895582333e-06, "loss": 0.6008, "step": 590 }, { "epoch": 3.5843373493975905, "grad_norm": 1.9236960411071777, "learning_rate": 2.986947791164659e-06, "loss": 0.6371, "step": 595 }, { "epoch": 3.6144578313253013, "grad_norm": 2.96357798576355, "learning_rate": 3.0120481927710846e-06, "loss": 0.628, "step": 600 }, { "epoch": 3.644578313253012, "grad_norm": 2.2546164989471436, "learning_rate": 3.0371485943775103e-06, "loss": 0.6089, "step": 605 }, { "epoch": 3.674698795180723, "grad_norm": 2.080061197280884, "learning_rate": 3.062248995983936e-06, "loss": 0.6015, "step": 610 }, { "epoch": 3.7048192771084336, "grad_norm": 1.9112757444381714, "learning_rate": 3.0873493975903616e-06, "loss": 0.6229, "step": 615 }, { "epoch": 3.734939759036145, "grad_norm": 2.597161054611206, "learning_rate": 3.1124497991967873e-06, "loss": 0.6498, "step": 620 }, { "epoch": 3.765060240963855, "grad_norm": 2.107349157333374, "learning_rate": 3.137550200803213e-06, "loss": 0.6175, "step": 625 }, { "epoch": 3.7951807228915664, "grad_norm": 2.178757429122925, "learning_rate": 3.1626506024096387e-06, "loss": 0.6176, "step": 630 }, { "epoch": 3.825301204819277, "grad_norm": 1.8402270078659058, "learning_rate": 3.1877510040160643e-06, "loss": 0.605, "step": 635 }, { "epoch": 3.855421686746988, "grad_norm": 2.377106189727783, "learning_rate": 3.21285140562249e-06, "loss": 0.6259, "step": 640 }, { "epoch": 3.8855421686746987, "grad_norm": 1.693340539932251, "learning_rate": 3.2379518072289157e-06, "loss": 0.6122, "step": 645 }, { "epoch": 3.9156626506024095, "grad_norm": 1.722320556640625, "learning_rate": 3.2630522088353418e-06, "loss": 0.599, "step": 650 }, { "epoch": 3.9457831325301207, "grad_norm": 2.285301685333252, "learning_rate": 3.288152610441767e-06, "loss": 0.5832, "step": 655 }, { "epoch": 3.9759036144578315, "grad_norm": 2.8827247619628906, "learning_rate": 3.313253012048193e-06, "loss": 0.615, "step": 660 }, { "epoch": 4.0, "eval_accuracy": 0.6940621313409359, "eval_auc": 0.7312645696819207, "eval_f1": 0.6245173745173745, "eval_loss": 0.5903819799423218, "eval_precision": 0.5414225941422595, "eval_recall": 0.7377423033067275, "eval_runtime": 17.3385, "eval_samples_per_second": 146.668, "eval_steps_per_second": 0.75, "step": 664 }, { "epoch": 4.006024096385542, "grad_norm": 1.8333141803741455, "learning_rate": 3.3383534136546184e-06, "loss": 0.5948, "step": 665 }, { "epoch": 4.036144578313253, "grad_norm": 1.953653335571289, "learning_rate": 3.3634538152610445e-06, "loss": 0.5846, "step": 670 }, { "epoch": 4.066265060240964, "grad_norm": 2.1964218616485596, "learning_rate": 3.3885542168674697e-06, "loss": 0.6014, "step": 675 }, { "epoch": 4.096385542168675, "grad_norm": 1.7889600992202759, "learning_rate": 3.413654618473896e-06, "loss": 0.5968, "step": 680 }, { "epoch": 4.126506024096385, "grad_norm": 1.9411687850952148, "learning_rate": 3.438755020080321e-06, "loss": 0.6266, "step": 685 }, { "epoch": 4.156626506024097, "grad_norm": 3.8834457397460938, "learning_rate": 3.463855421686747e-06, "loss": 0.589, "step": 690 }, { "epoch": 4.186746987951807, "grad_norm": 1.8403301239013672, "learning_rate": 3.4889558232931724e-06, "loss": 0.6129, "step": 695 }, { "epoch": 4.216867469879518, "grad_norm": 2.3717918395996094, "learning_rate": 3.5140562248995985e-06, "loss": 0.5861, "step": 700 }, { "epoch": 4.246987951807229, "grad_norm": 1.6860854625701904, "learning_rate": 3.5391566265060246e-06, "loss": 0.6003, "step": 705 }, { "epoch": 4.27710843373494, "grad_norm": 2.402580738067627, "learning_rate": 3.56425702811245e-06, "loss": 0.5967, "step": 710 }, { "epoch": 4.307228915662651, "grad_norm": 2.134225845336914, "learning_rate": 3.589357429718876e-06, "loss": 0.6067, "step": 715 }, { "epoch": 4.337349397590361, "grad_norm": 1.800850749015808, "learning_rate": 3.614457831325301e-06, "loss": 0.5971, "step": 720 }, { "epoch": 4.367469879518072, "grad_norm": 2.6244163513183594, "learning_rate": 3.6395582329317273e-06, "loss": 0.6142, "step": 725 }, { "epoch": 4.397590361445783, "grad_norm": 1.7857623100280762, "learning_rate": 3.6646586345381526e-06, "loss": 0.6248, "step": 730 }, { "epoch": 4.427710843373494, "grad_norm": 2.7235026359558105, "learning_rate": 3.6897590361445786e-06, "loss": 0.5892, "step": 735 }, { "epoch": 4.457831325301205, "grad_norm": 2.886775016784668, "learning_rate": 3.714859437751004e-06, "loss": 0.5761, "step": 740 }, { "epoch": 4.4879518072289155, "grad_norm": 2.2062480449676514, "learning_rate": 3.73995983935743e-06, "loss": 0.5991, "step": 745 }, { "epoch": 4.518072289156627, "grad_norm": 2.093432664871216, "learning_rate": 3.765060240963856e-06, "loss": 0.5968, "step": 750 }, { "epoch": 4.548192771084337, "grad_norm": 2.6118621826171875, "learning_rate": 3.7901606425702813e-06, "loss": 0.6056, "step": 755 }, { "epoch": 4.578313253012048, "grad_norm": 1.7923548221588135, "learning_rate": 3.8152610441767074e-06, "loss": 0.5834, "step": 760 }, { "epoch": 4.608433734939759, "grad_norm": 2.803171157836914, "learning_rate": 3.840361445783132e-06, "loss": 0.6035, "step": 765 }, { "epoch": 4.63855421686747, "grad_norm": 2.002159357070923, "learning_rate": 3.865461847389559e-06, "loss": 0.5678, "step": 770 }, { "epoch": 4.668674698795181, "grad_norm": 1.9078603982925415, "learning_rate": 3.890562248995984e-06, "loss": 0.6102, "step": 775 }, { "epoch": 4.698795180722891, "grad_norm": 2.1809115409851074, "learning_rate": 3.91566265060241e-06, "loss": 0.5945, "step": 780 }, { "epoch": 4.728915662650603, "grad_norm": 2.0736207962036133, "learning_rate": 3.940763052208835e-06, "loss": 0.6274, "step": 785 }, { "epoch": 4.759036144578313, "grad_norm": 2.3214478492736816, "learning_rate": 3.9658634538152615e-06, "loss": 0.5932, "step": 790 }, { "epoch": 4.789156626506024, "grad_norm": 1.8077796697616577, "learning_rate": 3.990963855421686e-06, "loss": 0.586, "step": 795 }, { "epoch": 4.8192771084337345, "grad_norm": 1.7880562543869019, "learning_rate": 4.016064257028113e-06, "loss": 0.5747, "step": 800 }, { "epoch": 4.849397590361446, "grad_norm": 1.7393368482589722, "learning_rate": 4.0411646586345385e-06, "loss": 0.5664, "step": 805 }, { "epoch": 4.879518072289157, "grad_norm": 1.9396228790283203, "learning_rate": 4.066265060240964e-06, "loss": 0.5854, "step": 810 }, { "epoch": 4.909638554216867, "grad_norm": 2.0996456146240234, "learning_rate": 4.09136546184739e-06, "loss": 0.6016, "step": 815 }, { "epoch": 4.9397590361445785, "grad_norm": 2.321573495864868, "learning_rate": 4.1164658634538155e-06, "loss": 0.5594, "step": 820 }, { "epoch": 4.969879518072289, "grad_norm": 1.8303910493850708, "learning_rate": 4.141566265060241e-06, "loss": 0.5704, "step": 825 }, { "epoch": 5.0, "grad_norm": 2.0891566276550293, "learning_rate": 4.166666666666667e-06, "loss": 0.5958, "step": 830 }, { "epoch": 5.0, "eval_accuracy": 0.7286669288242233, "eval_auc": 0.7606243181423082, "eval_f1": 0.654308617234469, "eval_loss": 0.5537045001983643, "eval_precision": 0.5835567470956211, "eval_recall": 0.7445838084378563, "eval_runtime": 19.5383, "eval_samples_per_second": 130.155, "eval_steps_per_second": 0.665, "step": 830 }, { "epoch": 5.030120481927711, "grad_norm": 1.9920940399169922, "learning_rate": 4.1917670682730925e-06, "loss": 0.568, "step": 835 }, { "epoch": 5.0602409638554215, "grad_norm": 1.9087443351745605, "learning_rate": 4.216867469879518e-06, "loss": 0.5665, "step": 840 }, { "epoch": 5.090361445783133, "grad_norm": 2.4391868114471436, "learning_rate": 4.241967871485944e-06, "loss": 0.5713, "step": 845 }, { "epoch": 5.120481927710843, "grad_norm": 2.1306042671203613, "learning_rate": 4.26706827309237e-06, "loss": 0.5881, "step": 850 }, { "epoch": 5.150602409638554, "grad_norm": 2.5978026390075684, "learning_rate": 4.292168674698795e-06, "loss": 0.6233, "step": 855 }, { "epoch": 5.180722891566265, "grad_norm": 2.448408603668213, "learning_rate": 4.317269076305222e-06, "loss": 0.5802, "step": 860 }, { "epoch": 5.210843373493976, "grad_norm": 2.4194798469543457, "learning_rate": 4.342369477911647e-06, "loss": 0.5734, "step": 865 }, { "epoch": 5.240963855421687, "grad_norm": 1.9208903312683105, "learning_rate": 4.367469879518073e-06, "loss": 0.5666, "step": 870 }, { "epoch": 5.271084337349397, "grad_norm": 2.0400986671447754, "learning_rate": 4.392570281124498e-06, "loss": 0.5996, "step": 875 }, { "epoch": 5.301204819277109, "grad_norm": 3.45426607131958, "learning_rate": 4.417670682730924e-06, "loss": 0.5885, "step": 880 }, { "epoch": 5.331325301204819, "grad_norm": 2.500495433807373, "learning_rate": 4.442771084337349e-06, "loss": 0.554, "step": 885 }, { "epoch": 5.36144578313253, "grad_norm": 1.7928919792175293, "learning_rate": 4.467871485943775e-06, "loss": 0.5855, "step": 890 }, { "epoch": 5.391566265060241, "grad_norm": 9.793763160705566, "learning_rate": 4.492971887550201e-06, "loss": 0.5825, "step": 895 }, { "epoch": 5.421686746987952, "grad_norm": 2.1107916831970215, "learning_rate": 4.518072289156626e-06, "loss": 0.559, "step": 900 }, { "epoch": 5.451807228915663, "grad_norm": 2.334136962890625, "learning_rate": 4.543172690763053e-06, "loss": 0.532, "step": 905 }, { "epoch": 5.481927710843373, "grad_norm": 2.731076955795288, "learning_rate": 4.568273092369478e-06, "loss": 0.5743, "step": 910 }, { "epoch": 5.5120481927710845, "grad_norm": 2.3347549438476562, "learning_rate": 4.593373493975904e-06, "loss": 0.5561, "step": 915 }, { "epoch": 5.542168674698795, "grad_norm": 2.748056650161743, "learning_rate": 4.618473895582329e-06, "loss": 0.5781, "step": 920 }, { "epoch": 5.572289156626506, "grad_norm": 2.7957303524017334, "learning_rate": 4.6435742971887555e-06, "loss": 0.5558, "step": 925 }, { "epoch": 5.602409638554217, "grad_norm": 1.5970895290374756, "learning_rate": 4.66867469879518e-06, "loss": 0.5483, "step": 930 }, { "epoch": 5.632530120481928, "grad_norm": 2.1057193279266357, "learning_rate": 4.693775100401607e-06, "loss": 0.5366, "step": 935 }, { "epoch": 5.662650602409639, "grad_norm": 1.8413000106811523, "learning_rate": 4.718875502008032e-06, "loss": 0.6011, "step": 940 }, { "epoch": 5.692771084337349, "grad_norm": 2.379899263381958, "learning_rate": 4.743975903614458e-06, "loss": 0.5777, "step": 945 }, { "epoch": 5.72289156626506, "grad_norm": 2.024463415145874, "learning_rate": 4.769076305220884e-06, "loss": 0.5611, "step": 950 }, { "epoch": 5.753012048192771, "grad_norm": 2.208953619003296, "learning_rate": 4.7941767068273095e-06, "loss": 0.5552, "step": 955 }, { "epoch": 5.783132530120482, "grad_norm": 2.498267650604248, "learning_rate": 4.819277108433735e-06, "loss": 0.5749, "step": 960 }, { "epoch": 5.813253012048193, "grad_norm": 1.8806592226028442, "learning_rate": 4.844377510040161e-06, "loss": 0.5642, "step": 965 }, { "epoch": 5.843373493975903, "grad_norm": 2.3786778450012207, "learning_rate": 4.8694779116465866e-06, "loss": 0.5855, "step": 970 }, { "epoch": 5.873493975903615, "grad_norm": 1.818545937538147, "learning_rate": 4.894578313253012e-06, "loss": 0.5445, "step": 975 }, { "epoch": 5.903614457831325, "grad_norm": 2.958966016769409, "learning_rate": 4.919678714859438e-06, "loss": 0.5695, "step": 980 }, { "epoch": 5.933734939759036, "grad_norm": 2.4790141582489014, "learning_rate": 4.944779116465864e-06, "loss": 0.5498, "step": 985 }, { "epoch": 5.9638554216867465, "grad_norm": 2.4448063373565674, "learning_rate": 4.969879518072289e-06, "loss": 0.5355, "step": 990 }, { "epoch": 5.993975903614458, "grad_norm": 1.8333290815353394, "learning_rate": 4.994979919678715e-06, "loss": 0.5499, "step": 995 }, { "epoch": 6.0, "eval_accuracy": 0.739284309870232, "eval_auc": 0.7864267713927076, "eval_f1": 0.6475279106858054, "eval_loss": 0.5208005309104919, "eval_precision": 0.6065737051792829, "eval_recall": 0.6944127708095781, "eval_runtime": 19.8085, "eval_samples_per_second": 128.379, "eval_steps_per_second": 0.656, "step": 996 }, { "epoch": 6.024096385542169, "grad_norm": 1.8895107507705688, "learning_rate": 5.020080321285141e-06, "loss": 0.5548, "step": 1000 }, { "epoch": 6.054216867469879, "grad_norm": 2.6331355571746826, "learning_rate": 5.045180722891567e-06, "loss": 0.5754, "step": 1005 }, { "epoch": 6.0843373493975905, "grad_norm": 2.3016576766967773, "learning_rate": 5.070281124497992e-06, "loss": 0.5569, "step": 1010 }, { "epoch": 6.114457831325301, "grad_norm": 4.576088905334473, "learning_rate": 5.0953815261044185e-06, "loss": 0.5756, "step": 1015 }, { "epoch": 6.144578313253012, "grad_norm": 1.9560896158218384, "learning_rate": 5.120481927710843e-06, "loss": 0.5583, "step": 1020 }, { "epoch": 6.174698795180723, "grad_norm": 2.603187322616577, "learning_rate": 5.14558232931727e-06, "loss": 0.5079, "step": 1025 }, { "epoch": 6.204819277108434, "grad_norm": 1.9385886192321777, "learning_rate": 5.170682730923695e-06, "loss": 0.5322, "step": 1030 }, { "epoch": 6.234939759036145, "grad_norm": 2.257345199584961, "learning_rate": 5.195783132530121e-06, "loss": 0.594, "step": 1035 }, { "epoch": 6.265060240963855, "grad_norm": 1.9930522441864014, "learning_rate": 5.220883534136546e-06, "loss": 0.529, "step": 1040 }, { "epoch": 6.295180722891566, "grad_norm": 2.365818977355957, "learning_rate": 5.2459839357429725e-06, "loss": 0.5586, "step": 1045 }, { "epoch": 6.325301204819277, "grad_norm": 2.0368270874023438, "learning_rate": 5.271084337349398e-06, "loss": 0.5333, "step": 1050 }, { "epoch": 6.355421686746988, "grad_norm": 1.854719877243042, "learning_rate": 5.296184738955824e-06, "loss": 0.5721, "step": 1055 }, { "epoch": 6.385542168674699, "grad_norm": 2.101360321044922, "learning_rate": 5.3212851405622495e-06, "loss": 0.5424, "step": 1060 }, { "epoch": 6.4156626506024095, "grad_norm": 2.0200586318969727, "learning_rate": 5.346385542168675e-06, "loss": 0.5581, "step": 1065 }, { "epoch": 6.445783132530121, "grad_norm": 1.954229474067688, "learning_rate": 5.371485943775101e-06, "loss": 0.546, "step": 1070 }, { "epoch": 6.475903614457831, "grad_norm": 2.5045673847198486, "learning_rate": 5.3965863453815266e-06, "loss": 0.5167, "step": 1075 }, { "epoch": 6.506024096385542, "grad_norm": 2.311288595199585, "learning_rate": 5.421686746987952e-06, "loss": 0.5613, "step": 1080 }, { "epoch": 6.5361445783132535, "grad_norm": 1.9730944633483887, "learning_rate": 5.446787148594378e-06, "loss": 0.5447, "step": 1085 }, { "epoch": 6.566265060240964, "grad_norm": 1.8411797285079956, "learning_rate": 5.471887550200804e-06, "loss": 0.5209, "step": 1090 }, { "epoch": 6.596385542168675, "grad_norm": 3.7007882595062256, "learning_rate": 5.496987951807229e-06, "loss": 0.5771, "step": 1095 }, { "epoch": 6.626506024096385, "grad_norm": 2.768594264984131, "learning_rate": 5.522088353413655e-06, "loss": 0.5304, "step": 1100 }, { "epoch": 6.656626506024097, "grad_norm": 2.585475444793701, "learning_rate": 5.547188755020081e-06, "loss": 0.5882, "step": 1105 }, { "epoch": 6.686746987951807, "grad_norm": 1.8738479614257812, "learning_rate": 5.572289156626506e-06, "loss": 0.5168, "step": 1110 }, { "epoch": 6.716867469879518, "grad_norm": 2.3905439376831055, "learning_rate": 5.597389558232932e-06, "loss": 0.5593, "step": 1115 }, { "epoch": 6.746987951807229, "grad_norm": 2.611729860305786, "learning_rate": 5.622489959839358e-06, "loss": 0.5324, "step": 1120 }, { "epoch": 6.77710843373494, "grad_norm": 2.1663403511047363, "learning_rate": 5.647590361445783e-06, "loss": 0.5665, "step": 1125 }, { "epoch": 6.807228915662651, "grad_norm": 4.150970935821533, "learning_rate": 5.672690763052209e-06, "loss": 0.5385, "step": 1130 }, { "epoch": 6.837349397590361, "grad_norm": 2.477626323699951, "learning_rate": 5.697791164658635e-06, "loss": 0.5355, "step": 1135 }, { "epoch": 6.867469879518072, "grad_norm": 1.951925277709961, "learning_rate": 5.72289156626506e-06, "loss": 0.5418, "step": 1140 }, { "epoch": 6.897590361445783, "grad_norm": 2.934628963470459, "learning_rate": 5.747991967871486e-06, "loss": 0.5653, "step": 1145 }, { "epoch": 6.927710843373494, "grad_norm": 2.06850004196167, "learning_rate": 5.7730923694779125e-06, "loss": 0.5277, "step": 1150 }, { "epoch": 6.957831325301205, "grad_norm": 1.8905904293060303, "learning_rate": 5.798192771084337e-06, "loss": 0.5489, "step": 1155 }, { "epoch": 6.9879518072289155, "grad_norm": 2.704571008682251, "learning_rate": 5.823293172690764e-06, "loss": 0.5467, "step": 1160 }, { "epoch": 7.0, "eval_accuracy": 0.7561934722768384, "eval_auc": 0.7969008584049355, "eval_f1": 0.6544035674470458, "eval_loss": 0.4929519295692444, "eval_precision": 0.6401308615049073, "eval_recall": 0.669327251995439, "eval_runtime": 19.5191, "eval_samples_per_second": 130.283, "eval_steps_per_second": 0.666, "step": 1162 }, { "epoch": 7.018072289156627, "grad_norm": 2.4509992599487305, "learning_rate": 5.848393574297189e-06, "loss": 0.5328, "step": 1165 }, { "epoch": 7.048192771084337, "grad_norm": 2.994516372680664, "learning_rate": 5.873493975903615e-06, "loss": 0.5498, "step": 1170 }, { "epoch": 7.078313253012048, "grad_norm": 1.663866400718689, "learning_rate": 5.89859437751004e-06, "loss": 0.5231, "step": 1175 }, { "epoch": 7.108433734939759, "grad_norm": 2.1491575241088867, "learning_rate": 5.9236947791164665e-06, "loss": 0.5392, "step": 1180 }, { "epoch": 7.13855421686747, "grad_norm": 1.8787453174591064, "learning_rate": 5.948795180722891e-06, "loss": 0.5417, "step": 1185 }, { "epoch": 7.168674698795181, "grad_norm": 2.0878076553344727, "learning_rate": 5.973895582329318e-06, "loss": 0.5329, "step": 1190 }, { "epoch": 7.198795180722891, "grad_norm": 1.762899398803711, "learning_rate": 5.998995983935743e-06, "loss": 0.5118, "step": 1195 }, { "epoch": 7.228915662650603, "grad_norm": 2.437101125717163, "learning_rate": 6.024096385542169e-06, "loss": 0.5524, "step": 1200 }, { "epoch": 7.259036144578313, "grad_norm": 2.0478458404541016, "learning_rate": 6.049196787148595e-06, "loss": 0.5273, "step": 1205 }, { "epoch": 7.289156626506024, "grad_norm": 2.2171614170074463, "learning_rate": 6.074297188755021e-06, "loss": 0.5624, "step": 1210 }, { "epoch": 7.3192771084337345, "grad_norm": 2.5147898197174072, "learning_rate": 6.099397590361446e-06, "loss": 0.5172, "step": 1215 }, { "epoch": 7.349397590361446, "grad_norm": 1.7457013130187988, "learning_rate": 6.124497991967872e-06, "loss": 0.5085, "step": 1220 }, { "epoch": 7.379518072289157, "grad_norm": 2.980337142944336, "learning_rate": 6.149598393574298e-06, "loss": 0.5385, "step": 1225 }, { "epoch": 7.409638554216867, "grad_norm": 2.7685298919677734, "learning_rate": 6.174698795180723e-06, "loss": 0.4912, "step": 1230 }, { "epoch": 7.4397590361445785, "grad_norm": 2.4463353157043457, "learning_rate": 6.199799196787149e-06, "loss": 0.516, "step": 1235 }, { "epoch": 7.469879518072289, "grad_norm": 2.2994775772094727, "learning_rate": 6.224899598393575e-06, "loss": 0.5013, "step": 1240 }, { "epoch": 7.5, "grad_norm": 2.0714969635009766, "learning_rate": 6.25e-06, "loss": 0.5595, "step": 1245 }, { "epoch": 7.530120481927711, "grad_norm": 1.9828314781188965, "learning_rate": 6.275100401606426e-06, "loss": 0.5589, "step": 1250 }, { "epoch": 7.5602409638554215, "grad_norm": 3.4310250282287598, "learning_rate": 6.3002008032128525e-06, "loss": 0.5351, "step": 1255 }, { "epoch": 7.590361445783133, "grad_norm": 3.5097134113311768, "learning_rate": 6.325301204819277e-06, "loss": 0.5621, "step": 1260 }, { "epoch": 7.620481927710843, "grad_norm": 3.3920838832855225, "learning_rate": 6.350401606425703e-06, "loss": 0.5364, "step": 1265 }, { "epoch": 7.650602409638554, "grad_norm": 2.096998453140259, "learning_rate": 6.375502008032129e-06, "loss": 0.5467, "step": 1270 }, { "epoch": 7.6807228915662655, "grad_norm": 2.3254811763763428, "learning_rate": 6.400602409638555e-06, "loss": 0.5144, "step": 1275 }, { "epoch": 7.710843373493976, "grad_norm": 2.36531400680542, "learning_rate": 6.42570281124498e-06, "loss": 0.5264, "step": 1280 }, { "epoch": 7.740963855421687, "grad_norm": 2.1447479724884033, "learning_rate": 6.450803212851406e-06, "loss": 0.5059, "step": 1285 }, { "epoch": 7.771084337349397, "grad_norm": 2.3711814880371094, "learning_rate": 6.475903614457831e-06, "loss": 0.5012, "step": 1290 }, { "epoch": 7.801204819277109, "grad_norm": 1.8993377685546875, "learning_rate": 6.501004016064258e-06, "loss": 0.5117, "step": 1295 }, { "epoch": 7.831325301204819, "grad_norm": 2.4439542293548584, "learning_rate": 6.5261044176706836e-06, "loss": 0.5215, "step": 1300 }, { "epoch": 7.86144578313253, "grad_norm": 2.124606132507324, "learning_rate": 6.551204819277108e-06, "loss": 0.5428, "step": 1305 }, { "epoch": 7.891566265060241, "grad_norm": 2.4091856479644775, "learning_rate": 6.576305220883534e-06, "loss": 0.5675, "step": 1310 }, { "epoch": 7.921686746987952, "grad_norm": 2.2007946968078613, "learning_rate": 6.6014056224899606e-06, "loss": 0.5338, "step": 1315 }, { "epoch": 7.951807228915663, "grad_norm": 2.244727373123169, "learning_rate": 6.626506024096386e-06, "loss": 0.5405, "step": 1320 }, { "epoch": 7.981927710843373, "grad_norm": 2.5336217880249023, "learning_rate": 6.651606425702811e-06, "loss": 0.5484, "step": 1325 }, { "epoch": 8.0, "eval_accuracy": 0.7758552890287063, "eval_auc": 0.8200775178942729, "eval_f1": 0.6925566343042071, "eval_loss": 0.4728256165981293, "eval_precision": 0.6571136131013307, "eval_recall": 0.7320410490307868, "eval_runtime": 19.4979, "eval_samples_per_second": 130.424, "eval_steps_per_second": 0.667, "step": 1328 }, { "epoch": 8.012048192771084, "grad_norm": 2.269305944442749, "learning_rate": 6.676706827309237e-06, "loss": 0.4996, "step": 1330 }, { "epoch": 8.042168674698795, "grad_norm": 2.1622703075408936, "learning_rate": 6.701807228915663e-06, "loss": 0.5459, "step": 1335 }, { "epoch": 8.072289156626505, "grad_norm": 2.5812857151031494, "learning_rate": 6.726907630522089e-06, "loss": 0.5579, "step": 1340 }, { "epoch": 8.102409638554217, "grad_norm": 3.5632548332214355, "learning_rate": 6.7520080321285155e-06, "loss": 0.4909, "step": 1345 }, { "epoch": 8.132530120481928, "grad_norm": 1.9275950193405151, "learning_rate": 6.7771084337349394e-06, "loss": 0.4672, "step": 1350 }, { "epoch": 8.162650602409638, "grad_norm": 2.0507545471191406, "learning_rate": 6.802208835341366e-06, "loss": 0.4888, "step": 1355 }, { "epoch": 8.19277108433735, "grad_norm": 2.0971333980560303, "learning_rate": 6.827309236947792e-06, "loss": 0.5421, "step": 1360 }, { "epoch": 8.22289156626506, "grad_norm": 2.3234474658966064, "learning_rate": 6.852409638554218e-06, "loss": 0.5164, "step": 1365 }, { "epoch": 8.25301204819277, "grad_norm": 2.625166654586792, "learning_rate": 6.877510040160642e-06, "loss": 0.525, "step": 1370 }, { "epoch": 8.283132530120483, "grad_norm": 3.066610097885132, "learning_rate": 6.902610441767069e-06, "loss": 0.4918, "step": 1375 }, { "epoch": 8.313253012048193, "grad_norm": 2.1544108390808105, "learning_rate": 6.927710843373494e-06, "loss": 0.5355, "step": 1380 }, { "epoch": 8.343373493975903, "grad_norm": 2.05570387840271, "learning_rate": 6.95281124497992e-06, "loss": 0.5114, "step": 1385 }, { "epoch": 8.373493975903614, "grad_norm": 3.5560357570648193, "learning_rate": 6.977911646586345e-06, "loss": 0.5201, "step": 1390 }, { "epoch": 8.403614457831326, "grad_norm": 2.2558846473693848, "learning_rate": 7.003012048192771e-06, "loss": 0.5337, "step": 1395 }, { "epoch": 8.433734939759036, "grad_norm": 2.5230071544647217, "learning_rate": 7.028112449799197e-06, "loss": 0.5308, "step": 1400 }, { "epoch": 8.463855421686747, "grad_norm": 2.2817351818084717, "learning_rate": 7.053212851405623e-06, "loss": 0.53, "step": 1405 }, { "epoch": 8.493975903614459, "grad_norm": 1.9256802797317505, "learning_rate": 7.078313253012049e-06, "loss": 0.5086, "step": 1410 }, { "epoch": 8.524096385542169, "grad_norm": 2.562375068664551, "learning_rate": 7.103413654618474e-06, "loss": 0.5701, "step": 1415 }, { "epoch": 8.55421686746988, "grad_norm": 3.425631523132324, "learning_rate": 7.1285140562249e-06, "loss": 0.5305, "step": 1420 }, { "epoch": 8.58433734939759, "grad_norm": 2.4099252223968506, "learning_rate": 7.153614457831325e-06, "loss": 0.4723, "step": 1425 }, { "epoch": 8.614457831325302, "grad_norm": 1.8401768207550049, "learning_rate": 7.178714859437752e-06, "loss": 0.4943, "step": 1430 }, { "epoch": 8.644578313253012, "grad_norm": 2.6531455516815186, "learning_rate": 7.203815261044177e-06, "loss": 0.4671, "step": 1435 }, { "epoch": 8.674698795180722, "grad_norm": 1.9509261846542358, "learning_rate": 7.228915662650602e-06, "loss": 0.5076, "step": 1440 }, { "epoch": 8.704819277108435, "grad_norm": 2.1042916774749756, "learning_rate": 7.254016064257028e-06, "loss": 0.5346, "step": 1445 }, { "epoch": 8.734939759036145, "grad_norm": 3.418381929397583, "learning_rate": 7.279116465863455e-06, "loss": 0.4955, "step": 1450 }, { "epoch": 8.765060240963855, "grad_norm": 1.7887992858886719, "learning_rate": 7.30421686746988e-06, "loss": 0.5363, "step": 1455 }, { "epoch": 8.795180722891565, "grad_norm": 1.8168081045150757, "learning_rate": 7.329317269076305e-06, "loss": 0.5093, "step": 1460 }, { "epoch": 8.825301204819278, "grad_norm": 2.2657430171966553, "learning_rate": 7.354417670682731e-06, "loss": 0.5352, "step": 1465 }, { "epoch": 8.855421686746988, "grad_norm": 3.2351300716400146, "learning_rate": 7.379518072289157e-06, "loss": 0.4678, "step": 1470 }, { "epoch": 8.885542168674698, "grad_norm": 2.0001509189605713, "learning_rate": 7.404618473895583e-06, "loss": 0.5276, "step": 1475 }, { "epoch": 8.91566265060241, "grad_norm": 3.1765055656433105, "learning_rate": 7.429718875502008e-06, "loss": 0.5595, "step": 1480 }, { "epoch": 8.94578313253012, "grad_norm": 2.8290090560913086, "learning_rate": 7.4548192771084335e-06, "loss": 0.5012, "step": 1485 }, { "epoch": 8.975903614457831, "grad_norm": 1.8865560293197632, "learning_rate": 7.47991967871486e-06, "loss": 0.5722, "step": 1490 }, { "epoch": 9.0, "eval_accuracy": 0.7896185607550138, "eval_auc": 0.8347310417895779, "eval_f1": 0.7055586130985141, "eval_loss": 0.4508674442768097, "eval_precision": 0.6819148936170213, "eval_recall": 0.7309007981755986, "eval_runtime": 19.0422, "eval_samples_per_second": 133.546, "eval_steps_per_second": 0.683, "step": 1494 }, { "epoch": 9.006024096385541, "grad_norm": 3.6126842498779297, "learning_rate": 7.505020080321286e-06, "loss": 0.557, "step": 1495 }, { "epoch": 9.036144578313253, "grad_norm": 2.2869274616241455, "learning_rate": 7.530120481927712e-06, "loss": 0.5249, "step": 1500 }, { "epoch": 9.066265060240964, "grad_norm": 1.995977520942688, "learning_rate": 7.555220883534136e-06, "loss": 0.4981, "step": 1505 }, { "epoch": 9.096385542168674, "grad_norm": 3.088841676712036, "learning_rate": 7.580321285140563e-06, "loss": 0.5257, "step": 1510 }, { "epoch": 9.126506024096386, "grad_norm": 2.138551950454712, "learning_rate": 7.605421686746988e-06, "loss": 0.5238, "step": 1515 }, { "epoch": 9.156626506024097, "grad_norm": 2.013193130493164, "learning_rate": 7.630522088353415e-06, "loss": 0.5156, "step": 1520 }, { "epoch": 9.186746987951807, "grad_norm": 1.7738057374954224, "learning_rate": 7.655622489959839e-06, "loss": 0.4972, "step": 1525 }, { "epoch": 9.216867469879517, "grad_norm": 2.0512638092041016, "learning_rate": 7.680722891566265e-06, "loss": 0.4875, "step": 1530 }, { "epoch": 9.24698795180723, "grad_norm": 2.8747947216033936, "learning_rate": 7.705823293172692e-06, "loss": 0.5175, "step": 1535 }, { "epoch": 9.27710843373494, "grad_norm": 2.159099578857422, "learning_rate": 7.730923694779118e-06, "loss": 0.5318, "step": 1540 }, { "epoch": 9.30722891566265, "grad_norm": 2.510629653930664, "learning_rate": 7.756024096385543e-06, "loss": 0.4795, "step": 1545 }, { "epoch": 9.337349397590362, "grad_norm": 2.352501392364502, "learning_rate": 7.781124497991967e-06, "loss": 0.4842, "step": 1550 }, { "epoch": 9.367469879518072, "grad_norm": 2.652738571166992, "learning_rate": 7.806224899598395e-06, "loss": 0.5417, "step": 1555 }, { "epoch": 9.397590361445783, "grad_norm": 2.8062024116516113, "learning_rate": 7.83132530120482e-06, "loss": 0.4939, "step": 1560 }, { "epoch": 9.427710843373493, "grad_norm": 3.1697118282318115, "learning_rate": 7.856425702811246e-06, "loss": 0.4914, "step": 1565 }, { "epoch": 9.457831325301205, "grad_norm": 2.109936237335205, "learning_rate": 7.88152610441767e-06, "loss": 0.5043, "step": 1570 }, { "epoch": 9.487951807228916, "grad_norm": 1.8554120063781738, "learning_rate": 7.906626506024097e-06, "loss": 0.5032, "step": 1575 }, { "epoch": 9.518072289156626, "grad_norm": 2.0720715522766113, "learning_rate": 7.931726907630523e-06, "loss": 0.5306, "step": 1580 }, { "epoch": 9.548192771084338, "grad_norm": 1.990857720375061, "learning_rate": 7.956827309236949e-06, "loss": 0.5114, "step": 1585 }, { "epoch": 9.578313253012048, "grad_norm": 2.719435691833496, "learning_rate": 7.981927710843373e-06, "loss": 0.5023, "step": 1590 }, { "epoch": 9.608433734939759, "grad_norm": 1.8556512594223022, "learning_rate": 8.0070281124498e-06, "loss": 0.5046, "step": 1595 }, { "epoch": 9.638554216867469, "grad_norm": 3.1273837089538574, "learning_rate": 8.032128514056226e-06, "loss": 0.5234, "step": 1600 }, { "epoch": 9.668674698795181, "grad_norm": 2.0694265365600586, "learning_rate": 8.057228915662651e-06, "loss": 0.4927, "step": 1605 }, { "epoch": 9.698795180722891, "grad_norm": 1.7405294179916382, "learning_rate": 8.082329317269077e-06, "loss": 0.469, "step": 1610 }, { "epoch": 9.728915662650602, "grad_norm": 4.201533794403076, "learning_rate": 8.107429718875503e-06, "loss": 0.5196, "step": 1615 }, { "epoch": 9.759036144578314, "grad_norm": 2.02896785736084, "learning_rate": 8.132530120481928e-06, "loss": 0.5044, "step": 1620 }, { "epoch": 9.789156626506024, "grad_norm": 2.5728471279144287, "learning_rate": 8.157630522088354e-06, "loss": 0.5155, "step": 1625 }, { "epoch": 9.819277108433734, "grad_norm": 2.0008513927459717, "learning_rate": 8.18273092369478e-06, "loss": 0.5, "step": 1630 }, { "epoch": 9.849397590361447, "grad_norm": 2.3800718784332275, "learning_rate": 8.207831325301205e-06, "loss": 0.4815, "step": 1635 }, { "epoch": 9.879518072289157, "grad_norm": 1.8371376991271973, "learning_rate": 8.232931726907631e-06, "loss": 0.4782, "step": 1640 }, { "epoch": 9.909638554216867, "grad_norm": 3.298999309539795, "learning_rate": 8.258032128514057e-06, "loss": 0.5582, "step": 1645 }, { "epoch": 9.939759036144578, "grad_norm": 2.420517683029175, "learning_rate": 8.283132530120482e-06, "loss": 0.4519, "step": 1650 }, { "epoch": 9.96987951807229, "grad_norm": 2.1421029567718506, "learning_rate": 8.308232931726908e-06, "loss": 0.5498, "step": 1655 }, { "epoch": 10.0, "grad_norm": 3.2856643199920654, "learning_rate": 8.333333333333334e-06, "loss": 0.4841, "step": 1660 }, { "epoch": 10.0, "eval_accuracy": 0.782147070389304, "eval_auc": 0.8479315329324432, "eval_f1": 0.7123572170301142, "eval_loss": 0.44875216484069824, "eval_precision": 0.6539561487130601, "eval_recall": 0.7822120866590649, "eval_runtime": 19.4068, "eval_samples_per_second": 131.037, "eval_steps_per_second": 0.67, "step": 1660 }, { "epoch": 10.03012048192771, "grad_norm": 2.615487813949585, "learning_rate": 8.35843373493976e-06, "loss": 0.5128, "step": 1665 }, { "epoch": 10.060240963855422, "grad_norm": 2.062527894973755, "learning_rate": 8.383534136546185e-06, "loss": 0.485, "step": 1670 }, { "epoch": 10.090361445783133, "grad_norm": 2.5456483364105225, "learning_rate": 8.40863453815261e-06, "loss": 0.5274, "step": 1675 }, { "epoch": 10.120481927710843, "grad_norm": 2.4075815677642822, "learning_rate": 8.433734939759036e-06, "loss": 0.5046, "step": 1680 }, { "epoch": 10.150602409638553, "grad_norm": 2.2953052520751953, "learning_rate": 8.458835341365462e-06, "loss": 0.4945, "step": 1685 }, { "epoch": 10.180722891566266, "grad_norm": 2.051063060760498, "learning_rate": 8.483935742971888e-06, "loss": 0.4961, "step": 1690 }, { "epoch": 10.210843373493976, "grad_norm": 2.426858425140381, "learning_rate": 8.509036144578313e-06, "loss": 0.4804, "step": 1695 }, { "epoch": 10.240963855421686, "grad_norm": 2.5486855506896973, "learning_rate": 8.53413654618474e-06, "loss": 0.4972, "step": 1700 }, { "epoch": 10.271084337349398, "grad_norm": 2.055622100830078, "learning_rate": 8.559236947791165e-06, "loss": 0.4917, "step": 1705 }, { "epoch": 10.301204819277109, "grad_norm": 1.8970571756362915, "learning_rate": 8.58433734939759e-06, "loss": 0.4663, "step": 1710 }, { "epoch": 10.331325301204819, "grad_norm": 2.2279303073883057, "learning_rate": 8.609437751004016e-06, "loss": 0.5197, "step": 1715 }, { "epoch": 10.36144578313253, "grad_norm": 2.70774245262146, "learning_rate": 8.634538152610444e-06, "loss": 0.4919, "step": 1720 }, { "epoch": 10.391566265060241, "grad_norm": 2.5405726432800293, "learning_rate": 8.659638554216867e-06, "loss": 0.4688, "step": 1725 }, { "epoch": 10.421686746987952, "grad_norm": 2.2970728874206543, "learning_rate": 8.684738955823293e-06, "loss": 0.4864, "step": 1730 }, { "epoch": 10.451807228915662, "grad_norm": 2.4628303050994873, "learning_rate": 8.709839357429719e-06, "loss": 0.476, "step": 1735 }, { "epoch": 10.481927710843374, "grad_norm": 2.983382225036621, "learning_rate": 8.734939759036146e-06, "loss": 0.5057, "step": 1740 }, { "epoch": 10.512048192771084, "grad_norm": 2.126538038253784, "learning_rate": 8.760040160642572e-06, "loss": 0.5023, "step": 1745 }, { "epoch": 10.542168674698795, "grad_norm": 2.560659408569336, "learning_rate": 8.785140562248996e-06, "loss": 0.4987, "step": 1750 }, { "epoch": 10.572289156626507, "grad_norm": 2.0805766582489014, "learning_rate": 8.810240963855422e-06, "loss": 0.5026, "step": 1755 }, { "epoch": 10.602409638554217, "grad_norm": 1.8629183769226074, "learning_rate": 8.835341365461847e-06, "loss": 0.4684, "step": 1760 }, { "epoch": 10.632530120481928, "grad_norm": 2.280298948287964, "learning_rate": 8.860441767068275e-06, "loss": 0.4979, "step": 1765 }, { "epoch": 10.662650602409638, "grad_norm": 2.412539005279541, "learning_rate": 8.885542168674699e-06, "loss": 0.4823, "step": 1770 }, { "epoch": 10.69277108433735, "grad_norm": 2.6285996437072754, "learning_rate": 8.910642570281124e-06, "loss": 0.486, "step": 1775 }, { "epoch": 10.72289156626506, "grad_norm": 2.4618632793426514, "learning_rate": 8.93574297188755e-06, "loss": 0.4472, "step": 1780 }, { "epoch": 10.75301204819277, "grad_norm": 2.290416955947876, "learning_rate": 8.960843373493977e-06, "loss": 0.4769, "step": 1785 }, { "epoch": 10.783132530120483, "grad_norm": 3.2826364040374756, "learning_rate": 8.985943775100401e-06, "loss": 0.4776, "step": 1790 }, { "epoch": 10.813253012048193, "grad_norm": 2.097832441329956, "learning_rate": 9.011044176706827e-06, "loss": 0.5232, "step": 1795 }, { "epoch": 10.843373493975903, "grad_norm": 2.799823045730591, "learning_rate": 9.036144578313253e-06, "loss": 0.5045, "step": 1800 }, { "epoch": 10.873493975903614, "grad_norm": 2.0956716537475586, "learning_rate": 9.06124497991968e-06, "loss": 0.4784, "step": 1805 }, { "epoch": 10.903614457831326, "grad_norm": 3.0392680168151855, "learning_rate": 9.086345381526106e-06, "loss": 0.486, "step": 1810 }, { "epoch": 10.933734939759036, "grad_norm": 3.4051640033721924, "learning_rate": 9.11144578313253e-06, "loss": 0.4957, "step": 1815 }, { "epoch": 10.963855421686747, "grad_norm": 4.216493606567383, "learning_rate": 9.136546184738955e-06, "loss": 0.4817, "step": 1820 }, { "epoch": 10.993975903614459, "grad_norm": 3.6580519676208496, "learning_rate": 9.161646586345383e-06, "loss": 0.5234, "step": 1825 }, { "epoch": 11.0, "eval_accuracy": 0.7809673613841919, "eval_auc": 0.851010415568736, "eval_f1": 0.7041954328199681, "eval_loss": 0.44431841373443604, "eval_precision": 0.6590457256461233, "eval_recall": 0.7559863169897377, "eval_runtime": 20.0651, "eval_samples_per_second": 126.738, "eval_steps_per_second": 0.648, "step": 1826 }, { "epoch": 11.024096385542169, "grad_norm": 2.170461654663086, "learning_rate": 9.186746987951808e-06, "loss": 0.4666, "step": 1830 }, { "epoch": 11.05421686746988, "grad_norm": 1.8590593338012695, "learning_rate": 9.211847389558232e-06, "loss": 0.4547, "step": 1835 }, { "epoch": 11.08433734939759, "grad_norm": 1.9973385334014893, "learning_rate": 9.236947791164658e-06, "loss": 0.4707, "step": 1840 }, { "epoch": 11.114457831325302, "grad_norm": 2.076169013977051, "learning_rate": 9.262048192771085e-06, "loss": 0.5125, "step": 1845 }, { "epoch": 11.144578313253012, "grad_norm": 2.7113735675811768, "learning_rate": 9.287148594377511e-06, "loss": 0.4566, "step": 1850 }, { "epoch": 11.174698795180722, "grad_norm": 3.533003330230713, "learning_rate": 9.312248995983937e-06, "loss": 0.4702, "step": 1855 }, { "epoch": 11.204819277108435, "grad_norm": 2.180222749710083, "learning_rate": 9.33734939759036e-06, "loss": 0.4937, "step": 1860 }, { "epoch": 11.234939759036145, "grad_norm": 3.8985979557037354, "learning_rate": 9.362449799196788e-06, "loss": 0.4821, "step": 1865 }, { "epoch": 11.265060240963855, "grad_norm": 3.7206363677978516, "learning_rate": 9.387550200803214e-06, "loss": 0.4762, "step": 1870 }, { "epoch": 11.295180722891565, "grad_norm": 1.7842859029769897, "learning_rate": 9.41265060240964e-06, "loss": 0.4438, "step": 1875 }, { "epoch": 11.325301204819278, "grad_norm": 2.2433724403381348, "learning_rate": 9.437751004016063e-06, "loss": 0.5085, "step": 1880 }, { "epoch": 11.355421686746988, "grad_norm": 2.3268587589263916, "learning_rate": 9.46285140562249e-06, "loss": 0.4368, "step": 1885 }, { "epoch": 11.385542168674698, "grad_norm": 2.997973918914795, "learning_rate": 9.487951807228916e-06, "loss": 0.4838, "step": 1890 }, { "epoch": 11.41566265060241, "grad_norm": 2.7819128036499023, "learning_rate": 9.513052208835342e-06, "loss": 0.4669, "step": 1895 }, { "epoch": 11.44578313253012, "grad_norm": 2.327488660812378, "learning_rate": 9.538152610441768e-06, "loss": 0.4901, "step": 1900 }, { "epoch": 11.475903614457831, "grad_norm": 2.628075361251831, "learning_rate": 9.563253012048193e-06, "loss": 0.5057, "step": 1905 }, { "epoch": 11.506024096385541, "grad_norm": 2.0853092670440674, "learning_rate": 9.588353413654619e-06, "loss": 0.4993, "step": 1910 }, { "epoch": 11.536144578313253, "grad_norm": 2.37349271774292, "learning_rate": 9.613453815261045e-06, "loss": 0.4565, "step": 1915 }, { "epoch": 11.566265060240964, "grad_norm": 3.0365233421325684, "learning_rate": 9.63855421686747e-06, "loss": 0.4829, "step": 1920 }, { "epoch": 11.596385542168674, "grad_norm": 2.143648147583008, "learning_rate": 9.663654618473896e-06, "loss": 0.4739, "step": 1925 }, { "epoch": 11.626506024096386, "grad_norm": 2.349785327911377, "learning_rate": 9.688755020080322e-06, "loss": 0.5107, "step": 1930 }, { "epoch": 11.656626506024097, "grad_norm": 2.736833333969116, "learning_rate": 9.713855421686747e-06, "loss": 0.4743, "step": 1935 }, { "epoch": 11.686746987951807, "grad_norm": 3.4357142448425293, "learning_rate": 9.738955823293173e-06, "loss": 0.489, "step": 1940 }, { "epoch": 11.716867469879517, "grad_norm": 2.251582145690918, "learning_rate": 9.764056224899599e-06, "loss": 0.4888, "step": 1945 }, { "epoch": 11.74698795180723, "grad_norm": 2.892306089401245, "learning_rate": 9.789156626506024e-06, "loss": 0.5466, "step": 1950 }, { "epoch": 11.77710843373494, "grad_norm": 2.4566543102264404, "learning_rate": 9.81425702811245e-06, "loss": 0.4824, "step": 1955 }, { "epoch": 11.80722891566265, "grad_norm": 1.9853315353393555, "learning_rate": 9.839357429718876e-06, "loss": 0.4504, "step": 1960 }, { "epoch": 11.837349397590362, "grad_norm": 3.7275307178497314, "learning_rate": 9.864457831325302e-06, "loss": 0.462, "step": 1965 }, { "epoch": 11.867469879518072, "grad_norm": 2.083540916442871, "learning_rate": 9.889558232931727e-06, "loss": 0.4787, "step": 1970 }, { "epoch": 11.897590361445783, "grad_norm": 2.22110915184021, "learning_rate": 9.914658634538153e-06, "loss": 0.4848, "step": 1975 }, { "epoch": 11.927710843373493, "grad_norm": 2.9532299041748047, "learning_rate": 9.939759036144579e-06, "loss": 0.4916, "step": 1980 }, { "epoch": 11.957831325301205, "grad_norm": 3.3378384113311768, "learning_rate": 9.964859437751004e-06, "loss": 0.5118, "step": 1985 }, { "epoch": 11.987951807228916, "grad_norm": 3.9365689754486084, "learning_rate": 9.98995983935743e-06, "loss": 0.5137, "step": 1990 }, { "epoch": 12.0, "eval_accuracy": 0.7605190719622493, "eval_auc": 0.8553674605532064, "eval_f1": 0.7112375533428165, "eval_loss": 0.47835391759872437, "eval_precision": 0.6087662337662337, "eval_recall": 0.855188141391106, "eval_runtime": 17.1677, "eval_samples_per_second": 148.127, "eval_steps_per_second": 0.757, "step": 1992 }, { "epoch": 12.018072289156626, "grad_norm": 2.984025478363037, "learning_rate": 1.0015060240963856e-05, "loss": 0.5233, "step": 1995 }, { "epoch": 12.048192771084338, "grad_norm": 4.041134357452393, "learning_rate": 1.0040160642570281e-05, "loss": 0.5057, "step": 2000 }, { "epoch": 12.078313253012048, "grad_norm": 1.9200129508972168, "learning_rate": 1.0065261044176707e-05, "loss": 0.4799, "step": 2005 }, { "epoch": 12.108433734939759, "grad_norm": 1.8479945659637451, "learning_rate": 1.0090361445783134e-05, "loss": 0.4227, "step": 2010 }, { "epoch": 12.13855421686747, "grad_norm": 1.9779216051101685, "learning_rate": 1.0115461847389558e-05, "loss": 0.4581, "step": 2015 }, { "epoch": 12.168674698795181, "grad_norm": 2.1734094619750977, "learning_rate": 1.0140562248995984e-05, "loss": 0.429, "step": 2020 }, { "epoch": 12.198795180722891, "grad_norm": 2.0043869018554688, "learning_rate": 1.016566265060241e-05, "loss": 0.4395, "step": 2025 }, { "epoch": 12.228915662650602, "grad_norm": 3.386418342590332, "learning_rate": 1.0190763052208837e-05, "loss": 0.4714, "step": 2030 }, { "epoch": 12.259036144578314, "grad_norm": 1.8247755765914917, "learning_rate": 1.0215863453815261e-05, "loss": 0.4295, "step": 2035 }, { "epoch": 12.289156626506024, "grad_norm": 2.1945042610168457, "learning_rate": 1.0240963855421687e-05, "loss": 0.4613, "step": 2040 }, { "epoch": 12.319277108433734, "grad_norm": 2.047349214553833, "learning_rate": 1.0266064257028112e-05, "loss": 0.4658, "step": 2045 }, { "epoch": 12.349397590361447, "grad_norm": 2.9103140830993652, "learning_rate": 1.029116465863454e-05, "loss": 0.4641, "step": 2050 }, { "epoch": 12.379518072289157, "grad_norm": 3.0239338874816895, "learning_rate": 1.0316265060240965e-05, "loss": 0.4595, "step": 2055 }, { "epoch": 12.409638554216867, "grad_norm": 2.5606324672698975, "learning_rate": 1.034136546184739e-05, "loss": 0.4078, "step": 2060 }, { "epoch": 12.439759036144578, "grad_norm": 1.9044026136398315, "learning_rate": 1.0366465863453815e-05, "loss": 0.502, "step": 2065 }, { "epoch": 12.46987951807229, "grad_norm": 1.8545125722885132, "learning_rate": 1.0391566265060242e-05, "loss": 0.4857, "step": 2070 }, { "epoch": 12.5, "grad_norm": 2.916778087615967, "learning_rate": 1.0416666666666668e-05, "loss": 0.4664, "step": 2075 }, { "epoch": 12.53012048192771, "grad_norm": 2.8088219165802, "learning_rate": 1.0441767068273092e-05, "loss": 0.4933, "step": 2080 }, { "epoch": 12.560240963855422, "grad_norm": 2.9494364261627197, "learning_rate": 1.0466867469879518e-05, "loss": 0.4708, "step": 2085 }, { "epoch": 12.590361445783133, "grad_norm": 2.231259822845459, "learning_rate": 1.0491967871485945e-05, "loss": 0.4536, "step": 2090 }, { "epoch": 12.620481927710843, "grad_norm": 2.52738356590271, "learning_rate": 1.051706827309237e-05, "loss": 0.4716, "step": 2095 }, { "epoch": 12.650602409638553, "grad_norm": 2.3508124351501465, "learning_rate": 1.0542168674698796e-05, "loss": 0.4922, "step": 2100 }, { "epoch": 12.680722891566266, "grad_norm": 2.4535419940948486, "learning_rate": 1.056726907630522e-05, "loss": 0.4743, "step": 2105 }, { "epoch": 12.710843373493976, "grad_norm": 1.9749757051467896, "learning_rate": 1.0592369477911648e-05, "loss": 0.4867, "step": 2110 }, { "epoch": 12.740963855421686, "grad_norm": 2.2329463958740234, "learning_rate": 1.0617469879518073e-05, "loss": 0.5101, "step": 2115 }, { "epoch": 12.771084337349398, "grad_norm": 2.830862045288086, "learning_rate": 1.0642570281124499e-05, "loss": 0.5009, "step": 2120 }, { "epoch": 12.801204819277109, "grad_norm": 2.107712984085083, "learning_rate": 1.0667670682730923e-05, "loss": 0.505, "step": 2125 }, { "epoch": 12.831325301204819, "grad_norm": 2.4264347553253174, "learning_rate": 1.069277108433735e-05, "loss": 0.4932, "step": 2130 }, { "epoch": 12.861445783132531, "grad_norm": 2.3123373985290527, "learning_rate": 1.0717871485943776e-05, "loss": 0.5024, "step": 2135 }, { "epoch": 12.891566265060241, "grad_norm": 2.1964051723480225, "learning_rate": 1.0742971887550202e-05, "loss": 0.467, "step": 2140 }, { "epoch": 12.921686746987952, "grad_norm": 2.1655547618865967, "learning_rate": 1.0768072289156627e-05, "loss": 0.4637, "step": 2145 }, { "epoch": 12.951807228915662, "grad_norm": 2.684300184249878, "learning_rate": 1.0793172690763053e-05, "loss": 0.4325, "step": 2150 }, { "epoch": 12.981927710843374, "grad_norm": 3.0702404975891113, "learning_rate": 1.0818273092369479e-05, "loss": 0.4745, "step": 2155 }, { "epoch": 13.0, "eval_accuracy": 0.8057412504915454, "eval_auc": 0.8595369732841825, "eval_f1": 0.7366737739872068, "eval_loss": 0.41435614228248596, "eval_precision": 0.6916916916916916, "eval_recall": 0.7879133409350056, "eval_runtime": 18.3313, "eval_samples_per_second": 138.724, "eval_steps_per_second": 0.709, "step": 2158 }, { "epoch": 13.012048192771084, "grad_norm": 1.9328081607818604, "learning_rate": 1.0843373493975904e-05, "loss": 0.4262, "step": 2160 }, { "epoch": 13.042168674698795, "grad_norm": 2.341899871826172, "learning_rate": 1.086847389558233e-05, "loss": 0.4617, "step": 2165 }, { "epoch": 13.072289156626505, "grad_norm": 3.2174510955810547, "learning_rate": 1.0893574297188756e-05, "loss": 0.4322, "step": 2170 }, { "epoch": 13.102409638554217, "grad_norm": 2.326920509338379, "learning_rate": 1.0918674698795181e-05, "loss": 0.4778, "step": 2175 }, { "epoch": 13.132530120481928, "grad_norm": 2.192728281021118, "learning_rate": 1.0943775100401607e-05, "loss": 0.443, "step": 2180 }, { "epoch": 13.162650602409638, "grad_norm": 2.808758497238159, "learning_rate": 1.0968875502008033e-05, "loss": 0.474, "step": 2185 }, { "epoch": 13.19277108433735, "grad_norm": 2.31294846534729, "learning_rate": 1.0993975903614459e-05, "loss": 0.4829, "step": 2190 }, { "epoch": 13.22289156626506, "grad_norm": 1.9450610876083374, "learning_rate": 1.1019076305220884e-05, "loss": 0.4561, "step": 2195 }, { "epoch": 13.25301204819277, "grad_norm": 1.8041836023330688, "learning_rate": 1.104417670682731e-05, "loss": 0.4123, "step": 2200 }, { "epoch": 13.283132530120483, "grad_norm": 2.511220932006836, "learning_rate": 1.1069277108433736e-05, "loss": 0.4709, "step": 2205 }, { "epoch": 13.313253012048193, "grad_norm": 2.2910356521606445, "learning_rate": 1.1094377510040161e-05, "loss": 0.4616, "step": 2210 }, { "epoch": 13.343373493975903, "grad_norm": 2.4841256141662598, "learning_rate": 1.1119477911646587e-05, "loss": 0.4435, "step": 2215 }, { "epoch": 13.373493975903614, "grad_norm": 2.2364025115966797, "learning_rate": 1.1144578313253013e-05, "loss": 0.4511, "step": 2220 }, { "epoch": 13.403614457831326, "grad_norm": 2.3213870525360107, "learning_rate": 1.1169678714859438e-05, "loss": 0.465, "step": 2225 }, { "epoch": 13.433734939759036, "grad_norm": 4.151866436004639, "learning_rate": 1.1194779116465864e-05, "loss": 0.4568, "step": 2230 }, { "epoch": 13.463855421686747, "grad_norm": 2.545639991760254, "learning_rate": 1.121987951807229e-05, "loss": 0.4145, "step": 2235 }, { "epoch": 13.493975903614459, "grad_norm": 2.292720317840576, "learning_rate": 1.1244979919678715e-05, "loss": 0.4596, "step": 2240 }, { "epoch": 13.524096385542169, "grad_norm": 2.5688793659210205, "learning_rate": 1.1270080321285141e-05, "loss": 0.454, "step": 2245 }, { "epoch": 13.55421686746988, "grad_norm": 2.5358495712280273, "learning_rate": 1.1295180722891567e-05, "loss": 0.4683, "step": 2250 }, { "epoch": 13.58433734939759, "grad_norm": 2.0106098651885986, "learning_rate": 1.1320281124497994e-05, "loss": 0.4849, "step": 2255 }, { "epoch": 13.614457831325302, "grad_norm": 2.150888204574585, "learning_rate": 1.1345381526104418e-05, "loss": 0.4287, "step": 2260 }, { "epoch": 13.644578313253012, "grad_norm": 4.17469596862793, "learning_rate": 1.1370481927710844e-05, "loss": 0.462, "step": 2265 }, { "epoch": 13.674698795180722, "grad_norm": 3.2579774856567383, "learning_rate": 1.139558232931727e-05, "loss": 0.4311, "step": 2270 }, { "epoch": 13.704819277108435, "grad_norm": 1.8688870668411255, "learning_rate": 1.1420682730923695e-05, "loss": 0.4558, "step": 2275 }, { "epoch": 13.734939759036145, "grad_norm": 2.2652182579040527, "learning_rate": 1.144578313253012e-05, "loss": 0.4802, "step": 2280 }, { "epoch": 13.765060240963855, "grad_norm": 2.247284412384033, "learning_rate": 1.1470883534136546e-05, "loss": 0.4435, "step": 2285 }, { "epoch": 13.795180722891565, "grad_norm": 1.9053183794021606, "learning_rate": 1.1495983935742972e-05, "loss": 0.4649, "step": 2290 }, { "epoch": 13.825301204819278, "grad_norm": 2.277320623397827, "learning_rate": 1.1521084337349398e-05, "loss": 0.4766, "step": 2295 }, { "epoch": 13.855421686746988, "grad_norm": 2.1285111904144287, "learning_rate": 1.1546184738955825e-05, "loss": 0.4283, "step": 2300 }, { "epoch": 13.885542168674698, "grad_norm": 2.0318009853363037, "learning_rate": 1.1571285140562249e-05, "loss": 0.4696, "step": 2305 }, { "epoch": 13.91566265060241, "grad_norm": 1.9120746850967407, "learning_rate": 1.1596385542168675e-05, "loss": 0.4624, "step": 2310 }, { "epoch": 13.94578313253012, "grad_norm": 2.1122283935546875, "learning_rate": 1.16214859437751e-05, "loss": 0.4681, "step": 2315 }, { "epoch": 13.975903614457831, "grad_norm": 2.152334451675415, "learning_rate": 1.1646586345381528e-05, "loss": 0.4586, "step": 2320 }, { "epoch": 14.0, "eval_accuracy": 0.8183248132127409, "eval_auc": 0.8854995818167631, "eval_f1": 0.7478165938864629, "eval_loss": 0.38780173659324646, "eval_precision": 0.7172774869109948, "eval_recall": 0.7810718358038768, "eval_runtime": 17.2376, "eval_samples_per_second": 147.526, "eval_steps_per_second": 0.754, "step": 2324 }, { "epoch": 14.006024096385541, "grad_norm": 2.3053746223449707, "learning_rate": 1.1671686746987952e-05, "loss": 0.4225, "step": 2325 }, { "epoch": 14.036144578313253, "grad_norm": 2.693596601486206, "learning_rate": 1.1696787148594377e-05, "loss": 0.455, "step": 2330 }, { "epoch": 14.066265060240964, "grad_norm": 3.381603240966797, "learning_rate": 1.1721887550200803e-05, "loss": 0.4546, "step": 2335 }, { "epoch": 14.096385542168674, "grad_norm": 2.240210771560669, "learning_rate": 1.174698795180723e-05, "loss": 0.447, "step": 2340 }, { "epoch": 14.126506024096386, "grad_norm": 1.8732998371124268, "learning_rate": 1.1772088353413656e-05, "loss": 0.4198, "step": 2345 }, { "epoch": 14.156626506024097, "grad_norm": 2.2043216228485107, "learning_rate": 1.179718875502008e-05, "loss": 0.4458, "step": 2350 }, { "epoch": 14.186746987951807, "grad_norm": 2.2034971714019775, "learning_rate": 1.1822289156626506e-05, "loss": 0.4537, "step": 2355 }, { "epoch": 14.216867469879517, "grad_norm": 2.6626431941986084, "learning_rate": 1.1847389558232933e-05, "loss": 0.4189, "step": 2360 }, { "epoch": 14.24698795180723, "grad_norm": 2.321343421936035, "learning_rate": 1.1872489959839359e-05, "loss": 0.4419, "step": 2365 }, { "epoch": 14.27710843373494, "grad_norm": 2.189931631088257, "learning_rate": 1.1897590361445783e-05, "loss": 0.4476, "step": 2370 }, { "epoch": 14.30722891566265, "grad_norm": 2.4229791164398193, "learning_rate": 1.1922690763052208e-05, "loss": 0.4404, "step": 2375 }, { "epoch": 14.337349397590362, "grad_norm": 1.694631814956665, "learning_rate": 1.1947791164658636e-05, "loss": 0.4097, "step": 2380 }, { "epoch": 14.367469879518072, "grad_norm": 2.774210214614868, "learning_rate": 1.1972891566265061e-05, "loss": 0.4623, "step": 2385 }, { "epoch": 14.397590361445783, "grad_norm": 2.141517162322998, "learning_rate": 1.1997991967871485e-05, "loss": 0.4368, "step": 2390 }, { "epoch": 14.427710843373493, "grad_norm": 2.6954643726348877, "learning_rate": 1.2023092369477911e-05, "loss": 0.4576, "step": 2395 }, { "epoch": 14.457831325301205, "grad_norm": 3.5512561798095703, "learning_rate": 1.2048192771084338e-05, "loss": 0.4513, "step": 2400 }, { "epoch": 14.487951807228916, "grad_norm": 2.3789210319519043, "learning_rate": 1.2073293172690764e-05, "loss": 0.4625, "step": 2405 }, { "epoch": 14.518072289156626, "grad_norm": 2.6126534938812256, "learning_rate": 1.209839357429719e-05, "loss": 0.4442, "step": 2410 }, { "epoch": 14.548192771084338, "grad_norm": 2.2687761783599854, "learning_rate": 1.2123493975903614e-05, "loss": 0.4364, "step": 2415 }, { "epoch": 14.578313253012048, "grad_norm": 2.199284791946411, "learning_rate": 1.2148594377510041e-05, "loss": 0.4618, "step": 2420 }, { "epoch": 14.608433734939759, "grad_norm": 2.1206400394439697, "learning_rate": 1.2173694779116467e-05, "loss": 0.4652, "step": 2425 }, { "epoch": 14.638554216867469, "grad_norm": 2.170057535171509, "learning_rate": 1.2198795180722893e-05, "loss": 0.4771, "step": 2430 }, { "epoch": 14.668674698795181, "grad_norm": 2.2524478435516357, "learning_rate": 1.2223895582329316e-05, "loss": 0.4573, "step": 2435 }, { "epoch": 14.698795180722891, "grad_norm": 1.919073462486267, "learning_rate": 1.2248995983935744e-05, "loss": 0.3936, "step": 2440 }, { "epoch": 14.728915662650602, "grad_norm": 2.0246472358703613, "learning_rate": 1.227409638554217e-05, "loss": 0.431, "step": 2445 }, { "epoch": 14.759036144578314, "grad_norm": 4.081275463104248, "learning_rate": 1.2299196787148595e-05, "loss": 0.4491, "step": 2450 }, { "epoch": 14.789156626506024, "grad_norm": 2.2841455936431885, "learning_rate": 1.2324297188755021e-05, "loss": 0.4857, "step": 2455 }, { "epoch": 14.819277108433734, "grad_norm": 3.2180285453796387, "learning_rate": 1.2349397590361447e-05, "loss": 0.4147, "step": 2460 }, { "epoch": 14.849397590361447, "grad_norm": 2.3523502349853516, "learning_rate": 1.2374497991967872e-05, "loss": 0.4693, "step": 2465 }, { "epoch": 14.879518072289157, "grad_norm": 3.6546506881713867, "learning_rate": 1.2399598393574298e-05, "loss": 0.4628, "step": 2470 }, { "epoch": 14.909638554216867, "grad_norm": 1.9689726829528809, "learning_rate": 1.2424698795180724e-05, "loss": 0.4647, "step": 2475 }, { "epoch": 14.939759036144578, "grad_norm": 2.322652578353882, "learning_rate": 1.244979919678715e-05, "loss": 0.4137, "step": 2480 }, { "epoch": 14.96987951807229, "grad_norm": 4.471823215484619, "learning_rate": 1.2474899598393575e-05, "loss": 0.5001, "step": 2485 }, { "epoch": 15.0, "grad_norm": 3.832202911376953, "learning_rate": 1.25e-05, "loss": 0.437, "step": 2490 }, { "epoch": 15.0, "eval_accuracy": 0.8222571765631145, "eval_auc": 0.8772050439331947, "eval_f1": 0.7431818181818182, "eval_loss": 0.38412806391716003, "eval_precision": 0.7406568516421291, "eval_recall": 0.7457240592930444, "eval_runtime": 17.2897, "eval_samples_per_second": 147.082, "eval_steps_per_second": 0.752, "step": 2490 }, { "epoch": 15.03012048192771, "grad_norm": 2.557508945465088, "learning_rate": 1.2525100401606426e-05, "loss": 0.4283, "step": 2495 }, { "epoch": 15.060240963855422, "grad_norm": 2.2491331100463867, "learning_rate": 1.2550200803212852e-05, "loss": 0.4186, "step": 2500 }, { "epoch": 15.090361445783133, "grad_norm": 2.5220303535461426, "learning_rate": 1.257530120481928e-05, "loss": 0.4495, "step": 2505 }, { "epoch": 15.120481927710843, "grad_norm": 2.5314533710479736, "learning_rate": 1.2600401606425705e-05, "loss": 0.4852, "step": 2510 }, { "epoch": 15.150602409638553, "grad_norm": 2.1047887802124023, "learning_rate": 1.2625502008032127e-05, "loss": 0.4289, "step": 2515 }, { "epoch": 15.180722891566266, "grad_norm": 2.526026725769043, "learning_rate": 1.2650602409638555e-05, "loss": 0.409, "step": 2520 }, { "epoch": 15.210843373493976, "grad_norm": 3.2399747371673584, "learning_rate": 1.267570281124498e-05, "loss": 0.4763, "step": 2525 }, { "epoch": 15.240963855421686, "grad_norm": 2.312688112258911, "learning_rate": 1.2700803212851406e-05, "loss": 0.4512, "step": 2530 }, { "epoch": 15.271084337349398, "grad_norm": 2.7564501762390137, "learning_rate": 1.2725903614457832e-05, "loss": 0.4458, "step": 2535 }, { "epoch": 15.301204819277109, "grad_norm": 2.0259008407592773, "learning_rate": 1.2751004016064257e-05, "loss": 0.4129, "step": 2540 }, { "epoch": 15.331325301204819, "grad_norm": 1.8796688318252563, "learning_rate": 1.2776104417670685e-05, "loss": 0.443, "step": 2545 }, { "epoch": 15.36144578313253, "grad_norm": 5.187371730804443, "learning_rate": 1.280120481927711e-05, "loss": 0.426, "step": 2550 }, { "epoch": 15.391566265060241, "grad_norm": 2.6445512771606445, "learning_rate": 1.2826305220883536e-05, "loss": 0.4807, "step": 2555 }, { "epoch": 15.421686746987952, "grad_norm": 2.3695054054260254, "learning_rate": 1.285140562248996e-05, "loss": 0.3946, "step": 2560 }, { "epoch": 15.451807228915662, "grad_norm": 2.0859386920928955, "learning_rate": 1.2876506024096386e-05, "loss": 0.4344, "step": 2565 }, { "epoch": 15.481927710843374, "grad_norm": 2.2627930641174316, "learning_rate": 1.2901606425702811e-05, "loss": 0.4037, "step": 2570 }, { "epoch": 15.512048192771084, "grad_norm": 2.312237024307251, "learning_rate": 1.2926706827309237e-05, "loss": 0.4294, "step": 2575 }, { "epoch": 15.542168674698795, "grad_norm": 2.807929515838623, "learning_rate": 1.2951807228915663e-05, "loss": 0.4332, "step": 2580 }, { "epoch": 15.572289156626507, "grad_norm": 2.819624662399292, "learning_rate": 1.297690763052209e-05, "loss": 0.433, "step": 2585 }, { "epoch": 15.602409638554217, "grad_norm": 2.2831618785858154, "learning_rate": 1.3002008032128516e-05, "loss": 0.4538, "step": 2590 }, { "epoch": 15.632530120481928, "grad_norm": 2.8296332359313965, "learning_rate": 1.3027108433734941e-05, "loss": 0.3969, "step": 2595 }, { "epoch": 15.662650602409638, "grad_norm": 2.7965662479400635, "learning_rate": 1.3052208835341367e-05, "loss": 0.4303, "step": 2600 }, { "epoch": 15.69277108433735, "grad_norm": 2.252262830734253, "learning_rate": 1.3077309236947791e-05, "loss": 0.4252, "step": 2605 }, { "epoch": 15.72289156626506, "grad_norm": 2.0010712146759033, "learning_rate": 1.3102409638554217e-05, "loss": 0.4336, "step": 2610 }, { "epoch": 15.75301204819277, "grad_norm": 2.653491497039795, "learning_rate": 1.3127510040160642e-05, "loss": 0.4209, "step": 2615 }, { "epoch": 15.783132530120483, "grad_norm": 2.3631601333618164, "learning_rate": 1.3152610441767068e-05, "loss": 0.4405, "step": 2620 }, { "epoch": 15.813253012048193, "grad_norm": 1.9908761978149414, "learning_rate": 1.3177710843373495e-05, "loss": 0.4168, "step": 2625 }, { "epoch": 15.843373493975903, "grad_norm": 2.210310697555542, "learning_rate": 1.3202811244979921e-05, "loss": 0.46, "step": 2630 }, { "epoch": 15.873493975903614, "grad_norm": 2.0091185569763184, "learning_rate": 1.3227911646586347e-05, "loss": 0.4702, "step": 2635 }, { "epoch": 15.903614457831326, "grad_norm": 2.4611587524414062, "learning_rate": 1.3253012048192772e-05, "loss": 0.4707, "step": 2640 }, { "epoch": 15.933734939759036, "grad_norm": 2.493058443069458, "learning_rate": 1.3278112449799198e-05, "loss": 0.4458, "step": 2645 }, { "epoch": 15.963855421686747, "grad_norm": 3.501375436782837, "learning_rate": 1.3303212851405622e-05, "loss": 0.4466, "step": 2650 }, { "epoch": 15.993975903614459, "grad_norm": 3.2416062355041504, "learning_rate": 1.3328313253012048e-05, "loss": 0.429, "step": 2655 }, { "epoch": 16.0, "eval_accuracy": 0.8301219032638616, "eval_auc": 0.8901451116364448, "eval_f1": 0.76, "eval_loss": 0.3746268153190613, "eval_precision": 0.7410617551462622, "eval_recall": 0.7799315849486887, "eval_runtime": 17.3366, "eval_samples_per_second": 146.684, "eval_steps_per_second": 0.75, "step": 2656 }, { "epoch": 16.02409638554217, "grad_norm": 2.0801899433135986, "learning_rate": 1.3353413654618473e-05, "loss": 0.4283, "step": 2660 }, { "epoch": 16.05421686746988, "grad_norm": 2.239642858505249, "learning_rate": 1.3378514056224901e-05, "loss": 0.4091, "step": 2665 }, { "epoch": 16.08433734939759, "grad_norm": 2.6524431705474854, "learning_rate": 1.3403614457831327e-05, "loss": 0.4487, "step": 2670 }, { "epoch": 16.1144578313253, "grad_norm": 2.178330183029175, "learning_rate": 1.3428714859437752e-05, "loss": 0.4181, "step": 2675 }, { "epoch": 16.14457831325301, "grad_norm": 1.9692491292953491, "learning_rate": 1.3453815261044178e-05, "loss": 0.3775, "step": 2680 }, { "epoch": 16.174698795180724, "grad_norm": 2.0983362197875977, "learning_rate": 1.3478915662650604e-05, "loss": 0.3768, "step": 2685 }, { "epoch": 16.204819277108435, "grad_norm": 2.7713608741760254, "learning_rate": 1.3504016064257031e-05, "loss": 0.4351, "step": 2690 }, { "epoch": 16.234939759036145, "grad_norm": 2.1661791801452637, "learning_rate": 1.3529116465863453e-05, "loss": 0.4291, "step": 2695 }, { "epoch": 16.265060240963855, "grad_norm": 3.4588308334350586, "learning_rate": 1.3554216867469879e-05, "loss": 0.4601, "step": 2700 }, { "epoch": 16.295180722891565, "grad_norm": 2.1662585735321045, "learning_rate": 1.3579317269076306e-05, "loss": 0.4379, "step": 2705 }, { "epoch": 16.325301204819276, "grad_norm": 4.048949241638184, "learning_rate": 1.3604417670682732e-05, "loss": 0.4661, "step": 2710 }, { "epoch": 16.355421686746986, "grad_norm": 2.671419858932495, "learning_rate": 1.3629518072289158e-05, "loss": 0.4433, "step": 2715 }, { "epoch": 16.3855421686747, "grad_norm": 2.4763331413269043, "learning_rate": 1.3654618473895583e-05, "loss": 0.4544, "step": 2720 }, { "epoch": 16.41566265060241, "grad_norm": 2.3892018795013428, "learning_rate": 1.3679718875502009e-05, "loss": 0.4358, "step": 2725 }, { "epoch": 16.44578313253012, "grad_norm": 2.494692325592041, "learning_rate": 1.3704819277108436e-05, "loss": 0.4141, "step": 2730 }, { "epoch": 16.47590361445783, "grad_norm": 3.030679702758789, "learning_rate": 1.3729919678714859e-05, "loss": 0.4181, "step": 2735 }, { "epoch": 16.50602409638554, "grad_norm": 2.8109540939331055, "learning_rate": 1.3755020080321284e-05, "loss": 0.4175, "step": 2740 }, { "epoch": 16.53614457831325, "grad_norm": 2.295210599899292, "learning_rate": 1.378012048192771e-05, "loss": 0.4048, "step": 2745 }, { "epoch": 16.566265060240966, "grad_norm": 2.345912456512451, "learning_rate": 1.3805220883534137e-05, "loss": 0.4578, "step": 2750 }, { "epoch": 16.596385542168676, "grad_norm": 4.10270357131958, "learning_rate": 1.3830321285140563e-05, "loss": 0.4514, "step": 2755 }, { "epoch": 16.626506024096386, "grad_norm": 3.1970739364624023, "learning_rate": 1.3855421686746989e-05, "loss": 0.4348, "step": 2760 }, { "epoch": 16.656626506024097, "grad_norm": 2.188840866088867, "learning_rate": 1.3880522088353414e-05, "loss": 0.383, "step": 2765 }, { "epoch": 16.686746987951807, "grad_norm": 2.1729989051818848, "learning_rate": 1.390562248995984e-05, "loss": 0.4274, "step": 2770 }, { "epoch": 16.716867469879517, "grad_norm": 2.270476818084717, "learning_rate": 1.3930722891566267e-05, "loss": 0.4234, "step": 2775 }, { "epoch": 16.746987951807228, "grad_norm": 2.173185110092163, "learning_rate": 1.395582329317269e-05, "loss": 0.3879, "step": 2780 }, { "epoch": 16.77710843373494, "grad_norm": 2.8911218643188477, "learning_rate": 1.3980923694779115e-05, "loss": 0.456, "step": 2785 }, { "epoch": 16.80722891566265, "grad_norm": 2.338966131210327, "learning_rate": 1.4006024096385543e-05, "loss": 0.458, "step": 2790 }, { "epoch": 16.837349397590362, "grad_norm": 2.3324530124664307, "learning_rate": 1.4031124497991968e-05, "loss": 0.4686, "step": 2795 }, { "epoch": 16.867469879518072, "grad_norm": 2.3572144508361816, "learning_rate": 1.4056224899598394e-05, "loss": 0.3995, "step": 2800 }, { "epoch": 16.897590361445783, "grad_norm": 1.8775089979171753, "learning_rate": 1.408132530120482e-05, "loss": 0.3767, "step": 2805 }, { "epoch": 16.927710843373493, "grad_norm": 2.193387508392334, "learning_rate": 1.4106425702811245e-05, "loss": 0.4548, "step": 2810 }, { "epoch": 16.957831325301203, "grad_norm": 2.899744987487793, "learning_rate": 1.4131526104417673e-05, "loss": 0.4614, "step": 2815 }, { "epoch": 16.987951807228917, "grad_norm": 2.185055732727051, "learning_rate": 1.4156626506024098e-05, "loss": 0.4286, "step": 2820 }, { "epoch": 17.0, "eval_accuracy": 0.8305151395988989, "eval_auc": 0.8864002841729622, "eval_f1": 0.7598885793871867, "eval_loss": 0.37183433771133423, "eval_precision": 0.7429193899782135, "eval_recall": 0.7776510832383124, "eval_runtime": 19.1171, "eval_samples_per_second": 133.022, "eval_steps_per_second": 0.68, "step": 2822 }, { "epoch": 17.018072289156628, "grad_norm": 2.4193804264068604, "learning_rate": 1.418172690763052e-05, "loss": 0.4336, "step": 2825 }, { "epoch": 17.048192771084338, "grad_norm": 2.2396481037139893, "learning_rate": 1.4206827309236948e-05, "loss": 0.4114, "step": 2830 }, { "epoch": 17.07831325301205, "grad_norm": 3.2262861728668213, "learning_rate": 1.4231927710843374e-05, "loss": 0.4276, "step": 2835 }, { "epoch": 17.10843373493976, "grad_norm": 2.5907819271087646, "learning_rate": 1.42570281124498e-05, "loss": 0.4324, "step": 2840 }, { "epoch": 17.13855421686747, "grad_norm": 2.5259037017822266, "learning_rate": 1.4282128514056225e-05, "loss": 0.4699, "step": 2845 }, { "epoch": 17.16867469879518, "grad_norm": 2.109802007675171, "learning_rate": 1.430722891566265e-05, "loss": 0.3989, "step": 2850 }, { "epoch": 17.198795180722893, "grad_norm": 2.3156588077545166, "learning_rate": 1.4332329317269078e-05, "loss": 0.4175, "step": 2855 }, { "epoch": 17.228915662650603, "grad_norm": 2.192671060562134, "learning_rate": 1.4357429718875504e-05, "loss": 0.3869, "step": 2860 }, { "epoch": 17.259036144578314, "grad_norm": 3.8719916343688965, "learning_rate": 1.438253012048193e-05, "loss": 0.442, "step": 2865 }, { "epoch": 17.289156626506024, "grad_norm": 2.4408538341522217, "learning_rate": 1.4407630522088353e-05, "loss": 0.3831, "step": 2870 }, { "epoch": 17.319277108433734, "grad_norm": 2.5923538208007812, "learning_rate": 1.4432730923694779e-05, "loss": 0.4272, "step": 2875 }, { "epoch": 17.349397590361445, "grad_norm": 2.708015203475952, "learning_rate": 1.4457831325301205e-05, "loss": 0.4667, "step": 2880 }, { "epoch": 17.379518072289155, "grad_norm": 2.7255101203918457, "learning_rate": 1.448293172690763e-05, "loss": 0.4217, "step": 2885 }, { "epoch": 17.40963855421687, "grad_norm": 1.8880677223205566, "learning_rate": 1.4508032128514056e-05, "loss": 0.3921, "step": 2890 }, { "epoch": 17.43975903614458, "grad_norm": 3.514613389968872, "learning_rate": 1.4533132530120484e-05, "loss": 0.4307, "step": 2895 }, { "epoch": 17.46987951807229, "grad_norm": 2.4978322982788086, "learning_rate": 1.455823293172691e-05, "loss": 0.3752, "step": 2900 }, { "epoch": 17.5, "grad_norm": 2.1256654262542725, "learning_rate": 1.4583333333333335e-05, "loss": 0.4139, "step": 2905 }, { "epoch": 17.53012048192771, "grad_norm": 2.6442389488220215, "learning_rate": 1.460843373493976e-05, "loss": 0.4283, "step": 2910 }, { "epoch": 17.56024096385542, "grad_norm": 2.4239025115966797, "learning_rate": 1.4633534136546185e-05, "loss": 0.3972, "step": 2915 }, { "epoch": 17.59036144578313, "grad_norm": 2.8832712173461914, "learning_rate": 1.465863453815261e-05, "loss": 0.4248, "step": 2920 }, { "epoch": 17.620481927710845, "grad_norm": 2.305220603942871, "learning_rate": 1.4683734939759036e-05, "loss": 0.3948, "step": 2925 }, { "epoch": 17.650602409638555, "grad_norm": 3.6476662158966064, "learning_rate": 1.4708835341365462e-05, "loss": 0.427, "step": 2930 }, { "epoch": 17.680722891566266, "grad_norm": 2.280860424041748, "learning_rate": 1.4733935742971889e-05, "loss": 0.431, "step": 2935 }, { "epoch": 17.710843373493976, "grad_norm": 2.509981155395508, "learning_rate": 1.4759036144578315e-05, "loss": 0.423, "step": 2940 }, { "epoch": 17.740963855421686, "grad_norm": 2.111283302307129, "learning_rate": 1.478413654618474e-05, "loss": 0.4201, "step": 2945 }, { "epoch": 17.771084337349397, "grad_norm": 2.2227985858917236, "learning_rate": 1.4809236947791166e-05, "loss": 0.3701, "step": 2950 }, { "epoch": 17.801204819277107, "grad_norm": 2.4401426315307617, "learning_rate": 1.4834337349397592e-05, "loss": 0.425, "step": 2955 }, { "epoch": 17.83132530120482, "grad_norm": 2.3500068187713623, "learning_rate": 1.4859437751004016e-05, "loss": 0.4274, "step": 2960 }, { "epoch": 17.86144578313253, "grad_norm": 2.8407883644104004, "learning_rate": 1.4884538152610441e-05, "loss": 0.4129, "step": 2965 }, { "epoch": 17.89156626506024, "grad_norm": 3.6744883060455322, "learning_rate": 1.4909638554216867e-05, "loss": 0.3994, "step": 2970 }, { "epoch": 17.92168674698795, "grad_norm": 2.0906033515930176, "learning_rate": 1.4934738955823294e-05, "loss": 0.3993, "step": 2975 }, { "epoch": 17.951807228915662, "grad_norm": 2.5579147338867188, "learning_rate": 1.495983935742972e-05, "loss": 0.3937, "step": 2980 }, { "epoch": 17.981927710843372, "grad_norm": 2.253021001815796, "learning_rate": 1.4984939759036146e-05, "loss": 0.3662, "step": 2985 }, { "epoch": 18.0, "eval_accuracy": 0.8462445930003932, "eval_auc": 0.9037278537412684, "eval_f1": 0.7751581368602645, "eval_loss": 0.3433316648006439, "eval_precision": 0.7819025522041764, "eval_recall": 0.7685290763968073, "eval_runtime": 18.8459, "eval_samples_per_second": 134.937, "eval_steps_per_second": 0.69, "step": 2988 }, { "epoch": 18.012048192771083, "grad_norm": 2.6839776039123535, "learning_rate": 1.5010040160642571e-05, "loss": 0.405, "step": 2990 }, { "epoch": 18.042168674698797, "grad_norm": 2.047701120376587, "learning_rate": 1.5035140562248997e-05, "loss": 0.3964, "step": 2995 }, { "epoch": 18.072289156626507, "grad_norm": 2.45595383644104, "learning_rate": 1.5060240963855424e-05, "loss": 0.4076, "step": 3000 }, { "epoch": 18.102409638554217, "grad_norm": 2.435271978378296, "learning_rate": 1.5085341365461847e-05, "loss": 0.4107, "step": 3005 }, { "epoch": 18.132530120481928, "grad_norm": 2.5313150882720947, "learning_rate": 1.5110441767068272e-05, "loss": 0.3662, "step": 3010 }, { "epoch": 18.162650602409638, "grad_norm": 2.0792062282562256, "learning_rate": 1.51355421686747e-05, "loss": 0.3682, "step": 3015 }, { "epoch": 18.19277108433735, "grad_norm": 2.737271308898926, "learning_rate": 1.5160642570281125e-05, "loss": 0.4058, "step": 3020 }, { "epoch": 18.22289156626506, "grad_norm": 2.3708345890045166, "learning_rate": 1.5185742971887551e-05, "loss": 0.4121, "step": 3025 }, { "epoch": 18.253012048192772, "grad_norm": 2.9924778938293457, "learning_rate": 1.5210843373493977e-05, "loss": 0.4572, "step": 3030 }, { "epoch": 18.283132530120483, "grad_norm": 2.2808122634887695, "learning_rate": 1.5235943775100402e-05, "loss": 0.4439, "step": 3035 }, { "epoch": 18.313253012048193, "grad_norm": 2.355435371398926, "learning_rate": 1.526104417670683e-05, "loss": 0.4418, "step": 3040 }, { "epoch": 18.343373493975903, "grad_norm": 2.5382118225097656, "learning_rate": 1.5286144578313255e-05, "loss": 0.3806, "step": 3045 }, { "epoch": 18.373493975903614, "grad_norm": 2.1816794872283936, "learning_rate": 1.5311244979919678e-05, "loss": 0.4308, "step": 3050 }, { "epoch": 18.403614457831324, "grad_norm": 3.9759576320648193, "learning_rate": 1.5336345381526103e-05, "loss": 0.4222, "step": 3055 }, { "epoch": 18.433734939759034, "grad_norm": 1.9446662664413452, "learning_rate": 1.536144578313253e-05, "loss": 0.3724, "step": 3060 }, { "epoch": 18.46385542168675, "grad_norm": 2.1402506828308105, "learning_rate": 1.5386546184738955e-05, "loss": 0.3833, "step": 3065 }, { "epoch": 18.49397590361446, "grad_norm": 2.0538694858551025, "learning_rate": 1.5411646586345384e-05, "loss": 0.4076, "step": 3070 }, { "epoch": 18.52409638554217, "grad_norm": 4.788428783416748, "learning_rate": 1.543674698795181e-05, "loss": 0.3939, "step": 3075 }, { "epoch": 18.55421686746988, "grad_norm": 2.8548316955566406, "learning_rate": 1.5461847389558235e-05, "loss": 0.4389, "step": 3080 }, { "epoch": 18.58433734939759, "grad_norm": 2.549076795578003, "learning_rate": 1.548694779116466e-05, "loss": 0.3949, "step": 3085 }, { "epoch": 18.6144578313253, "grad_norm": 3.204897165298462, "learning_rate": 1.5512048192771086e-05, "loss": 0.4241, "step": 3090 }, { "epoch": 18.644578313253014, "grad_norm": 2.265899181365967, "learning_rate": 1.553714859437751e-05, "loss": 0.4176, "step": 3095 }, { "epoch": 18.674698795180724, "grad_norm": 2.1757800579071045, "learning_rate": 1.5562248995983934e-05, "loss": 0.3993, "step": 3100 }, { "epoch": 18.704819277108435, "grad_norm": 2.321087121963501, "learning_rate": 1.558734939759036e-05, "loss": 0.3926, "step": 3105 }, { "epoch": 18.734939759036145, "grad_norm": 2.9241983890533447, "learning_rate": 1.561244979919679e-05, "loss": 0.3926, "step": 3110 }, { "epoch": 18.765060240963855, "grad_norm": 2.5136919021606445, "learning_rate": 1.5637550200803215e-05, "loss": 0.3915, "step": 3115 }, { "epoch": 18.795180722891565, "grad_norm": 2.293180227279663, "learning_rate": 1.566265060240964e-05, "loss": 0.4226, "step": 3120 }, { "epoch": 18.825301204819276, "grad_norm": 3.1935389041900635, "learning_rate": 1.5687751004016066e-05, "loss": 0.4073, "step": 3125 }, { "epoch": 18.855421686746986, "grad_norm": 2.0698330402374268, "learning_rate": 1.5712851405622492e-05, "loss": 0.4049, "step": 3130 }, { "epoch": 18.8855421686747, "grad_norm": 2.8074769973754883, "learning_rate": 1.5737951807228914e-05, "loss": 0.4053, "step": 3135 }, { "epoch": 18.91566265060241, "grad_norm": 3.0730950832366943, "learning_rate": 1.576305220883534e-05, "loss": 0.4107, "step": 3140 }, { "epoch": 18.94578313253012, "grad_norm": 2.4239420890808105, "learning_rate": 1.5788152610441766e-05, "loss": 0.4033, "step": 3145 }, { "epoch": 18.97590361445783, "grad_norm": 2.209717035293579, "learning_rate": 1.5813253012048195e-05, "loss": 0.4602, "step": 3150 }, { "epoch": 19.0, "eval_accuracy": 0.853716083366103, "eval_auc": 0.912781418154491, "eval_f1": 0.7912457912457912, "eval_loss": 0.33109456300735474, "eval_precision": 0.7790055248618785, "eval_recall": 0.8038768529076397, "eval_runtime": 19.3279, "eval_samples_per_second": 131.571, "eval_steps_per_second": 0.673, "step": 3154 }, { "epoch": 19.00602409638554, "grad_norm": 2.2700791358947754, "learning_rate": 1.583835341365462e-05, "loss": 0.4574, "step": 3155 }, { "epoch": 19.03614457831325, "grad_norm": 1.9966039657592773, "learning_rate": 1.5863453815261046e-05, "loss": 0.3946, "step": 3160 }, { "epoch": 19.066265060240966, "grad_norm": 2.130915403366089, "learning_rate": 1.588855421686747e-05, "loss": 0.3663, "step": 3165 }, { "epoch": 19.096385542168676, "grad_norm": 2.385338306427002, "learning_rate": 1.5913654618473897e-05, "loss": 0.4464, "step": 3170 }, { "epoch": 19.126506024096386, "grad_norm": 2.0878496170043945, "learning_rate": 1.5938755020080323e-05, "loss": 0.3871, "step": 3175 }, { "epoch": 19.156626506024097, "grad_norm": 3.1889541149139404, "learning_rate": 1.5963855421686745e-05, "loss": 0.3963, "step": 3180 }, { "epoch": 19.186746987951807, "grad_norm": 2.2727162837982178, "learning_rate": 1.598895582329317e-05, "loss": 0.3706, "step": 3185 }, { "epoch": 19.216867469879517, "grad_norm": 2.489370107650757, "learning_rate": 1.60140562248996e-05, "loss": 0.4172, "step": 3190 }, { "epoch": 19.246987951807228, "grad_norm": 2.4061601161956787, "learning_rate": 1.6039156626506026e-05, "loss": 0.3973, "step": 3195 }, { "epoch": 19.27710843373494, "grad_norm": 2.163954734802246, "learning_rate": 1.606425702811245e-05, "loss": 0.347, "step": 3200 }, { "epoch": 19.30722891566265, "grad_norm": 1.9165501594543457, "learning_rate": 1.6089357429718877e-05, "loss": 0.3723, "step": 3205 }, { "epoch": 19.337349397590362, "grad_norm": 2.6702020168304443, "learning_rate": 1.6114457831325303e-05, "loss": 0.4096, "step": 3210 }, { "epoch": 19.367469879518072, "grad_norm": 2.3621468544006348, "learning_rate": 1.613955823293173e-05, "loss": 0.3835, "step": 3215 }, { "epoch": 19.397590361445783, "grad_norm": 2.0021228790283203, "learning_rate": 1.6164658634538154e-05, "loss": 0.3666, "step": 3220 }, { "epoch": 19.427710843373493, "grad_norm": 2.663166046142578, "learning_rate": 1.6189759036144576e-05, "loss": 0.4176, "step": 3225 }, { "epoch": 19.457831325301203, "grad_norm": 2.1119351387023926, "learning_rate": 1.6214859437751005e-05, "loss": 0.4178, "step": 3230 }, { "epoch": 19.487951807228917, "grad_norm": 2.3716650009155273, "learning_rate": 1.623995983935743e-05, "loss": 0.4237, "step": 3235 }, { "epoch": 19.518072289156628, "grad_norm": 3.074990749359131, "learning_rate": 1.6265060240963857e-05, "loss": 0.4067, "step": 3240 }, { "epoch": 19.548192771084338, "grad_norm": 2.659364938735962, "learning_rate": 1.6290160642570282e-05, "loss": 0.3987, "step": 3245 }, { "epoch": 19.57831325301205, "grad_norm": 2.0575156211853027, "learning_rate": 1.6315261044176708e-05, "loss": 0.3946, "step": 3250 }, { "epoch": 19.60843373493976, "grad_norm": 2.162975311279297, "learning_rate": 1.6340361445783134e-05, "loss": 0.3781, "step": 3255 }, { "epoch": 19.63855421686747, "grad_norm": 3.1139814853668213, "learning_rate": 1.636546184738956e-05, "loss": 0.4018, "step": 3260 }, { "epoch": 19.66867469879518, "grad_norm": 2.6446077823638916, "learning_rate": 1.6390562248995985e-05, "loss": 0.3795, "step": 3265 }, { "epoch": 19.698795180722893, "grad_norm": 2.3214240074157715, "learning_rate": 1.641566265060241e-05, "loss": 0.3774, "step": 3270 }, { "epoch": 19.728915662650603, "grad_norm": 2.515679359436035, "learning_rate": 1.6440763052208836e-05, "loss": 0.3648, "step": 3275 }, { "epoch": 19.759036144578314, "grad_norm": 3.9370508193969727, "learning_rate": 1.6465863453815262e-05, "loss": 0.3597, "step": 3280 }, { "epoch": 19.789156626506024, "grad_norm": 2.8674943447113037, "learning_rate": 1.6490963855421688e-05, "loss": 0.4004, "step": 3285 }, { "epoch": 19.819277108433734, "grad_norm": 2.5606460571289062, "learning_rate": 1.6516064257028113e-05, "loss": 0.3831, "step": 3290 }, { "epoch": 19.849397590361445, "grad_norm": 2.4989705085754395, "learning_rate": 1.654116465863454e-05, "loss": 0.4079, "step": 3295 }, { "epoch": 19.879518072289155, "grad_norm": 2.4194695949554443, "learning_rate": 1.6566265060240965e-05, "loss": 0.3862, "step": 3300 }, { "epoch": 19.90963855421687, "grad_norm": 2.933333396911621, "learning_rate": 1.659136546184739e-05, "loss": 0.4224, "step": 3305 }, { "epoch": 19.93975903614458, "grad_norm": 2.5552918910980225, "learning_rate": 1.6616465863453816e-05, "loss": 0.3786, "step": 3310 }, { "epoch": 19.96987951807229, "grad_norm": 2.3705222606658936, "learning_rate": 1.6641566265060242e-05, "loss": 0.3776, "step": 3315 }, { "epoch": 20.0, "grad_norm": 2.3991634845733643, "learning_rate": 1.6666666666666667e-05, "loss": 0.3774, "step": 3320 }, { "epoch": 20.0, "eval_accuracy": 0.8446716476602438, "eval_auc": 0.9032141933170075, "eval_f1": 0.7796988287785834, "eval_loss": 0.344701886177063, "eval_precision": 0.7631004366812227, "eval_recall": 0.7970353477765109, "eval_runtime": 19.3433, "eval_samples_per_second": 131.467, "eval_steps_per_second": 0.672, "step": 3320 }, { "epoch": 20.03012048192771, "grad_norm": 2.448899984359741, "learning_rate": 1.6691767068273093e-05, "loss": 0.4055, "step": 3325 }, { "epoch": 20.06024096385542, "grad_norm": 2.2480709552764893, "learning_rate": 1.671686746987952e-05, "loss": 0.377, "step": 3330 }, { "epoch": 20.09036144578313, "grad_norm": 2.342336654663086, "learning_rate": 1.6741967871485944e-05, "loss": 0.3567, "step": 3335 }, { "epoch": 20.120481927710845, "grad_norm": 3.282813549041748, "learning_rate": 1.676706827309237e-05, "loss": 0.3792, "step": 3340 }, { "epoch": 20.150602409638555, "grad_norm": 15.908650398254395, "learning_rate": 1.6792168674698796e-05, "loss": 0.3796, "step": 3345 }, { "epoch": 20.180722891566266, "grad_norm": 2.0319290161132812, "learning_rate": 1.681726907630522e-05, "loss": 0.3438, "step": 3350 }, { "epoch": 20.210843373493976, "grad_norm": 2.748540163040161, "learning_rate": 1.6842369477911647e-05, "loss": 0.3816, "step": 3355 }, { "epoch": 20.240963855421686, "grad_norm": 2.2890756130218506, "learning_rate": 1.6867469879518073e-05, "loss": 0.3743, "step": 3360 }, { "epoch": 20.271084337349397, "grad_norm": 2.790903091430664, "learning_rate": 1.68925702811245e-05, "loss": 0.3602, "step": 3365 }, { "epoch": 20.301204819277107, "grad_norm": 2.8439552783966064, "learning_rate": 1.6917670682730924e-05, "loss": 0.3961, "step": 3370 }, { "epoch": 20.33132530120482, "grad_norm": 2.9442138671875, "learning_rate": 1.694277108433735e-05, "loss": 0.4049, "step": 3375 }, { "epoch": 20.36144578313253, "grad_norm": 2.309587240219116, "learning_rate": 1.6967871485943776e-05, "loss": 0.3813, "step": 3380 }, { "epoch": 20.39156626506024, "grad_norm": 2.2793304920196533, "learning_rate": 1.69929718875502e-05, "loss": 0.3744, "step": 3385 }, { "epoch": 20.42168674698795, "grad_norm": 3.1960504055023193, "learning_rate": 1.7018072289156627e-05, "loss": 0.3549, "step": 3390 }, { "epoch": 20.451807228915662, "grad_norm": 3.467097520828247, "learning_rate": 1.7043172690763053e-05, "loss": 0.393, "step": 3395 }, { "epoch": 20.481927710843372, "grad_norm": 2.76165771484375, "learning_rate": 1.706827309236948e-05, "loss": 0.3616, "step": 3400 }, { "epoch": 20.512048192771083, "grad_norm": 2.5405423641204834, "learning_rate": 1.7093373493975904e-05, "loss": 0.3904, "step": 3405 }, { "epoch": 20.542168674698797, "grad_norm": 2.286052703857422, "learning_rate": 1.711847389558233e-05, "loss": 0.3578, "step": 3410 }, { "epoch": 20.572289156626507, "grad_norm": 2.32137131690979, "learning_rate": 1.7143574297188755e-05, "loss": 0.3789, "step": 3415 }, { "epoch": 20.602409638554217, "grad_norm": 4.155764579772949, "learning_rate": 1.716867469879518e-05, "loss": 0.4012, "step": 3420 }, { "epoch": 20.632530120481928, "grad_norm": 3.8825199604034424, "learning_rate": 1.7193775100401607e-05, "loss": 0.4325, "step": 3425 }, { "epoch": 20.662650602409638, "grad_norm": 2.3800666332244873, "learning_rate": 1.7218875502008032e-05, "loss": 0.3944, "step": 3430 }, { "epoch": 20.69277108433735, "grad_norm": 2.744089126586914, "learning_rate": 1.7243975903614458e-05, "loss": 0.3912, "step": 3435 }, { "epoch": 20.72289156626506, "grad_norm": 2.666856527328491, "learning_rate": 1.7269076305220887e-05, "loss": 0.4301, "step": 3440 }, { "epoch": 20.753012048192772, "grad_norm": 2.391982316970825, "learning_rate": 1.7294176706827313e-05, "loss": 0.3911, "step": 3445 }, { "epoch": 20.783132530120483, "grad_norm": 2.4049274921417236, "learning_rate": 1.7319277108433735e-05, "loss": 0.4008, "step": 3450 }, { "epoch": 20.813253012048193, "grad_norm": 2.314380645751953, "learning_rate": 1.734437751004016e-05, "loss": 0.38, "step": 3455 }, { "epoch": 20.843373493975903, "grad_norm": 2.4678962230682373, "learning_rate": 1.7369477911646586e-05, "loss": 0.3519, "step": 3460 }, { "epoch": 20.873493975903614, "grad_norm": 2.493690013885498, "learning_rate": 1.7394578313253012e-05, "loss": 0.3785, "step": 3465 }, { "epoch": 20.903614457831324, "grad_norm": 3.2063677310943604, "learning_rate": 1.7419678714859438e-05, "loss": 0.3828, "step": 3470 }, { "epoch": 20.933734939759034, "grad_norm": 2.120603561401367, "learning_rate": 1.7444779116465863e-05, "loss": 0.368, "step": 3475 }, { "epoch": 20.96385542168675, "grad_norm": 4.08734655380249, "learning_rate": 1.7469879518072292e-05, "loss": 0.3879, "step": 3480 }, { "epoch": 20.99397590361446, "grad_norm": 2.1004254817962646, "learning_rate": 1.7494979919678718e-05, "loss": 0.3923, "step": 3485 }, { "epoch": 21.0, "eval_accuracy": 0.8623672827369249, "eval_auc": 0.9178071456632825, "eval_f1": 0.7928994082840237, "eval_loss": 0.32685163617134094, "eval_precision": 0.8241082410824109, "eval_recall": 0.7639680729760547, "eval_runtime": 20.6758, "eval_samples_per_second": 122.994, "eval_steps_per_second": 0.629, "step": 3486 }, { "epoch": 21.02409638554217, "grad_norm": 2.5413122177124023, "learning_rate": 1.7520080321285144e-05, "loss": 0.4122, "step": 3490 }, { "epoch": 21.05421686746988, "grad_norm": 2.392362594604492, "learning_rate": 1.7545180722891566e-05, "loss": 0.3893, "step": 3495 }, { "epoch": 21.08433734939759, "grad_norm": 2.0848727226257324, "learning_rate": 1.7570281124497992e-05, "loss": 0.3634, "step": 3500 }, { "epoch": 21.1144578313253, "grad_norm": 2.455618381500244, "learning_rate": 1.7595381526104417e-05, "loss": 0.364, "step": 3505 }, { "epoch": 21.14457831325301, "grad_norm": 2.857142686843872, "learning_rate": 1.7620481927710843e-05, "loss": 0.3587, "step": 3510 }, { "epoch": 21.174698795180724, "grad_norm": 2.2013301849365234, "learning_rate": 1.764558232931727e-05, "loss": 0.3519, "step": 3515 }, { "epoch": 21.204819277108435, "grad_norm": 2.3021109104156494, "learning_rate": 1.7670682730923694e-05, "loss": 0.348, "step": 3520 }, { "epoch": 21.234939759036145, "grad_norm": 3.415651559829712, "learning_rate": 1.7695783132530123e-05, "loss": 0.3986, "step": 3525 }, { "epoch": 21.265060240963855, "grad_norm": 2.0990231037139893, "learning_rate": 1.772088353413655e-05, "loss": 0.3364, "step": 3530 }, { "epoch": 21.295180722891565, "grad_norm": 3.0432047843933105, "learning_rate": 1.774598393574297e-05, "loss": 0.3789, "step": 3535 }, { "epoch": 21.325301204819276, "grad_norm": 2.8268072605133057, "learning_rate": 1.7771084337349397e-05, "loss": 0.3893, "step": 3540 }, { "epoch": 21.355421686746986, "grad_norm": 2.920424699783325, "learning_rate": 1.7796184738955823e-05, "loss": 0.352, "step": 3545 }, { "epoch": 21.3855421686747, "grad_norm": 3.2624945640563965, "learning_rate": 1.782128514056225e-05, "loss": 0.3555, "step": 3550 }, { "epoch": 21.41566265060241, "grad_norm": 2.2209317684173584, "learning_rate": 1.7846385542168674e-05, "loss": 0.3672, "step": 3555 }, { "epoch": 21.44578313253012, "grad_norm": 2.224517822265625, "learning_rate": 1.78714859437751e-05, "loss": 0.3419, "step": 3560 }, { "epoch": 21.47590361445783, "grad_norm": 2.8915982246398926, "learning_rate": 1.789658634538153e-05, "loss": 0.3926, "step": 3565 }, { "epoch": 21.50602409638554, "grad_norm": 2.213768243789673, "learning_rate": 1.7921686746987955e-05, "loss": 0.3947, "step": 3570 }, { "epoch": 21.53614457831325, "grad_norm": 2.0302085876464844, "learning_rate": 1.794678714859438e-05, "loss": 0.3402, "step": 3575 }, { "epoch": 21.566265060240966, "grad_norm": 2.2996370792388916, "learning_rate": 1.7971887550200802e-05, "loss": 0.3934, "step": 3580 }, { "epoch": 21.596385542168676, "grad_norm": 2.4716296195983887, "learning_rate": 1.7996987951807228e-05, "loss": 0.3488, "step": 3585 }, { "epoch": 21.626506024096386, "grad_norm": 3.6752865314483643, "learning_rate": 1.8022088353413654e-05, "loss": 0.3884, "step": 3590 }, { "epoch": 21.656626506024097, "grad_norm": 2.3490021228790283, "learning_rate": 1.804718875502008e-05, "loss": 0.4053, "step": 3595 }, { "epoch": 21.686746987951807, "grad_norm": 3.6254935264587402, "learning_rate": 1.8072289156626505e-05, "loss": 0.3961, "step": 3600 }, { "epoch": 21.716867469879517, "grad_norm": 2.5923497676849365, "learning_rate": 1.8097389558232934e-05, "loss": 0.3617, "step": 3605 }, { "epoch": 21.746987951807228, "grad_norm": 2.2978155612945557, "learning_rate": 1.812248995983936e-05, "loss": 0.3901, "step": 3610 }, { "epoch": 21.77710843373494, "grad_norm": 2.2318062782287598, "learning_rate": 1.8147590361445786e-05, "loss": 0.3437, "step": 3615 }, { "epoch": 21.80722891566265, "grad_norm": 2.4424312114715576, "learning_rate": 1.817269076305221e-05, "loss": 0.3888, "step": 3620 }, { "epoch": 21.837349397590362, "grad_norm": 1.9771126508712769, "learning_rate": 1.8197791164658634e-05, "loss": 0.4051, "step": 3625 }, { "epoch": 21.867469879518072, "grad_norm": 2.4145617485046387, "learning_rate": 1.822289156626506e-05, "loss": 0.3732, "step": 3630 }, { "epoch": 21.897590361445783, "grad_norm": 3.3357062339782715, "learning_rate": 1.8247991967871485e-05, "loss": 0.3943, "step": 3635 }, { "epoch": 21.927710843373493, "grad_norm": 2.0927658081054688, "learning_rate": 1.827309236947791e-05, "loss": 0.3556, "step": 3640 }, { "epoch": 21.957831325301203, "grad_norm": 2.8143198490142822, "learning_rate": 1.829819277108434e-05, "loss": 0.4176, "step": 3645 }, { "epoch": 21.987951807228917, "grad_norm": 4.187919616699219, "learning_rate": 1.8323293172690765e-05, "loss": 0.4007, "step": 3650 }, { "epoch": 22.0, "eval_accuracy": 0.8568619740464019, "eval_auc": 0.9210646630373929, "eval_f1": 0.8019586507072906, "eval_loss": 0.3195960521697998, "eval_precision": 0.7669094693028096, "eval_recall": 0.8403648802736602, "eval_runtime": 19.5548, "eval_samples_per_second": 130.045, "eval_steps_per_second": 0.665, "step": 3652 }, { "epoch": 22.018072289156628, "grad_norm": 2.3164548873901367, "learning_rate": 1.834839357429719e-05, "loss": 0.3858, "step": 3655 }, { "epoch": 22.048192771084338, "grad_norm": 2.90985107421875, "learning_rate": 1.8373493975903617e-05, "loss": 0.36, "step": 3660 }, { "epoch": 22.07831325301205, "grad_norm": 1.9936435222625732, "learning_rate": 1.8398594377510042e-05, "loss": 0.3651, "step": 3665 }, { "epoch": 22.10843373493976, "grad_norm": 2.573457956314087, "learning_rate": 1.8423694779116465e-05, "loss": 0.355, "step": 3670 }, { "epoch": 22.13855421686747, "grad_norm": 2.2051055431365967, "learning_rate": 1.844879518072289e-05, "loss": 0.3728, "step": 3675 }, { "epoch": 22.16867469879518, "grad_norm": 3.0916407108306885, "learning_rate": 1.8473895582329316e-05, "loss": 0.3307, "step": 3680 }, { "epoch": 22.198795180722893, "grad_norm": 1.9302294254302979, "learning_rate": 1.8498995983935745e-05, "loss": 0.3339, "step": 3685 }, { "epoch": 22.228915662650603, "grad_norm": 2.323669672012329, "learning_rate": 1.852409638554217e-05, "loss": 0.3519, "step": 3690 }, { "epoch": 22.259036144578314, "grad_norm": 2.630171537399292, "learning_rate": 1.8549196787148596e-05, "loss": 0.3389, "step": 3695 }, { "epoch": 22.289156626506024, "grad_norm": 3.4154629707336426, "learning_rate": 1.8574297188755022e-05, "loss": 0.3859, "step": 3700 }, { "epoch": 22.319277108433734, "grad_norm": 2.571350574493408, "learning_rate": 1.8599397590361448e-05, "loss": 0.4023, "step": 3705 }, { "epoch": 22.349397590361445, "grad_norm": 3.9686989784240723, "learning_rate": 1.8624497991967873e-05, "loss": 0.3795, "step": 3710 }, { "epoch": 22.379518072289155, "grad_norm": 2.3565986156463623, "learning_rate": 1.8649598393574296e-05, "loss": 0.3461, "step": 3715 }, { "epoch": 22.40963855421687, "grad_norm": 2.3396501541137695, "learning_rate": 1.867469879518072e-05, "loss": 0.3685, "step": 3720 }, { "epoch": 22.43975903614458, "grad_norm": 3.2269747257232666, "learning_rate": 1.869979919678715e-05, "loss": 0.3571, "step": 3725 }, { "epoch": 22.46987951807229, "grad_norm": 2.388921022415161, "learning_rate": 1.8724899598393576e-05, "loss": 0.38, "step": 3730 }, { "epoch": 22.5, "grad_norm": 2.4904425144195557, "learning_rate": 1.8750000000000002e-05, "loss": 0.3453, "step": 3735 }, { "epoch": 22.53012048192771, "grad_norm": 3.9464221000671387, "learning_rate": 1.8775100401606427e-05, "loss": 0.3572, "step": 3740 }, { "epoch": 22.56024096385542, "grad_norm": 2.9925246238708496, "learning_rate": 1.8800200803212853e-05, "loss": 0.389, "step": 3745 }, { "epoch": 22.59036144578313, "grad_norm": 2.383758544921875, "learning_rate": 1.882530120481928e-05, "loss": 0.3399, "step": 3750 }, { "epoch": 22.620481927710845, "grad_norm": 2.634162187576294, "learning_rate": 1.8850401606425704e-05, "loss": 0.3316, "step": 3755 }, { "epoch": 22.650602409638555, "grad_norm": 2.5719075202941895, "learning_rate": 1.8875502008032127e-05, "loss": 0.3976, "step": 3760 }, { "epoch": 22.680722891566266, "grad_norm": 2.7136175632476807, "learning_rate": 1.8900602409638556e-05, "loss": 0.3312, "step": 3765 }, { "epoch": 22.710843373493976, "grad_norm": 3.161940097808838, "learning_rate": 1.892570281124498e-05, "loss": 0.4142, "step": 3770 }, { "epoch": 22.740963855421686, "grad_norm": 2.9586448669433594, "learning_rate": 1.8950803212851407e-05, "loss": 0.35, "step": 3775 }, { "epoch": 22.771084337349397, "grad_norm": 2.721144437789917, "learning_rate": 1.8975903614457833e-05, "loss": 0.3401, "step": 3780 }, { "epoch": 22.801204819277107, "grad_norm": 3.0611133575439453, "learning_rate": 1.900100401606426e-05, "loss": 0.3541, "step": 3785 }, { "epoch": 22.83132530120482, "grad_norm": 2.4524519443511963, "learning_rate": 1.9026104417670684e-05, "loss": 0.3899, "step": 3790 }, { "epoch": 22.86144578313253, "grad_norm": 2.0838871002197266, "learning_rate": 1.905120481927711e-05, "loss": 0.3568, "step": 3795 }, { "epoch": 22.89156626506024, "grad_norm": 3.6144070625305176, "learning_rate": 1.9076305220883535e-05, "loss": 0.3253, "step": 3800 }, { "epoch": 22.92168674698795, "grad_norm": 3.958813190460205, "learning_rate": 1.910140562248996e-05, "loss": 0.3907, "step": 3805 }, { "epoch": 22.951807228915662, "grad_norm": 2.1493561267852783, "learning_rate": 1.9126506024096387e-05, "loss": 0.3422, "step": 3810 }, { "epoch": 22.981927710843372, "grad_norm": 2.8188695907592773, "learning_rate": 1.9151606425702813e-05, "loss": 0.3662, "step": 3815 }, { "epoch": 23.0, "eval_accuracy": 0.8686590640975226, "eval_auc": 0.9263426008944056, "eval_f1": 0.7938271604938272, "eval_loss": 0.3158295452594757, "eval_precision": 0.8654104979811574, "eval_recall": 0.7331812998859749, "eval_runtime": 19.0721, "eval_samples_per_second": 133.336, "eval_steps_per_second": 0.682, "step": 3818 }, { "epoch": 23.012048192771083, "grad_norm": 2.2430033683776855, "learning_rate": 1.9176706827309238e-05, "loss": 0.3296, "step": 3820 }, { "epoch": 23.042168674698797, "grad_norm": 3.505044460296631, "learning_rate": 1.9201807228915664e-05, "loss": 0.3289, "step": 3825 }, { "epoch": 23.072289156626507, "grad_norm": 2.142225980758667, "learning_rate": 1.922690763052209e-05, "loss": 0.3551, "step": 3830 }, { "epoch": 23.102409638554217, "grad_norm": 2.1978600025177, "learning_rate": 1.9252008032128515e-05, "loss": 0.3303, "step": 3835 }, { "epoch": 23.132530120481928, "grad_norm": 1.8615117073059082, "learning_rate": 1.927710843373494e-05, "loss": 0.3353, "step": 3840 }, { "epoch": 23.162650602409638, "grad_norm": 2.1642777919769287, "learning_rate": 1.9302208835341367e-05, "loss": 0.318, "step": 3845 }, { "epoch": 23.19277108433735, "grad_norm": 2.3634889125823975, "learning_rate": 1.9327309236947792e-05, "loss": 0.3373, "step": 3850 }, { "epoch": 23.22289156626506, "grad_norm": 2.5314648151397705, "learning_rate": 1.9352409638554218e-05, "loss": 0.3287, "step": 3855 }, { "epoch": 23.253012048192772, "grad_norm": 2.921504259109497, "learning_rate": 1.9377510040160644e-05, "loss": 0.3802, "step": 3860 }, { "epoch": 23.283132530120483, "grad_norm": 1.9109606742858887, "learning_rate": 1.940261044176707e-05, "loss": 0.3371, "step": 3865 }, { "epoch": 23.313253012048193, "grad_norm": 2.454645872116089, "learning_rate": 1.9427710843373495e-05, "loss": 0.3005, "step": 3870 }, { "epoch": 23.343373493975903, "grad_norm": 2.995619058609009, "learning_rate": 1.945281124497992e-05, "loss": 0.3862, "step": 3875 }, { "epoch": 23.373493975903614, "grad_norm": 2.480074644088745, "learning_rate": 1.9477911646586346e-05, "loss": 0.3488, "step": 3880 }, { "epoch": 23.403614457831324, "grad_norm": 3.2270078659057617, "learning_rate": 1.9503012048192772e-05, "loss": 0.3759, "step": 3885 }, { "epoch": 23.433734939759034, "grad_norm": 2.5098702907562256, "learning_rate": 1.9528112449799198e-05, "loss": 0.3453, "step": 3890 }, { "epoch": 23.46385542168675, "grad_norm": 3.194838523864746, "learning_rate": 1.9553212851405623e-05, "loss": 0.3524, "step": 3895 }, { "epoch": 23.49397590361446, "grad_norm": 2.51259446144104, "learning_rate": 1.957831325301205e-05, "loss": 0.3899, "step": 3900 }, { "epoch": 23.52409638554217, "grad_norm": 2.8412113189697266, "learning_rate": 1.9603413654618475e-05, "loss": 0.3071, "step": 3905 }, { "epoch": 23.55421686746988, "grad_norm": 2.583353281021118, "learning_rate": 1.96285140562249e-05, "loss": 0.377, "step": 3910 }, { "epoch": 23.58433734939759, "grad_norm": 2.712259292602539, "learning_rate": 1.9653614457831326e-05, "loss": 0.3414, "step": 3915 }, { "epoch": 23.6144578313253, "grad_norm": 2.5728349685668945, "learning_rate": 1.967871485943775e-05, "loss": 0.3841, "step": 3920 }, { "epoch": 23.644578313253014, "grad_norm": 2.7579598426818848, "learning_rate": 1.9703815261044177e-05, "loss": 0.3236, "step": 3925 }, { "epoch": 23.674698795180724, "grad_norm": 3.745579481124878, "learning_rate": 1.9728915662650603e-05, "loss": 0.3788, "step": 3930 }, { "epoch": 23.704819277108435, "grad_norm": 3.3026535511016846, "learning_rate": 1.9754016064257032e-05, "loss": 0.35, "step": 3935 }, { "epoch": 23.734939759036145, "grad_norm": 2.341129779815674, "learning_rate": 1.9779116465863454e-05, "loss": 0.3162, "step": 3940 }, { "epoch": 23.765060240963855, "grad_norm": 2.3009142875671387, "learning_rate": 1.980421686746988e-05, "loss": 0.3607, "step": 3945 }, { "epoch": 23.795180722891565, "grad_norm": 2.9585721492767334, "learning_rate": 1.9829317269076306e-05, "loss": 0.368, "step": 3950 }, { "epoch": 23.825301204819276, "grad_norm": 2.5716402530670166, "learning_rate": 1.985441767068273e-05, "loss": 0.3882, "step": 3955 }, { "epoch": 23.855421686746986, "grad_norm": 3.239875316619873, "learning_rate": 1.9879518072289157e-05, "loss": 0.3542, "step": 3960 }, { "epoch": 23.8855421686747, "grad_norm": 3.1310906410217285, "learning_rate": 1.9904618473895583e-05, "loss": 0.3364, "step": 3965 }, { "epoch": 23.91566265060241, "grad_norm": 2.2465121746063232, "learning_rate": 1.992971887550201e-05, "loss": 0.3696, "step": 3970 }, { "epoch": 23.94578313253012, "grad_norm": 2.290149450302124, "learning_rate": 1.9954819277108437e-05, "loss": 0.4005, "step": 3975 }, { "epoch": 23.97590361445783, "grad_norm": 2.4030425548553467, "learning_rate": 1.997991967871486e-05, "loss": 0.3689, "step": 3980 }, { "epoch": 24.0, "eval_accuracy": 0.8769170271333071, "eval_auc": 0.938771745870526, "eval_f1": 0.8187608569774175, "eval_loss": 0.2837126851081848, "eval_precision": 0.831764705882353, "eval_recall": 0.806157354618016, "eval_runtime": 19.0066, "eval_samples_per_second": 133.796, "eval_steps_per_second": 0.684, "step": 3984 }, { "epoch": 24.00602409638554, "grad_norm": 2.0611824989318848, "learning_rate": 2.0005020080321285e-05, "loss": 0.3252, "step": 3985 }, { "epoch": 24.03614457831325, "grad_norm": 2.9021127223968506, "learning_rate": 2.003012048192771e-05, "loss": 0.3678, "step": 3990 }, { "epoch": 24.066265060240966, "grad_norm": 1.9793673753738403, "learning_rate": 2.0055220883534137e-05, "loss": 0.3251, "step": 3995 }, { "epoch": 24.096385542168676, "grad_norm": 2.8167641162872314, "learning_rate": 2.0080321285140562e-05, "loss": 0.3344, "step": 4000 }, { "epoch": 24.126506024096386, "grad_norm": 2.1713151931762695, "learning_rate": 2.0105421686746988e-05, "loss": 0.3358, "step": 4005 }, { "epoch": 24.156626506024097, "grad_norm": 2.3592758178710938, "learning_rate": 2.0130522088353414e-05, "loss": 0.3663, "step": 4010 }, { "epoch": 24.186746987951807, "grad_norm": 3.2822067737579346, "learning_rate": 2.015562248995984e-05, "loss": 0.367, "step": 4015 }, { "epoch": 24.216867469879517, "grad_norm": 3.195669412612915, "learning_rate": 2.018072289156627e-05, "loss": 0.3254, "step": 4020 }, { "epoch": 24.246987951807228, "grad_norm": 2.3356804847717285, "learning_rate": 2.020582329317269e-05, "loss": 0.3005, "step": 4025 }, { "epoch": 24.27710843373494, "grad_norm": 2.135450839996338, "learning_rate": 2.0230923694779116e-05, "loss": 0.359, "step": 4030 }, { "epoch": 24.30722891566265, "grad_norm": 2.1273319721221924, "learning_rate": 2.0256024096385542e-05, "loss": 0.3043, "step": 4035 }, { "epoch": 24.337349397590362, "grad_norm": 2.6562674045562744, "learning_rate": 2.0281124497991968e-05, "loss": 0.2992, "step": 4040 }, { "epoch": 24.367469879518072, "grad_norm": 2.4005606174468994, "learning_rate": 2.0306224899598393e-05, "loss": 0.3517, "step": 4045 }, { "epoch": 24.397590361445783, "grad_norm": 3.0539345741271973, "learning_rate": 2.033132530120482e-05, "loss": 0.3625, "step": 4050 }, { "epoch": 24.427710843373493, "grad_norm": 2.6296353340148926, "learning_rate": 2.0356425702811245e-05, "loss": 0.3369, "step": 4055 }, { "epoch": 24.457831325301203, "grad_norm": 2.197061061859131, "learning_rate": 2.0381526104417674e-05, "loss": 0.3291, "step": 4060 }, { "epoch": 24.487951807228917, "grad_norm": 2.6472768783569336, "learning_rate": 2.04066265060241e-05, "loss": 0.3227, "step": 4065 }, { "epoch": 24.518072289156628, "grad_norm": 2.7284440994262695, "learning_rate": 2.0431726907630522e-05, "loss": 0.3645, "step": 4070 }, { "epoch": 24.548192771084338, "grad_norm": 1.8101915121078491, "learning_rate": 2.0456827309236948e-05, "loss": 0.3179, "step": 4075 }, { "epoch": 24.57831325301205, "grad_norm": 1.9829938411712646, "learning_rate": 2.0481927710843373e-05, "loss": 0.3132, "step": 4080 }, { "epoch": 24.60843373493976, "grad_norm": 2.770651340484619, "learning_rate": 2.05070281124498e-05, "loss": 0.366, "step": 4085 }, { "epoch": 24.63855421686747, "grad_norm": 2.8670294284820557, "learning_rate": 2.0532128514056225e-05, "loss": 0.3599, "step": 4090 }, { "epoch": 24.66867469879518, "grad_norm": 2.399897336959839, "learning_rate": 2.055722891566265e-05, "loss": 0.323, "step": 4095 }, { "epoch": 24.698795180722893, "grad_norm": 1.969288945198059, "learning_rate": 2.058232931726908e-05, "loss": 0.3776, "step": 4100 }, { "epoch": 24.728915662650603, "grad_norm": 2.6503965854644775, "learning_rate": 2.0607429718875505e-05, "loss": 0.3266, "step": 4105 }, { "epoch": 24.759036144578314, "grad_norm": 3.4772753715515137, "learning_rate": 2.063253012048193e-05, "loss": 0.361, "step": 4110 }, { "epoch": 24.789156626506024, "grad_norm": 2.970876932144165, "learning_rate": 2.0657630522088353e-05, "loss": 0.3393, "step": 4115 }, { "epoch": 24.819277108433734, "grad_norm": 2.6036980152130127, "learning_rate": 2.068273092369478e-05, "loss": 0.381, "step": 4120 }, { "epoch": 24.849397590361445, "grad_norm": 2.629304885864258, "learning_rate": 2.0707831325301204e-05, "loss": 0.3413, "step": 4125 }, { "epoch": 24.879518072289155, "grad_norm": 2.1564583778381348, "learning_rate": 2.073293172690763e-05, "loss": 0.3209, "step": 4130 }, { "epoch": 24.90963855421687, "grad_norm": 2.885538339614868, "learning_rate": 2.0758032128514056e-05, "loss": 0.3361, "step": 4135 }, { "epoch": 24.93975903614458, "grad_norm": 2.5669989585876465, "learning_rate": 2.0783132530120485e-05, "loss": 0.3291, "step": 4140 }, { "epoch": 24.96987951807229, "grad_norm": 2.1680426597595215, "learning_rate": 2.080823293172691e-05, "loss": 0.2835, "step": 4145 }, { "epoch": 25.0, "grad_norm": 2.454000473022461, "learning_rate": 2.0833333333333336e-05, "loss": 0.3682, "step": 4150 }, { "epoch": 25.0, "eval_accuracy": 0.8725914274478962, "eval_auc": 0.934971822252276, "eval_f1": 0.8038740920096852, "eval_loss": 0.29524460434913635, "eval_precision": 0.8567741935483871, "eval_recall": 0.7571265678449259, "eval_runtime": 19.0056, "eval_samples_per_second": 133.803, "eval_steps_per_second": 0.684, "step": 4150 }, { "epoch": 25.03012048192771, "grad_norm": 3.144068479537964, "learning_rate": 2.0858433734939762e-05, "loss": 0.3608, "step": 4155 }, { "epoch": 25.06024096385542, "grad_norm": 3.0255117416381836, "learning_rate": 2.0883534136546184e-05, "loss": 0.2815, "step": 4160 }, { "epoch": 25.09036144578313, "grad_norm": 3.002054452896118, "learning_rate": 2.090863453815261e-05, "loss": 0.3316, "step": 4165 }, { "epoch": 25.120481927710845, "grad_norm": 2.5188658237457275, "learning_rate": 2.0933734939759035e-05, "loss": 0.3329, "step": 4170 }, { "epoch": 25.150602409638555, "grad_norm": 2.7840096950531006, "learning_rate": 2.095883534136546e-05, "loss": 0.3082, "step": 4175 }, { "epoch": 25.180722891566266, "grad_norm": 3.077366352081299, "learning_rate": 2.098393574297189e-05, "loss": 0.3485, "step": 4180 }, { "epoch": 25.210843373493976, "grad_norm": 3.5452566146850586, "learning_rate": 2.1009036144578316e-05, "loss": 0.3514, "step": 4185 }, { "epoch": 25.240963855421686, "grad_norm": 3.2208802700042725, "learning_rate": 2.103413654618474e-05, "loss": 0.3577, "step": 4190 }, { "epoch": 25.271084337349397, "grad_norm": 3.3380303382873535, "learning_rate": 2.1059236947791167e-05, "loss": 0.3381, "step": 4195 }, { "epoch": 25.301204819277107, "grad_norm": 2.096365451812744, "learning_rate": 2.1084337349397593e-05, "loss": 0.3047, "step": 4200 }, { "epoch": 25.33132530120482, "grad_norm": 6.2428812980651855, "learning_rate": 2.1109437751004015e-05, "loss": 0.3539, "step": 4205 }, { "epoch": 25.36144578313253, "grad_norm": 1.7556854486465454, "learning_rate": 2.113453815261044e-05, "loss": 0.3485, "step": 4210 }, { "epoch": 25.39156626506024, "grad_norm": 2.071019172668457, "learning_rate": 2.1159638554216866e-05, "loss": 0.3556, "step": 4215 }, { "epoch": 25.42168674698795, "grad_norm": 2.454308271408081, "learning_rate": 2.1184738955823295e-05, "loss": 0.3335, "step": 4220 }, { "epoch": 25.451807228915662, "grad_norm": 2.7519564628601074, "learning_rate": 2.120983935742972e-05, "loss": 0.315, "step": 4225 }, { "epoch": 25.481927710843372, "grad_norm": 2.2847065925598145, "learning_rate": 2.1234939759036147e-05, "loss": 0.2953, "step": 4230 }, { "epoch": 25.512048192771083, "grad_norm": 2.7434380054473877, "learning_rate": 2.1260040160642572e-05, "loss": 0.3394, "step": 4235 }, { "epoch": 25.542168674698797, "grad_norm": 2.0522360801696777, "learning_rate": 2.1285140562248998e-05, "loss": 0.3153, "step": 4240 }, { "epoch": 25.572289156626507, "grad_norm": 2.233391046524048, "learning_rate": 2.1310240963855424e-05, "loss": 0.3331, "step": 4245 }, { "epoch": 25.602409638554217, "grad_norm": 2.3065338134765625, "learning_rate": 2.1335341365461846e-05, "loss": 0.3479, "step": 4250 }, { "epoch": 25.632530120481928, "grad_norm": 2.2913033962249756, "learning_rate": 2.1360441767068272e-05, "loss": 0.3246, "step": 4255 }, { "epoch": 25.662650602409638, "grad_norm": 2.851597785949707, "learning_rate": 2.13855421686747e-05, "loss": 0.3538, "step": 4260 }, { "epoch": 25.69277108433735, "grad_norm": 2.6081836223602295, "learning_rate": 2.1410642570281127e-05, "loss": 0.3465, "step": 4265 }, { "epoch": 25.72289156626506, "grad_norm": 2.6602559089660645, "learning_rate": 2.1435742971887552e-05, "loss": 0.3445, "step": 4270 }, { "epoch": 25.753012048192772, "grad_norm": 2.375763416290283, "learning_rate": 2.1460843373493978e-05, "loss": 0.3392, "step": 4275 }, { "epoch": 25.783132530120483, "grad_norm": 2.4636614322662354, "learning_rate": 2.1485943775100404e-05, "loss": 0.3553, "step": 4280 }, { "epoch": 25.813253012048193, "grad_norm": 2.436706304550171, "learning_rate": 2.151104417670683e-05, "loss": 0.2978, "step": 4285 }, { "epoch": 25.843373493975903, "grad_norm": 2.432201862335205, "learning_rate": 2.1536144578313255e-05, "loss": 0.3598, "step": 4290 }, { "epoch": 25.873493975903614, "grad_norm": 2.2942583560943604, "learning_rate": 2.1561244979919677e-05, "loss": 0.3526, "step": 4295 }, { "epoch": 25.903614457831324, "grad_norm": 2.467564821243286, "learning_rate": 2.1586345381526106e-05, "loss": 0.317, "step": 4300 }, { "epoch": 25.933734939759034, "grad_norm": 3.1461193561553955, "learning_rate": 2.1611445783132532e-05, "loss": 0.337, "step": 4305 }, { "epoch": 25.96385542168675, "grad_norm": 3.0762431621551514, "learning_rate": 2.1636546184738958e-05, "loss": 0.3591, "step": 4310 }, { "epoch": 25.99397590361446, "grad_norm": 2.0135581493377686, "learning_rate": 2.1661646586345383e-05, "loss": 0.3498, "step": 4315 }, { "epoch": 26.0, "eval_accuracy": 0.8560755013763272, "eval_auc": 0.9323354199148303, "eval_f1": 0.805111821086262, "eval_loss": 0.320932537317276, "eval_precision": 0.7552447552447552, "eval_recall": 0.8620296465222349, "eval_runtime": 20.2403, "eval_samples_per_second": 125.64, "eval_steps_per_second": 0.642, "step": 4316 }, { "epoch": 26.02409638554217, "grad_norm": 2.15742564201355, "learning_rate": 2.168674698795181e-05, "loss": 0.3685, "step": 4320 }, { "epoch": 26.05421686746988, "grad_norm": 2.6329147815704346, "learning_rate": 2.1711847389558235e-05, "loss": 0.3217, "step": 4325 }, { "epoch": 26.08433734939759, "grad_norm": 3.114494562149048, "learning_rate": 2.173694779116466e-05, "loss": 0.3356, "step": 4330 }, { "epoch": 26.1144578313253, "grad_norm": 2.8055694103240967, "learning_rate": 2.1762048192771086e-05, "loss": 0.3055, "step": 4335 }, { "epoch": 26.14457831325301, "grad_norm": 2.372647762298584, "learning_rate": 2.178714859437751e-05, "loss": 0.317, "step": 4340 }, { "epoch": 26.174698795180724, "grad_norm": 2.197999954223633, "learning_rate": 2.1812248995983937e-05, "loss": 0.2848, "step": 4345 }, { "epoch": 26.204819277108435, "grad_norm": 2.3357388973236084, "learning_rate": 2.1837349397590363e-05, "loss": 0.3206, "step": 4350 }, { "epoch": 26.234939759036145, "grad_norm": 2.4657440185546875, "learning_rate": 2.186244979919679e-05, "loss": 0.3049, "step": 4355 }, { "epoch": 26.265060240963855, "grad_norm": 2.735355854034424, "learning_rate": 2.1887550200803214e-05, "loss": 0.3481, "step": 4360 }, { "epoch": 26.295180722891565, "grad_norm": 2.7895607948303223, "learning_rate": 2.191265060240964e-05, "loss": 0.3654, "step": 4365 }, { "epoch": 26.325301204819276, "grad_norm": 2.3301947116851807, "learning_rate": 2.1937751004016066e-05, "loss": 0.2964, "step": 4370 }, { "epoch": 26.355421686746986, "grad_norm": 2.4436914920806885, "learning_rate": 2.196285140562249e-05, "loss": 0.3042, "step": 4375 }, { "epoch": 26.3855421686747, "grad_norm": 2.6277318000793457, "learning_rate": 2.1987951807228917e-05, "loss": 0.3552, "step": 4380 }, { "epoch": 26.41566265060241, "grad_norm": 2.0885562896728516, "learning_rate": 2.2013052208835343e-05, "loss": 0.2793, "step": 4385 }, { "epoch": 26.44578313253012, "grad_norm": 2.6496646404266357, "learning_rate": 2.203815261044177e-05, "loss": 0.4158, "step": 4390 }, { "epoch": 26.47590361445783, "grad_norm": 2.3065197467803955, "learning_rate": 2.2063253012048194e-05, "loss": 0.3017, "step": 4395 }, { "epoch": 26.50602409638554, "grad_norm": 2.375241279602051, "learning_rate": 2.208835341365462e-05, "loss": 0.3368, "step": 4400 }, { "epoch": 26.53614457831325, "grad_norm": 3.1456806659698486, "learning_rate": 2.2113453815261045e-05, "loss": 0.3444, "step": 4405 }, { "epoch": 26.566265060240966, "grad_norm": 2.517967939376831, "learning_rate": 2.213855421686747e-05, "loss": 0.3547, "step": 4410 }, { "epoch": 26.596385542168676, "grad_norm": 3.2278215885162354, "learning_rate": 2.2163654618473897e-05, "loss": 0.3366, "step": 4415 }, { "epoch": 26.626506024096386, "grad_norm": 2.074816942214966, "learning_rate": 2.2188755020080322e-05, "loss": 0.3233, "step": 4420 }, { "epoch": 26.656626506024097, "grad_norm": 2.988708972930908, "learning_rate": 2.2213855421686748e-05, "loss": 0.3688, "step": 4425 }, { "epoch": 26.686746987951807, "grad_norm": 2.934281349182129, "learning_rate": 2.2238955823293174e-05, "loss": 0.3232, "step": 4430 }, { "epoch": 26.716867469879517, "grad_norm": 2.9825849533081055, "learning_rate": 2.22640562248996e-05, "loss": 0.3339, "step": 4435 }, { "epoch": 26.746987951807228, "grad_norm": 2.0730199813842773, "learning_rate": 2.2289156626506025e-05, "loss": 0.3341, "step": 4440 }, { "epoch": 26.77710843373494, "grad_norm": 2.3709566593170166, "learning_rate": 2.231425702811245e-05, "loss": 0.3125, "step": 4445 }, { "epoch": 26.80722891566265, "grad_norm": 2.547858476638794, "learning_rate": 2.2339357429718876e-05, "loss": 0.3113, "step": 4450 }, { "epoch": 26.837349397590362, "grad_norm": 2.848167896270752, "learning_rate": 2.2364457831325302e-05, "loss": 0.3405, "step": 4455 }, { "epoch": 26.867469879518072, "grad_norm": 3.4910738468170166, "learning_rate": 2.2389558232931728e-05, "loss": 0.3602, "step": 4460 }, { "epoch": 26.897590361445783, "grad_norm": 1.9076507091522217, "learning_rate": 2.2414658634538153e-05, "loss": 0.3015, "step": 4465 }, { "epoch": 26.927710843373493, "grad_norm": 2.182180643081665, "learning_rate": 2.243975903614458e-05, "loss": 0.2855, "step": 4470 }, { "epoch": 26.957831325301203, "grad_norm": 2.6377947330474854, "learning_rate": 2.2464859437751005e-05, "loss": 0.2983, "step": 4475 }, { "epoch": 26.987951807228917, "grad_norm": 3.0566041469573975, "learning_rate": 2.248995983935743e-05, "loss": 0.3086, "step": 4480 }, { "epoch": 27.0, "eval_accuracy": 0.8847817538340542, "eval_auc": 0.9410505365201953, "eval_f1": 0.8223165554881746, "eval_loss": 0.2839481830596924, "eval_precision": 0.8782383419689119, "eval_recall": 0.7730900798175598, "eval_runtime": 20.3148, "eval_samples_per_second": 125.18, "eval_steps_per_second": 0.64, "step": 4482 }, { "epoch": 27.018072289156628, "grad_norm": 2.496335983276367, "learning_rate": 2.2515060240963856e-05, "loss": 0.29, "step": 4485 }, { "epoch": 27.048192771084338, "grad_norm": 2.7706382274627686, "learning_rate": 2.2540160642570282e-05, "loss": 0.319, "step": 4490 }, { "epoch": 27.07831325301205, "grad_norm": 2.7136242389678955, "learning_rate": 2.2565261044176707e-05, "loss": 0.3257, "step": 4495 }, { "epoch": 27.10843373493976, "grad_norm": 2.489665985107422, "learning_rate": 2.2590361445783133e-05, "loss": 0.2822, "step": 4500 }, { "epoch": 27.13855421686747, "grad_norm": 2.4213666915893555, "learning_rate": 2.261546184738956e-05, "loss": 0.3223, "step": 4505 }, { "epoch": 27.16867469879518, "grad_norm": 2.8688440322875977, "learning_rate": 2.2640562248995988e-05, "loss": 0.2624, "step": 4510 }, { "epoch": 27.198795180722893, "grad_norm": 2.302858829498291, "learning_rate": 2.266566265060241e-05, "loss": 0.299, "step": 4515 }, { "epoch": 27.228915662650603, "grad_norm": 3.0998318195343018, "learning_rate": 2.2690763052208836e-05, "loss": 0.278, "step": 4520 }, { "epoch": 27.259036144578314, "grad_norm": 2.804643392562866, "learning_rate": 2.271586345381526e-05, "loss": 0.3197, "step": 4525 }, { "epoch": 27.289156626506024, "grad_norm": 3.7301785945892334, "learning_rate": 2.2740963855421687e-05, "loss": 0.3267, "step": 4530 }, { "epoch": 27.319277108433734, "grad_norm": 2.538990020751953, "learning_rate": 2.2766064257028113e-05, "loss": 0.3259, "step": 4535 }, { "epoch": 27.349397590361445, "grad_norm": 2.8566887378692627, "learning_rate": 2.279116465863454e-05, "loss": 0.3106, "step": 4540 }, { "epoch": 27.379518072289155, "grad_norm": 2.617983818054199, "learning_rate": 2.2816265060240964e-05, "loss": 0.3084, "step": 4545 }, { "epoch": 27.40963855421687, "grad_norm": 4.547421455383301, "learning_rate": 2.284136546184739e-05, "loss": 0.3253, "step": 4550 }, { "epoch": 27.43975903614458, "grad_norm": 2.438436985015869, "learning_rate": 2.286646586345382e-05, "loss": 0.3303, "step": 4555 }, { "epoch": 27.46987951807229, "grad_norm": 2.6192996501922607, "learning_rate": 2.289156626506024e-05, "loss": 0.353, "step": 4560 }, { "epoch": 27.5, "grad_norm": 3.8371706008911133, "learning_rate": 2.2916666666666667e-05, "loss": 0.3507, "step": 4565 }, { "epoch": 27.53012048192771, "grad_norm": 2.9321835041046143, "learning_rate": 2.2941767068273093e-05, "loss": 0.3357, "step": 4570 }, { "epoch": 27.56024096385542, "grad_norm": 2.8408875465393066, "learning_rate": 2.2966867469879518e-05, "loss": 0.388, "step": 4575 }, { "epoch": 27.59036144578313, "grad_norm": 2.674762010574341, "learning_rate": 2.2991967871485944e-05, "loss": 0.3919, "step": 4580 }, { "epoch": 27.620481927710845, "grad_norm": 2.97341251373291, "learning_rate": 2.301706827309237e-05, "loss": 0.2904, "step": 4585 }, { "epoch": 27.650602409638555, "grad_norm": 3.0861120223999023, "learning_rate": 2.3042168674698795e-05, "loss": 0.3829, "step": 4590 }, { "epoch": 27.680722891566266, "grad_norm": 2.3060944080352783, "learning_rate": 2.3067269076305224e-05, "loss": 0.2962, "step": 4595 }, { "epoch": 27.710843373493976, "grad_norm": 2.146397352218628, "learning_rate": 2.309236947791165e-05, "loss": 0.3021, "step": 4600 }, { "epoch": 27.740963855421686, "grad_norm": 2.135899305343628, "learning_rate": 2.3117469879518072e-05, "loss": 0.286, "step": 4605 }, { "epoch": 27.771084337349397, "grad_norm": 3.1837735176086426, "learning_rate": 2.3142570281124498e-05, "loss": 0.3592, "step": 4610 }, { "epoch": 27.801204819277107, "grad_norm": 2.126497507095337, "learning_rate": 2.3167670682730924e-05, "loss": 0.2836, "step": 4615 }, { "epoch": 27.83132530120482, "grad_norm": 2.43947434425354, "learning_rate": 2.319277108433735e-05, "loss": 0.3092, "step": 4620 }, { "epoch": 27.86144578313253, "grad_norm": 2.086073875427246, "learning_rate": 2.3217871485943775e-05, "loss": 0.2689, "step": 4625 }, { "epoch": 27.89156626506024, "grad_norm": 2.582994222640991, "learning_rate": 2.32429718875502e-05, "loss": 0.3375, "step": 4630 }, { "epoch": 27.92168674698795, "grad_norm": 2.3924102783203125, "learning_rate": 2.326807228915663e-05, "loss": 0.3274, "step": 4635 }, { "epoch": 27.951807228915662, "grad_norm": 2.193665027618408, "learning_rate": 2.3293172690763055e-05, "loss": 0.3148, "step": 4640 }, { "epoch": 27.981927710843372, "grad_norm": 2.6703038215637207, "learning_rate": 2.331827309236948e-05, "loss": 0.2451, "step": 4645 }, { "epoch": 28.0, "eval_accuracy": 0.8761305544632324, "eval_auc": 0.9346111306552267, "eval_f1": 0.804953560371517, "eval_loss": 0.30121734738349915, "eval_precision": 0.8807588075880759, "eval_recall": 0.7411630558722919, "eval_runtime": 19.0477, "eval_samples_per_second": 133.507, "eval_steps_per_second": 0.682, "step": 4648 }, { "epoch": 28.012048192771083, "grad_norm": 2.621669054031372, "learning_rate": 2.3343373493975903e-05, "loss": 0.2893, "step": 4650 }, { "epoch": 28.042168674698797, "grad_norm": 2.453294277191162, "learning_rate": 2.336847389558233e-05, "loss": 0.3258, "step": 4655 }, { "epoch": 28.072289156626507, "grad_norm": 2.217541456222534, "learning_rate": 2.3393574297188755e-05, "loss": 0.2883, "step": 4660 }, { "epoch": 28.102409638554217, "grad_norm": 2.2983946800231934, "learning_rate": 2.341867469879518e-05, "loss": 0.3041, "step": 4665 }, { "epoch": 28.132530120481928, "grad_norm": 2.4966137409210205, "learning_rate": 2.3443775100401606e-05, "loss": 0.3251, "step": 4670 }, { "epoch": 28.162650602409638, "grad_norm": 2.3451552391052246, "learning_rate": 2.3468875502008035e-05, "loss": 0.3188, "step": 4675 }, { "epoch": 28.19277108433735, "grad_norm": 1.914646029472351, "learning_rate": 2.349397590361446e-05, "loss": 0.2955, "step": 4680 }, { "epoch": 28.22289156626506, "grad_norm": 2.2887744903564453, "learning_rate": 2.3519076305220886e-05, "loss": 0.3102, "step": 4685 }, { "epoch": 28.253012048192772, "grad_norm": 2.1860218048095703, "learning_rate": 2.3544176706827312e-05, "loss": 0.272, "step": 4690 }, { "epoch": 28.283132530120483, "grad_norm": 2.304988145828247, "learning_rate": 2.3569277108433734e-05, "loss": 0.3393, "step": 4695 }, { "epoch": 28.313253012048193, "grad_norm": 2.7805275917053223, "learning_rate": 2.359437751004016e-05, "loss": 0.2752, "step": 4700 }, { "epoch": 28.343373493975903, "grad_norm": 2.5115225315093994, "learning_rate": 2.3619477911646586e-05, "loss": 0.3172, "step": 4705 }, { "epoch": 28.373493975903614, "grad_norm": 1.7648457288742065, "learning_rate": 2.364457831325301e-05, "loss": 0.2794, "step": 4710 }, { "epoch": 28.403614457831324, "grad_norm": 2.6415631771087646, "learning_rate": 2.366967871485944e-05, "loss": 0.2879, "step": 4715 }, { "epoch": 28.433734939759034, "grad_norm": 2.885535478591919, "learning_rate": 2.3694779116465866e-05, "loss": 0.3089, "step": 4720 }, { "epoch": 28.46385542168675, "grad_norm": 2.2063450813293457, "learning_rate": 2.3719879518072292e-05, "loss": 0.3147, "step": 4725 }, { "epoch": 28.49397590361446, "grad_norm": 2.069905996322632, "learning_rate": 2.3744979919678718e-05, "loss": 0.2878, "step": 4730 }, { "epoch": 28.52409638554217, "grad_norm": 2.4482076168060303, "learning_rate": 2.3770080321285143e-05, "loss": 0.286, "step": 4735 }, { "epoch": 28.55421686746988, "grad_norm": 2.077254295349121, "learning_rate": 2.3795180722891565e-05, "loss": 0.2983, "step": 4740 }, { "epoch": 28.58433734939759, "grad_norm": 2.402545928955078, "learning_rate": 2.382028112449799e-05, "loss": 0.278, "step": 4745 }, { "epoch": 28.6144578313253, "grad_norm": 2.59379506111145, "learning_rate": 2.3845381526104417e-05, "loss": 0.3238, "step": 4750 }, { "epoch": 28.644578313253014, "grad_norm": 2.294840097427368, "learning_rate": 2.3870481927710846e-05, "loss": 0.26, "step": 4755 }, { "epoch": 28.674698795180724, "grad_norm": 2.939584732055664, "learning_rate": 2.389558232931727e-05, "loss": 0.2753, "step": 4760 }, { "epoch": 28.704819277108435, "grad_norm": 2.7921321392059326, "learning_rate": 2.3920682730923697e-05, "loss": 0.3366, "step": 4765 }, { "epoch": 28.734939759036145, "grad_norm": 2.302945375442505, "learning_rate": 2.3945783132530123e-05, "loss": 0.3063, "step": 4770 }, { "epoch": 28.765060240963855, "grad_norm": 3.6218788623809814, "learning_rate": 2.397088353413655e-05, "loss": 0.3335, "step": 4775 }, { "epoch": 28.795180722891565, "grad_norm": 2.533871650695801, "learning_rate": 2.399598393574297e-05, "loss": 0.2699, "step": 4780 }, { "epoch": 28.825301204819276, "grad_norm": 2.306363344192505, "learning_rate": 2.4021084337349397e-05, "loss": 0.3398, "step": 4785 }, { "epoch": 28.855421686746986, "grad_norm": 2.817366600036621, "learning_rate": 2.4046184738955822e-05, "loss": 0.3025, "step": 4790 }, { "epoch": 28.8855421686747, "grad_norm": 4.681380271911621, "learning_rate": 2.407128514056225e-05, "loss": 0.3139, "step": 4795 }, { "epoch": 28.91566265060241, "grad_norm": 2.63634991645813, "learning_rate": 2.4096385542168677e-05, "loss": 0.3137, "step": 4800 }, { "epoch": 28.94578313253012, "grad_norm": 2.8113434314727783, "learning_rate": 2.4121485943775103e-05, "loss": 0.3196, "step": 4805 }, { "epoch": 28.97590361445783, "grad_norm": 3.485682964324951, "learning_rate": 2.4146586345381528e-05, "loss": 0.3329, "step": 4810 }, { "epoch": 29.0, "eval_accuracy": 0.8871411718442784, "eval_auc": 0.942342045141888, "eval_f1": 0.8366533864541833, "eval_loss": 0.2750353217124939, "eval_precision": 0.8352272727272727, "eval_recall": 0.8380843785632839, "eval_runtime": 20.3781, "eval_samples_per_second": 124.791, "eval_steps_per_second": 0.638, "step": 4814 }, { "epoch": 29.00602409638554, "grad_norm": 2.1197657585144043, "learning_rate": 2.4171686746987954e-05, "loss": 0.2904, "step": 4815 }, { "epoch": 29.03614457831325, "grad_norm": 2.599250555038452, "learning_rate": 2.419678714859438e-05, "loss": 0.3039, "step": 4820 }, { "epoch": 29.066265060240966, "grad_norm": 2.422440767288208, "learning_rate": 2.4221887550200802e-05, "loss": 0.3298, "step": 4825 }, { "epoch": 29.096385542168676, "grad_norm": 2.069216012954712, "learning_rate": 2.4246987951807228e-05, "loss": 0.2861, "step": 4830 }, { "epoch": 29.126506024096386, "grad_norm": 2.8253629207611084, "learning_rate": 2.4272088353413657e-05, "loss": 0.3208, "step": 4835 }, { "epoch": 29.156626506024097, "grad_norm": 2.8499755859375, "learning_rate": 2.4297188755020082e-05, "loss": 0.2923, "step": 4840 }, { "epoch": 29.186746987951807, "grad_norm": 2.887667179107666, "learning_rate": 2.4322289156626508e-05, "loss": 0.2739, "step": 4845 }, { "epoch": 29.216867469879517, "grad_norm": 2.273495674133301, "learning_rate": 2.4347389558232934e-05, "loss": 0.2687, "step": 4850 }, { "epoch": 29.246987951807228, "grad_norm": 2.1487019062042236, "learning_rate": 2.437248995983936e-05, "loss": 0.2888, "step": 4855 }, { "epoch": 29.27710843373494, "grad_norm": 1.8572500944137573, "learning_rate": 2.4397590361445785e-05, "loss": 0.293, "step": 4860 }, { "epoch": 29.30722891566265, "grad_norm": 5.140269756317139, "learning_rate": 2.442269076305221e-05, "loss": 0.2901, "step": 4865 }, { "epoch": 29.337349397590362, "grad_norm": 2.208930730819702, "learning_rate": 2.4447791164658633e-05, "loss": 0.2808, "step": 4870 }, { "epoch": 29.367469879518072, "grad_norm": 2.0343732833862305, "learning_rate": 2.4472891566265062e-05, "loss": 0.278, "step": 4875 }, { "epoch": 29.397590361445783, "grad_norm": 2.5664570331573486, "learning_rate": 2.4497991967871488e-05, "loss": 0.3075, "step": 4880 }, { "epoch": 29.427710843373493, "grad_norm": 2.6145036220550537, "learning_rate": 2.4523092369477913e-05, "loss": 0.3107, "step": 4885 }, { "epoch": 29.457831325301203, "grad_norm": 2.0771539211273193, "learning_rate": 2.454819277108434e-05, "loss": 0.3047, "step": 4890 }, { "epoch": 29.487951807228917, "grad_norm": 2.758699893951416, "learning_rate": 2.4573293172690765e-05, "loss": 0.2777, "step": 4895 }, { "epoch": 29.518072289156628, "grad_norm": 2.6498754024505615, "learning_rate": 2.459839357429719e-05, "loss": 0.2903, "step": 4900 }, { "epoch": 29.548192771084338, "grad_norm": 1.9495166540145874, "learning_rate": 2.4623493975903616e-05, "loss": 0.2703, "step": 4905 }, { "epoch": 29.57831325301205, "grad_norm": 2.5841169357299805, "learning_rate": 2.4648594377510042e-05, "loss": 0.3075, "step": 4910 }, { "epoch": 29.60843373493976, "grad_norm": 2.615114688873291, "learning_rate": 2.4673694779116467e-05, "loss": 0.2809, "step": 4915 }, { "epoch": 29.63855421686747, "grad_norm": 2.9745523929595947, "learning_rate": 2.4698795180722893e-05, "loss": 0.3286, "step": 4920 }, { "epoch": 29.66867469879518, "grad_norm": 2.6587026119232178, "learning_rate": 2.472389558232932e-05, "loss": 0.3095, "step": 4925 }, { "epoch": 29.698795180722893, "grad_norm": 2.4806618690490723, "learning_rate": 2.4748995983935744e-05, "loss": 0.324, "step": 4930 }, { "epoch": 29.728915662650603, "grad_norm": 1.9983023405075073, "learning_rate": 2.477409638554217e-05, "loss": 0.3014, "step": 4935 }, { "epoch": 29.759036144578314, "grad_norm": 2.5685226917266846, "learning_rate": 2.4799196787148596e-05, "loss": 0.2951, "step": 4940 }, { "epoch": 29.789156626506024, "grad_norm": 2.277895927429199, "learning_rate": 2.482429718875502e-05, "loss": 0.2378, "step": 4945 }, { "epoch": 29.819277108433734, "grad_norm": 3.36366605758667, "learning_rate": 2.4849397590361447e-05, "loss": 0.2895, "step": 4950 }, { "epoch": 29.849397590361445, "grad_norm": 2.543626070022583, "learning_rate": 2.4874497991967873e-05, "loss": 0.3248, "step": 4955 }, { "epoch": 29.879518072289155, "grad_norm": 2.6698496341705322, "learning_rate": 2.48995983935743e-05, "loss": 0.3171, "step": 4960 }, { "epoch": 29.90963855421687, "grad_norm": 2.6674630641937256, "learning_rate": 2.4924698795180724e-05, "loss": 0.323, "step": 4965 }, { "epoch": 29.93975903614458, "grad_norm": 1.9268379211425781, "learning_rate": 2.494979919678715e-05, "loss": 0.3019, "step": 4970 }, { "epoch": 29.96987951807229, "grad_norm": 2.4100546836853027, "learning_rate": 2.4974899598393576e-05, "loss": 0.3307, "step": 4975 }, { "epoch": 30.0, "grad_norm": 2.917056083679199, "learning_rate": 2.5e-05, "loss": 0.3131, "step": 4980 }, { "epoch": 30.0, "eval_accuracy": 0.8906802988596146, "eval_auc": 0.9512488005464445, "eval_f1": 0.8398617511520737, "eval_loss": 0.24600017070770264, "eval_precision": 0.8486612339930152, "eval_recall": 0.8312428734321551, "eval_runtime": 17.5103, "eval_samples_per_second": 145.229, "eval_steps_per_second": 0.742, "step": 4980 }, { "epoch": 30.03012048192771, "grad_norm": 2.3530986309051514, "learning_rate": 2.5025100401606427e-05, "loss": 0.2913, "step": 4985 }, { "epoch": 30.06024096385542, "grad_norm": 2.8278725147247314, "learning_rate": 2.5050200803212853e-05, "loss": 0.2972, "step": 4990 }, { "epoch": 30.09036144578313, "grad_norm": 2.1610164642333984, "learning_rate": 2.5075301204819278e-05, "loss": 0.2711, "step": 4995 }, { "epoch": 30.120481927710845, "grad_norm": 2.423787832260132, "learning_rate": 2.5100401606425704e-05, "loss": 0.2504, "step": 5000 }, { "epoch": 30.150602409638555, "grad_norm": 3.632481575012207, "learning_rate": 2.5125502008032133e-05, "loss": 0.2882, "step": 5005 }, { "epoch": 30.180722891566266, "grad_norm": 3.5192201137542725, "learning_rate": 2.515060240963856e-05, "loss": 0.3301, "step": 5010 }, { "epoch": 30.210843373493976, "grad_norm": 2.7295660972595215, "learning_rate": 2.5175702811244984e-05, "loss": 0.272, "step": 5015 }, { "epoch": 30.240963855421686, "grad_norm": 2.8712375164031982, "learning_rate": 2.520080321285141e-05, "loss": 0.2634, "step": 5020 }, { "epoch": 30.271084337349397, "grad_norm": 2.6461923122406006, "learning_rate": 2.522590361445783e-05, "loss": 0.277, "step": 5025 }, { "epoch": 30.301204819277107, "grad_norm": 2.4615397453308105, "learning_rate": 2.5251004016064255e-05, "loss": 0.3056, "step": 5030 }, { "epoch": 30.33132530120482, "grad_norm": 2.3607475757598877, "learning_rate": 2.527610441767068e-05, "loss": 0.287, "step": 5035 }, { "epoch": 30.36144578313253, "grad_norm": 2.6344406604766846, "learning_rate": 2.530120481927711e-05, "loss": 0.3256, "step": 5040 }, { "epoch": 30.39156626506024, "grad_norm": 2.357358455657959, "learning_rate": 2.5326305220883535e-05, "loss": 0.2615, "step": 5045 }, { "epoch": 30.42168674698795, "grad_norm": 4.351990699768066, "learning_rate": 2.535140562248996e-05, "loss": 0.3105, "step": 5050 }, { "epoch": 30.451807228915662, "grad_norm": 2.7113723754882812, "learning_rate": 2.5376506024096386e-05, "loss": 0.2849, "step": 5055 }, { "epoch": 30.481927710843372, "grad_norm": 1.9872444868087769, "learning_rate": 2.5401606425702812e-05, "loss": 0.2922, "step": 5060 }, { "epoch": 30.512048192771083, "grad_norm": 2.4583382606506348, "learning_rate": 2.5426706827309238e-05, "loss": 0.3054, "step": 5065 }, { "epoch": 30.542168674698797, "grad_norm": 2.36474871635437, "learning_rate": 2.5451807228915663e-05, "loss": 0.2723, "step": 5070 }, { "epoch": 30.572289156626507, "grad_norm": 2.877342700958252, "learning_rate": 2.547690763052209e-05, "loss": 0.3518, "step": 5075 }, { "epoch": 30.602409638554217, "grad_norm": 2.3828818798065186, "learning_rate": 2.5502008032128515e-05, "loss": 0.334, "step": 5080 }, { "epoch": 30.632530120481928, "grad_norm": 2.944418430328369, "learning_rate": 2.552710843373494e-05, "loss": 0.2791, "step": 5085 }, { "epoch": 30.662650602409638, "grad_norm": 2.43225359916687, "learning_rate": 2.555220883534137e-05, "loss": 0.3237, "step": 5090 }, { "epoch": 30.69277108433735, "grad_norm": 2.221041202545166, "learning_rate": 2.5577309236947795e-05, "loss": 0.306, "step": 5095 }, { "epoch": 30.72289156626506, "grad_norm": 2.513659715652466, "learning_rate": 2.560240963855422e-05, "loss": 0.2948, "step": 5100 }, { "epoch": 30.753012048192772, "grad_norm": 1.9354315996170044, "learning_rate": 2.5627510040160646e-05, "loss": 0.3008, "step": 5105 }, { "epoch": 30.783132530120483, "grad_norm": 3.294189929962158, "learning_rate": 2.5652610441767072e-05, "loss": 0.3362, "step": 5110 }, { "epoch": 30.813253012048193, "grad_norm": 3.190519094467163, "learning_rate": 2.567771084337349e-05, "loss": 0.3287, "step": 5115 }, { "epoch": 30.843373493975903, "grad_norm": 2.336742639541626, "learning_rate": 2.570281124497992e-05, "loss": 0.3103, "step": 5120 }, { "epoch": 30.873493975903614, "grad_norm": 2.363055944442749, "learning_rate": 2.5727911646586346e-05, "loss": 0.3117, "step": 5125 }, { "epoch": 30.903614457831324, "grad_norm": 2.6356406211853027, "learning_rate": 2.575301204819277e-05, "loss": 0.2972, "step": 5130 }, { "epoch": 30.933734939759034, "grad_norm": 2.487208366394043, "learning_rate": 2.5778112449799197e-05, "loss": 0.2578, "step": 5135 }, { "epoch": 30.96385542168675, "grad_norm": 2.5588066577911377, "learning_rate": 2.5803212851405623e-05, "loss": 0.276, "step": 5140 }, { "epoch": 30.99397590361446, "grad_norm": 2.008023977279663, "learning_rate": 2.582831325301205e-05, "loss": 0.2947, "step": 5145 }, { "epoch": 31.0, "eval_accuracy": 0.892253244199764, "eval_auc": 0.9521710622675524, "eval_f1": 0.8399532710280374, "eval_loss": 0.25605228543281555, "eval_precision": 0.8610778443113772, "eval_recall": 0.8198403648802737, "eval_runtime": 19.4574, "eval_samples_per_second": 130.695, "eval_steps_per_second": 0.668, "step": 5146 }, { "epoch": 31.02409638554217, "grad_norm": 2.0197880268096924, "learning_rate": 2.5853413654618474e-05, "loss": 0.2584, "step": 5150 }, { "epoch": 31.05421686746988, "grad_norm": 3.194411516189575, "learning_rate": 2.58785140562249e-05, "loss": 0.2708, "step": 5155 }, { "epoch": 31.08433734939759, "grad_norm": 2.815106153488159, "learning_rate": 2.5903614457831325e-05, "loss": 0.265, "step": 5160 }, { "epoch": 31.1144578313253, "grad_norm": 2.1786208152770996, "learning_rate": 2.592871485943775e-05, "loss": 0.2492, "step": 5165 }, { "epoch": 31.14457831325301, "grad_norm": 2.171779155731201, "learning_rate": 2.595381526104418e-05, "loss": 0.2593, "step": 5170 }, { "epoch": 31.174698795180724, "grad_norm": 2.532412052154541, "learning_rate": 2.5978915662650606e-05, "loss": 0.2688, "step": 5175 }, { "epoch": 31.204819277108435, "grad_norm": 2.324094295501709, "learning_rate": 2.600401606425703e-05, "loss": 0.2687, "step": 5180 }, { "epoch": 31.234939759036145, "grad_norm": 2.6688435077667236, "learning_rate": 2.6029116465863457e-05, "loss": 0.2775, "step": 5185 }, { "epoch": 31.265060240963855, "grad_norm": 2.4075028896331787, "learning_rate": 2.6054216867469883e-05, "loss": 0.2541, "step": 5190 }, { "epoch": 31.295180722891565, "grad_norm": 2.7049026489257812, "learning_rate": 2.607931726907631e-05, "loss": 0.2834, "step": 5195 }, { "epoch": 31.325301204819276, "grad_norm": 3.187969923019409, "learning_rate": 2.6104417670682734e-05, "loss": 0.3352, "step": 5200 }, { "epoch": 31.355421686746986, "grad_norm": 3.6192030906677246, "learning_rate": 2.6129518072289157e-05, "loss": 0.2842, "step": 5205 }, { "epoch": 31.3855421686747, "grad_norm": 2.197908878326416, "learning_rate": 2.6154618473895582e-05, "loss": 0.2872, "step": 5210 }, { "epoch": 31.41566265060241, "grad_norm": 2.4250454902648926, "learning_rate": 2.6179718875502008e-05, "loss": 0.2835, "step": 5215 }, { "epoch": 31.44578313253012, "grad_norm": 2.701233148574829, "learning_rate": 2.6204819277108434e-05, "loss": 0.2726, "step": 5220 }, { "epoch": 31.47590361445783, "grad_norm": 2.1720259189605713, "learning_rate": 2.622991967871486e-05, "loss": 0.2698, "step": 5225 }, { "epoch": 31.50602409638554, "grad_norm": 2.6421265602111816, "learning_rate": 2.6255020080321285e-05, "loss": 0.2931, "step": 5230 }, { "epoch": 31.53614457831325, "grad_norm": 3.280252695083618, "learning_rate": 2.628012048192771e-05, "loss": 0.299, "step": 5235 }, { "epoch": 31.566265060240966, "grad_norm": 2.5766305923461914, "learning_rate": 2.6305220883534136e-05, "loss": 0.3077, "step": 5240 }, { "epoch": 31.596385542168676, "grad_norm": 2.385478973388672, "learning_rate": 2.6330321285140562e-05, "loss": 0.2979, "step": 5245 }, { "epoch": 31.626506024096386, "grad_norm": 2.208635091781616, "learning_rate": 2.635542168674699e-05, "loss": 0.2661, "step": 5250 }, { "epoch": 31.656626506024097, "grad_norm": 3.1388778686523438, "learning_rate": 2.6380522088353417e-05, "loss": 0.3259, "step": 5255 }, { "epoch": 31.686746987951807, "grad_norm": 2.6264169216156006, "learning_rate": 2.6405622489959842e-05, "loss": 0.2976, "step": 5260 }, { "epoch": 31.716867469879517, "grad_norm": 2.8914477825164795, "learning_rate": 2.6430722891566268e-05, "loss": 0.2797, "step": 5265 }, { "epoch": 31.746987951807228, "grad_norm": 1.6656322479248047, "learning_rate": 2.6455823293172694e-05, "loss": 0.2702, "step": 5270 }, { "epoch": 31.77710843373494, "grad_norm": 2.7814254760742188, "learning_rate": 2.648092369477912e-05, "loss": 0.3048, "step": 5275 }, { "epoch": 31.80722891566265, "grad_norm": 2.2331786155700684, "learning_rate": 2.6506024096385545e-05, "loss": 0.2608, "step": 5280 }, { "epoch": 31.837349397590362, "grad_norm": 3.587602138519287, "learning_rate": 2.653112449799197e-05, "loss": 0.2824, "step": 5285 }, { "epoch": 31.867469879518072, "grad_norm": 2.4856202602386475, "learning_rate": 2.6556224899598396e-05, "loss": 0.2636, "step": 5290 }, { "epoch": 31.897590361445783, "grad_norm": 1.821752667427063, "learning_rate": 2.658132530120482e-05, "loss": 0.2835, "step": 5295 }, { "epoch": 31.927710843373493, "grad_norm": 2.444525957107544, "learning_rate": 2.6606425702811244e-05, "loss": 0.2744, "step": 5300 }, { "epoch": 31.957831325301203, "grad_norm": 3.3772637844085693, "learning_rate": 2.663152610441767e-05, "loss": 0.3373, "step": 5305 }, { "epoch": 31.987951807228917, "grad_norm": 2.0322577953338623, "learning_rate": 2.6656626506024096e-05, "loss": 0.2789, "step": 5310 }, { "epoch": 32.0, "eval_accuracy": 0.8930397168698387, "eval_auc": 0.9527353700887423, "eval_f1": 0.8434982738780207, "eval_loss": 0.25190040469169617, "eval_precision": 0.851335656213705, "eval_recall": 0.8358038768529077, "eval_runtime": 18.443, "eval_samples_per_second": 137.884, "eval_steps_per_second": 0.705, "step": 5312 }, { "epoch": 32.01807228915663, "grad_norm": 3.5874390602111816, "learning_rate": 2.668172690763052e-05, "loss": 0.3335, "step": 5315 }, { "epoch": 32.04819277108434, "grad_norm": 3.012038469314575, "learning_rate": 2.6706827309236947e-05, "loss": 0.2645, "step": 5320 }, { "epoch": 32.07831325301205, "grad_norm": 2.6814894676208496, "learning_rate": 2.6731927710843373e-05, "loss": 0.2605, "step": 5325 }, { "epoch": 32.10843373493976, "grad_norm": 2.470123052597046, "learning_rate": 2.6757028112449802e-05, "loss": 0.252, "step": 5330 }, { "epoch": 32.13855421686747, "grad_norm": 2.329923629760742, "learning_rate": 2.6782128514056227e-05, "loss": 0.2885, "step": 5335 }, { "epoch": 32.16867469879518, "grad_norm": 2.523280382156372, "learning_rate": 2.6807228915662653e-05, "loss": 0.2695, "step": 5340 }, { "epoch": 32.19879518072289, "grad_norm": 2.2069311141967773, "learning_rate": 2.683232931726908e-05, "loss": 0.2958, "step": 5345 }, { "epoch": 32.2289156626506, "grad_norm": 2.726677417755127, "learning_rate": 2.6857429718875504e-05, "loss": 0.3159, "step": 5350 }, { "epoch": 32.25903614457831, "grad_norm": 3.455636739730835, "learning_rate": 2.688253012048193e-05, "loss": 0.3096, "step": 5355 }, { "epoch": 32.28915662650602, "grad_norm": 2.2268497943878174, "learning_rate": 2.6907630522088356e-05, "loss": 0.2715, "step": 5360 }, { "epoch": 32.31927710843374, "grad_norm": 2.406238317489624, "learning_rate": 2.693273092369478e-05, "loss": 0.2928, "step": 5365 }, { "epoch": 32.34939759036145, "grad_norm": 2.166278123855591, "learning_rate": 2.6957831325301207e-05, "loss": 0.2765, "step": 5370 }, { "epoch": 32.37951807228916, "grad_norm": 1.9824190139770508, "learning_rate": 2.6982931726907633e-05, "loss": 0.3195, "step": 5375 }, { "epoch": 32.40963855421687, "grad_norm": 1.7944291830062866, "learning_rate": 2.7008032128514062e-05, "loss": 0.2438, "step": 5380 }, { "epoch": 32.43975903614458, "grad_norm": 2.768326759338379, "learning_rate": 2.703313253012048e-05, "loss": 0.3254, "step": 5385 }, { "epoch": 32.46987951807229, "grad_norm": 2.4229800701141357, "learning_rate": 2.7058232931726906e-05, "loss": 0.2791, "step": 5390 }, { "epoch": 32.5, "grad_norm": 2.3203210830688477, "learning_rate": 2.7083333333333332e-05, "loss": 0.2886, "step": 5395 }, { "epoch": 32.53012048192771, "grad_norm": 2.4342074394226074, "learning_rate": 2.7108433734939758e-05, "loss": 0.273, "step": 5400 }, { "epoch": 32.56024096385542, "grad_norm": 2.3602750301361084, "learning_rate": 2.7133534136546183e-05, "loss": 0.248, "step": 5405 }, { "epoch": 32.59036144578313, "grad_norm": 2.7906014919281006, "learning_rate": 2.7158634538152612e-05, "loss": 0.3255, "step": 5410 }, { "epoch": 32.62048192771084, "grad_norm": 2.2469797134399414, "learning_rate": 2.7183734939759038e-05, "loss": 0.2498, "step": 5415 }, { "epoch": 32.65060240963855, "grad_norm": 2.7367377281188965, "learning_rate": 2.7208835341365464e-05, "loss": 0.2596, "step": 5420 }, { "epoch": 32.68072289156626, "grad_norm": 2.7294719219207764, "learning_rate": 2.723393574297189e-05, "loss": 0.28, "step": 5425 }, { "epoch": 32.71084337349397, "grad_norm": 1.9965656995773315, "learning_rate": 2.7259036144578315e-05, "loss": 0.2654, "step": 5430 }, { "epoch": 32.74096385542169, "grad_norm": 2.9363596439361572, "learning_rate": 2.728413654618474e-05, "loss": 0.3083, "step": 5435 }, { "epoch": 32.7710843373494, "grad_norm": 2.3121836185455322, "learning_rate": 2.7309236947791167e-05, "loss": 0.3089, "step": 5440 }, { "epoch": 32.80120481927711, "grad_norm": 2.360670566558838, "learning_rate": 2.7334337349397592e-05, "loss": 0.3105, "step": 5445 }, { "epoch": 32.83132530120482, "grad_norm": 2.362351417541504, "learning_rate": 2.7359437751004018e-05, "loss": 0.2843, "step": 5450 }, { "epoch": 32.86144578313253, "grad_norm": 2.308806896209717, "learning_rate": 2.7384538152610444e-05, "loss": 0.2634, "step": 5455 }, { "epoch": 32.89156626506024, "grad_norm": 2.528810739517212, "learning_rate": 2.7409638554216873e-05, "loss": 0.2863, "step": 5460 }, { "epoch": 32.92168674698795, "grad_norm": 2.1996474266052246, "learning_rate": 2.7434738955823298e-05, "loss": 0.2991, "step": 5465 }, { "epoch": 32.95180722891566, "grad_norm": 2.9335763454437256, "learning_rate": 2.7459839357429717e-05, "loss": 0.2628, "step": 5470 }, { "epoch": 32.98192771084337, "grad_norm": 3.3871850967407227, "learning_rate": 2.7484939759036143e-05, "loss": 0.2774, "step": 5475 }, { "epoch": 33.0, "eval_accuracy": 0.9044435705859222, "eval_auc": 0.9555521182247131, "eval_f1": 0.8504615384615385, "eval_loss": 0.253192663192749, "eval_precision": 0.9237967914438503, "eval_recall": 0.7879133409350056, "eval_runtime": 17.3797, "eval_samples_per_second": 146.32, "eval_steps_per_second": 0.748, "step": 5478 }, { "epoch": 33.01204819277108, "grad_norm": 2.615572929382324, "learning_rate": 2.751004016064257e-05, "loss": 0.2574, "step": 5480 }, { "epoch": 33.04216867469879, "grad_norm": 1.9657039642333984, "learning_rate": 2.7535140562248994e-05, "loss": 0.2799, "step": 5485 }, { "epoch": 33.0722891566265, "grad_norm": 2.3047263622283936, "learning_rate": 2.756024096385542e-05, "loss": 0.2787, "step": 5490 }, { "epoch": 33.102409638554214, "grad_norm": 1.813161849975586, "learning_rate": 2.758534136546185e-05, "loss": 0.234, "step": 5495 }, { "epoch": 33.13253012048193, "grad_norm": 2.4800333976745605, "learning_rate": 2.7610441767068275e-05, "loss": 0.2717, "step": 5500 }, { "epoch": 33.16265060240964, "grad_norm": 1.8988195657730103, "learning_rate": 2.76355421686747e-05, "loss": 0.2628, "step": 5505 }, { "epoch": 33.19277108433735, "grad_norm": 2.371368885040283, "learning_rate": 2.7660642570281126e-05, "loss": 0.2665, "step": 5510 }, { "epoch": 33.22289156626506, "grad_norm": 2.7762610912323, "learning_rate": 2.768574297188755e-05, "loss": 0.2797, "step": 5515 }, { "epoch": 33.25301204819277, "grad_norm": 1.872565746307373, "learning_rate": 2.7710843373493977e-05, "loss": 0.2721, "step": 5520 }, { "epoch": 33.28313253012048, "grad_norm": 2.8854243755340576, "learning_rate": 2.7735943775100403e-05, "loss": 0.306, "step": 5525 }, { "epoch": 33.31325301204819, "grad_norm": 2.694687843322754, "learning_rate": 2.776104417670683e-05, "loss": 0.2859, "step": 5530 }, { "epoch": 33.3433734939759, "grad_norm": 2.2184691429138184, "learning_rate": 2.7786144578313254e-05, "loss": 0.2645, "step": 5535 }, { "epoch": 33.373493975903614, "grad_norm": 3.2997360229492188, "learning_rate": 2.781124497991968e-05, "loss": 0.2841, "step": 5540 }, { "epoch": 33.403614457831324, "grad_norm": 2.774646520614624, "learning_rate": 2.783634538152611e-05, "loss": 0.2917, "step": 5545 }, { "epoch": 33.433734939759034, "grad_norm": 1.8466839790344238, "learning_rate": 2.7861445783132535e-05, "loss": 0.2519, "step": 5550 }, { "epoch": 33.463855421686745, "grad_norm": 2.7159290313720703, "learning_rate": 2.788654618473896e-05, "loss": 0.2991, "step": 5555 }, { "epoch": 33.493975903614455, "grad_norm": 2.4454333782196045, "learning_rate": 2.791164658634538e-05, "loss": 0.2997, "step": 5560 }, { "epoch": 33.524096385542165, "grad_norm": 2.329941987991333, "learning_rate": 2.7936746987951805e-05, "loss": 0.2437, "step": 5565 }, { "epoch": 33.55421686746988, "grad_norm": 2.7902016639709473, "learning_rate": 2.796184738955823e-05, "loss": 0.2614, "step": 5570 }, { "epoch": 33.58433734939759, "grad_norm": 2.3303303718566895, "learning_rate": 2.798694779116466e-05, "loss": 0.2841, "step": 5575 }, { "epoch": 33.6144578313253, "grad_norm": 2.3831629753112793, "learning_rate": 2.8012048192771085e-05, "loss": 0.2626, "step": 5580 }, { "epoch": 33.644578313253014, "grad_norm": 1.8426225185394287, "learning_rate": 2.803714859437751e-05, "loss": 0.2828, "step": 5585 }, { "epoch": 33.674698795180724, "grad_norm": 2.2950408458709717, "learning_rate": 2.8062248995983937e-05, "loss": 0.2557, "step": 5590 }, { "epoch": 33.704819277108435, "grad_norm": 2.125751256942749, "learning_rate": 2.8087349397590362e-05, "loss": 0.2585, "step": 5595 }, { "epoch": 33.734939759036145, "grad_norm": 2.5507137775421143, "learning_rate": 2.8112449799196788e-05, "loss": 0.2856, "step": 5600 }, { "epoch": 33.765060240963855, "grad_norm": 3.3059866428375244, "learning_rate": 2.8137550200803214e-05, "loss": 0.282, "step": 5605 }, { "epoch": 33.795180722891565, "grad_norm": 2.6528871059417725, "learning_rate": 2.816265060240964e-05, "loss": 0.3061, "step": 5610 }, { "epoch": 33.825301204819276, "grad_norm": 3.684744358062744, "learning_rate": 2.8187751004016065e-05, "loss": 0.2988, "step": 5615 }, { "epoch": 33.855421686746986, "grad_norm": 2.484884738922119, "learning_rate": 2.821285140562249e-05, "loss": 0.2788, "step": 5620 }, { "epoch": 33.8855421686747, "grad_norm": 1.8708103895187378, "learning_rate": 2.823795180722892e-05, "loss": 0.236, "step": 5625 }, { "epoch": 33.91566265060241, "grad_norm": 2.291910171508789, "learning_rate": 2.8263052208835346e-05, "loss": 0.2555, "step": 5630 }, { "epoch": 33.94578313253012, "grad_norm": 2.124656915664673, "learning_rate": 2.828815261044177e-05, "loss": 0.2616, "step": 5635 }, { "epoch": 33.975903614457835, "grad_norm": 3.6708576679229736, "learning_rate": 2.8313253012048197e-05, "loss": 0.2232, "step": 5640 }, { "epoch": 34.0, "eval_accuracy": 0.9040503342508848, "eval_auc": 0.9544108407331006, "eval_f1": 0.8586326767091541, "eval_loss": 0.2504521608352661, "eval_precision": 0.872791519434629, "eval_recall": 0.8449258836944128, "eval_runtime": 17.3902, "eval_samples_per_second": 146.232, "eval_steps_per_second": 0.748, "step": 5644 }, { "epoch": 34.006024096385545, "grad_norm": 3.1373565196990967, "learning_rate": 2.8338353413654623e-05, "loss": 0.331, "step": 5645 }, { "epoch": 34.036144578313255, "grad_norm": 2.5919554233551025, "learning_rate": 2.836345381526104e-05, "loss": 0.2949, "step": 5650 }, { "epoch": 34.066265060240966, "grad_norm": 2.182943344116211, "learning_rate": 2.838855421686747e-05, "loss": 0.2116, "step": 5655 }, { "epoch": 34.096385542168676, "grad_norm": 2.259359121322632, "learning_rate": 2.8413654618473896e-05, "loss": 0.2538, "step": 5660 }, { "epoch": 34.126506024096386, "grad_norm": 2.015058755874634, "learning_rate": 2.8438755020080322e-05, "loss": 0.2232, "step": 5665 }, { "epoch": 34.1566265060241, "grad_norm": 1.9988059997558594, "learning_rate": 2.8463855421686748e-05, "loss": 0.2737, "step": 5670 }, { "epoch": 34.18674698795181, "grad_norm": 2.2244763374328613, "learning_rate": 2.8488955823293173e-05, "loss": 0.2608, "step": 5675 }, { "epoch": 34.21686746987952, "grad_norm": 2.8359451293945312, "learning_rate": 2.85140562248996e-05, "loss": 0.2806, "step": 5680 }, { "epoch": 34.24698795180723, "grad_norm": 2.3713274002075195, "learning_rate": 2.8539156626506025e-05, "loss": 0.2939, "step": 5685 }, { "epoch": 34.27710843373494, "grad_norm": 2.9533097743988037, "learning_rate": 2.856425702811245e-05, "loss": 0.2589, "step": 5690 }, { "epoch": 34.30722891566265, "grad_norm": 2.3739044666290283, "learning_rate": 2.8589357429718876e-05, "loss": 0.2415, "step": 5695 }, { "epoch": 34.33734939759036, "grad_norm": 2.4942517280578613, "learning_rate": 2.86144578313253e-05, "loss": 0.2825, "step": 5700 }, { "epoch": 34.36746987951807, "grad_norm": 2.3502275943756104, "learning_rate": 2.863955823293173e-05, "loss": 0.2321, "step": 5705 }, { "epoch": 34.397590361445786, "grad_norm": 2.2536942958831787, "learning_rate": 2.8664658634538156e-05, "loss": 0.2823, "step": 5710 }, { "epoch": 34.4277108433735, "grad_norm": 2.001239776611328, "learning_rate": 2.8689759036144582e-05, "loss": 0.2799, "step": 5715 }, { "epoch": 34.45783132530121, "grad_norm": 2.6977627277374268, "learning_rate": 2.8714859437751008e-05, "loss": 0.2941, "step": 5720 }, { "epoch": 34.48795180722892, "grad_norm": 2.146167516708374, "learning_rate": 2.8739959839357433e-05, "loss": 0.2154, "step": 5725 }, { "epoch": 34.51807228915663, "grad_norm": 2.3009486198425293, "learning_rate": 2.876506024096386e-05, "loss": 0.2217, "step": 5730 }, { "epoch": 34.54819277108434, "grad_norm": 6.846885681152344, "learning_rate": 2.8790160642570285e-05, "loss": 0.25, "step": 5735 }, { "epoch": 34.57831325301205, "grad_norm": 3.1928577423095703, "learning_rate": 2.8815261044176707e-05, "loss": 0.3159, "step": 5740 }, { "epoch": 34.60843373493976, "grad_norm": 2.9763200283050537, "learning_rate": 2.8840361445783133e-05, "loss": 0.2633, "step": 5745 }, { "epoch": 34.63855421686747, "grad_norm": 2.920257091522217, "learning_rate": 2.8865461847389558e-05, "loss": 0.2693, "step": 5750 }, { "epoch": 34.66867469879518, "grad_norm": 2.94160532951355, "learning_rate": 2.8890562248995984e-05, "loss": 0.2848, "step": 5755 }, { "epoch": 34.69879518072289, "grad_norm": 2.237041473388672, "learning_rate": 2.891566265060241e-05, "loss": 0.2573, "step": 5760 }, { "epoch": 34.7289156626506, "grad_norm": 2.879662036895752, "learning_rate": 2.8940763052208835e-05, "loss": 0.2577, "step": 5765 }, { "epoch": 34.75903614457831, "grad_norm": 2.4831583499908447, "learning_rate": 2.896586345381526e-05, "loss": 0.2173, "step": 5770 }, { "epoch": 34.78915662650603, "grad_norm": 3.967815399169922, "learning_rate": 2.8990963855421687e-05, "loss": 0.274, "step": 5775 }, { "epoch": 34.81927710843374, "grad_norm": 3.6488428115844727, "learning_rate": 2.9016064257028112e-05, "loss": 0.3035, "step": 5780 }, { "epoch": 34.84939759036145, "grad_norm": 2.2879462242126465, "learning_rate": 2.904116465863454e-05, "loss": 0.2783, "step": 5785 }, { "epoch": 34.87951807228916, "grad_norm": 2.7552425861358643, "learning_rate": 2.9066265060240967e-05, "loss": 0.2255, "step": 5790 }, { "epoch": 34.90963855421687, "grad_norm": 2.1789631843566895, "learning_rate": 2.9091365461847393e-05, "loss": 0.2339, "step": 5795 }, { "epoch": 34.93975903614458, "grad_norm": 1.7293847799301147, "learning_rate": 2.911646586345382e-05, "loss": 0.2709, "step": 5800 }, { "epoch": 34.96987951807229, "grad_norm": 2.7016797065734863, "learning_rate": 2.9141566265060244e-05, "loss": 0.2735, "step": 5805 }, { "epoch": 35.0, "grad_norm": 2.5335707664489746, "learning_rate": 2.916666666666667e-05, "loss": 0.279, "step": 5810 }, { "epoch": 35.0, "eval_accuracy": 0.9028706252457727, "eval_auc": 0.956116426045903, "eval_f1": 0.8445563247325362, "eval_loss": 0.2674693167209625, "eval_precision": 0.9424157303370787, "eval_recall": 0.7651083238312428, "eval_runtime": 17.6831, "eval_samples_per_second": 143.81, "eval_steps_per_second": 0.735, "step": 5810 }, { "epoch": 35.03012048192771, "grad_norm": 2.46142578125, "learning_rate": 2.9191767068273095e-05, "loss": 0.2537, "step": 5815 }, { "epoch": 35.06024096385542, "grad_norm": 3.206148862838745, "learning_rate": 2.921686746987952e-05, "loss": 0.2767, "step": 5820 }, { "epoch": 35.09036144578313, "grad_norm": 2.477686882019043, "learning_rate": 2.9241967871485943e-05, "loss": 0.2423, "step": 5825 }, { "epoch": 35.12048192771084, "grad_norm": 3.08777117729187, "learning_rate": 2.926706827309237e-05, "loss": 0.2657, "step": 5830 }, { "epoch": 35.15060240963855, "grad_norm": 2.131882429122925, "learning_rate": 2.9292168674698795e-05, "loss": 0.245, "step": 5835 }, { "epoch": 35.18072289156626, "grad_norm": 2.3372786045074463, "learning_rate": 2.931726907630522e-05, "loss": 0.2793, "step": 5840 }, { "epoch": 35.21084337349398, "grad_norm": 2.8479580879211426, "learning_rate": 2.9342369477911646e-05, "loss": 0.2144, "step": 5845 }, { "epoch": 35.24096385542169, "grad_norm": 3.9904942512512207, "learning_rate": 2.9367469879518072e-05, "loss": 0.2547, "step": 5850 }, { "epoch": 35.2710843373494, "grad_norm": 2.9604651927948, "learning_rate": 2.9392570281124497e-05, "loss": 0.2963, "step": 5855 }, { "epoch": 35.30120481927711, "grad_norm": 2.49530029296875, "learning_rate": 2.9417670682730923e-05, "loss": 0.2565, "step": 5860 }, { "epoch": 35.33132530120482, "grad_norm": 1.704471468925476, "learning_rate": 2.9442771084337352e-05, "loss": 0.254, "step": 5865 }, { "epoch": 35.36144578313253, "grad_norm": 2.494004964828491, "learning_rate": 2.9467871485943778e-05, "loss": 0.2319, "step": 5870 }, { "epoch": 35.39156626506024, "grad_norm": 2.478168487548828, "learning_rate": 2.9492971887550204e-05, "loss": 0.2676, "step": 5875 }, { "epoch": 35.42168674698795, "grad_norm": 2.5450990200042725, "learning_rate": 2.951807228915663e-05, "loss": 0.2246, "step": 5880 }, { "epoch": 35.45180722891566, "grad_norm": 1.7174454927444458, "learning_rate": 2.9543172690763055e-05, "loss": 0.2338, "step": 5885 }, { "epoch": 35.48192771084337, "grad_norm": 2.448060989379883, "learning_rate": 2.956827309236948e-05, "loss": 0.2705, "step": 5890 }, { "epoch": 35.51204819277108, "grad_norm": 2.077549457550049, "learning_rate": 2.9593373493975906e-05, "loss": 0.2549, "step": 5895 }, { "epoch": 35.54216867469879, "grad_norm": 2.6109633445739746, "learning_rate": 2.9618473895582332e-05, "loss": 0.2472, "step": 5900 }, { "epoch": 35.5722891566265, "grad_norm": 2.551146984100342, "learning_rate": 2.9643574297188758e-05, "loss": 0.268, "step": 5905 }, { "epoch": 35.602409638554214, "grad_norm": 2.3147637844085693, "learning_rate": 2.9668674698795183e-05, "loss": 0.23, "step": 5910 }, { "epoch": 35.63253012048193, "grad_norm": 2.584393262863159, "learning_rate": 2.9693775100401606e-05, "loss": 0.2507, "step": 5915 }, { "epoch": 35.66265060240964, "grad_norm": 2.71573543548584, "learning_rate": 2.971887550200803e-05, "loss": 0.3107, "step": 5920 }, { "epoch": 35.69277108433735, "grad_norm": 2.746415376663208, "learning_rate": 2.9743975903614457e-05, "loss": 0.2501, "step": 5925 }, { "epoch": 35.72289156626506, "grad_norm": 1.9923425912857056, "learning_rate": 2.9769076305220883e-05, "loss": 0.2986, "step": 5930 }, { "epoch": 35.75301204819277, "grad_norm": 2.963034152984619, "learning_rate": 2.9794176706827308e-05, "loss": 0.2979, "step": 5935 }, { "epoch": 35.78313253012048, "grad_norm": 2.78971791267395, "learning_rate": 2.9819277108433734e-05, "loss": 0.2302, "step": 5940 }, { "epoch": 35.81325301204819, "grad_norm": 1.9936814308166504, "learning_rate": 2.9844377510040163e-05, "loss": 0.2608, "step": 5945 }, { "epoch": 35.8433734939759, "grad_norm": 2.7506966590881348, "learning_rate": 2.986947791164659e-05, "loss": 0.2347, "step": 5950 }, { "epoch": 35.873493975903614, "grad_norm": 2.516293525695801, "learning_rate": 2.9894578313253014e-05, "loss": 0.2546, "step": 5955 }, { "epoch": 35.903614457831324, "grad_norm": 3.7010300159454346, "learning_rate": 2.991967871485944e-05, "loss": 0.2571, "step": 5960 }, { "epoch": 35.933734939759034, "grad_norm": 2.489213466644287, "learning_rate": 2.9944779116465866e-05, "loss": 0.2735, "step": 5965 }, { "epoch": 35.963855421686745, "grad_norm": 3.4056196212768555, "learning_rate": 2.996987951807229e-05, "loss": 0.3093, "step": 5970 }, { "epoch": 35.993975903614455, "grad_norm": 2.641334295272827, "learning_rate": 2.9994979919678717e-05, "loss": 0.3134, "step": 5975 }, { "epoch": 36.0, "eval_accuracy": 0.9040503342508848, "eval_auc": 0.9602633527755456, "eval_f1": 0.8653421633554084, "eval_loss": 0.24029910564422607, "eval_precision": 0.8385026737967914, "eval_recall": 0.8939566704675028, "eval_runtime": 17.1657, "eval_samples_per_second": 148.145, "eval_steps_per_second": 0.757, "step": 5976 }, { "epoch": 36.024096385542165, "grad_norm": 2.1633102893829346, "learning_rate": 3.0020080321285143e-05, "loss": 0.2807, "step": 5980 }, { "epoch": 36.05421686746988, "grad_norm": 2.105492115020752, "learning_rate": 3.004518072289157e-05, "loss": 0.2768, "step": 5985 }, { "epoch": 36.08433734939759, "grad_norm": 2.0044474601745605, "learning_rate": 3.0070281124497994e-05, "loss": 0.2568, "step": 5990 }, { "epoch": 36.1144578313253, "grad_norm": 2.109309434890747, "learning_rate": 3.0095381526104423e-05, "loss": 0.2165, "step": 5995 }, { "epoch": 36.144578313253014, "grad_norm": 3.5626933574676514, "learning_rate": 3.012048192771085e-05, "loss": 0.2817, "step": 6000 }, { "epoch": 36.174698795180724, "grad_norm": 3.385136127471924, "learning_rate": 3.0145582329317268e-05, "loss": 0.2461, "step": 6005 }, { "epoch": 36.204819277108435, "grad_norm": 2.151554822921753, "learning_rate": 3.0170682730923693e-05, "loss": 0.2306, "step": 6010 }, { "epoch": 36.234939759036145, "grad_norm": 2.27700138092041, "learning_rate": 3.019578313253012e-05, "loss": 0.2384, "step": 6015 }, { "epoch": 36.265060240963855, "grad_norm": 2.4433555603027344, "learning_rate": 3.0220883534136545e-05, "loss": 0.2264, "step": 6020 }, { "epoch": 36.295180722891565, "grad_norm": 2.5779662132263184, "learning_rate": 3.024598393574297e-05, "loss": 0.2486, "step": 6025 }, { "epoch": 36.325301204819276, "grad_norm": 2.5328152179718018, "learning_rate": 3.02710843373494e-05, "loss": 0.2668, "step": 6030 }, { "epoch": 36.355421686746986, "grad_norm": 2.2948355674743652, "learning_rate": 3.0296184738955825e-05, "loss": 0.2898, "step": 6035 }, { "epoch": 36.3855421686747, "grad_norm": 2.2671010494232178, "learning_rate": 3.032128514056225e-05, "loss": 0.2695, "step": 6040 }, { "epoch": 36.41566265060241, "grad_norm": 1.7064307928085327, "learning_rate": 3.0346385542168676e-05, "loss": 0.2441, "step": 6045 }, { "epoch": 36.44578313253012, "grad_norm": 2.15734601020813, "learning_rate": 3.0371485943775102e-05, "loss": 0.26, "step": 6050 }, { "epoch": 36.475903614457835, "grad_norm": 2.113530158996582, "learning_rate": 3.0396586345381528e-05, "loss": 0.2525, "step": 6055 }, { "epoch": 36.506024096385545, "grad_norm": 1.943936824798584, "learning_rate": 3.0421686746987953e-05, "loss": 0.2653, "step": 6060 }, { "epoch": 36.536144578313255, "grad_norm": 1.9741214513778687, "learning_rate": 3.044678714859438e-05, "loss": 0.2628, "step": 6065 }, { "epoch": 36.566265060240966, "grad_norm": 2.583420991897583, "learning_rate": 3.0471887550200805e-05, "loss": 0.21, "step": 6070 }, { "epoch": 36.596385542168676, "grad_norm": 2.4453647136688232, "learning_rate": 3.049698795180723e-05, "loss": 0.2651, "step": 6075 }, { "epoch": 36.626506024096386, "grad_norm": 2.5714426040649414, "learning_rate": 3.052208835341366e-05, "loss": 0.2722, "step": 6080 }, { "epoch": 36.6566265060241, "grad_norm": 3.1495213508605957, "learning_rate": 3.054718875502008e-05, "loss": 0.2545, "step": 6085 }, { "epoch": 36.68674698795181, "grad_norm": 3.1175990104675293, "learning_rate": 3.057228915662651e-05, "loss": 0.2512, "step": 6090 }, { "epoch": 36.71686746987952, "grad_norm": 3.22166109085083, "learning_rate": 3.059738955823293e-05, "loss": 0.2541, "step": 6095 }, { "epoch": 36.74698795180723, "grad_norm": 2.9877281188964844, "learning_rate": 3.0622489959839355e-05, "loss": 0.2832, "step": 6100 }, { "epoch": 36.77710843373494, "grad_norm": 2.9391582012176514, "learning_rate": 3.0647590361445784e-05, "loss": 0.3119, "step": 6105 }, { "epoch": 36.80722891566265, "grad_norm": 2.741736650466919, "learning_rate": 3.067269076305221e-05, "loss": 0.3377, "step": 6110 }, { "epoch": 36.83734939759036, "grad_norm": 1.8150005340576172, "learning_rate": 3.0697791164658636e-05, "loss": 0.2782, "step": 6115 }, { "epoch": 36.86746987951807, "grad_norm": 2.3358445167541504, "learning_rate": 3.072289156626506e-05, "loss": 0.2753, "step": 6120 }, { "epoch": 36.897590361445786, "grad_norm": 2.2918527126312256, "learning_rate": 3.074799196787149e-05, "loss": 0.2636, "step": 6125 }, { "epoch": 36.9277108433735, "grad_norm": 2.7234408855438232, "learning_rate": 3.077309236947791e-05, "loss": 0.2631, "step": 6130 }, { "epoch": 36.95783132530121, "grad_norm": 2.4186816215515137, "learning_rate": 3.079819277108434e-05, "loss": 0.2569, "step": 6135 }, { "epoch": 36.98795180722892, "grad_norm": 2.6633265018463135, "learning_rate": 3.082329317269077e-05, "loss": 0.2653, "step": 6140 }, { "epoch": 37.0, "eval_accuracy": 0.9213527329925285, "eval_auc": 0.9617944098962276, "eval_f1": 0.8835855646100116, "eval_loss": 0.21790018677711487, "eval_precision": 0.9024970273483948, "eval_recall": 0.8654503990877993, "eval_runtime": 17.1708, "eval_samples_per_second": 148.1, "eval_steps_per_second": 0.757, "step": 6142 }, { "epoch": 37.01807228915663, "grad_norm": 2.8472111225128174, "learning_rate": 3.084839357429719e-05, "loss": 0.2264, "step": 6145 }, { "epoch": 37.04819277108434, "grad_norm": 2.0315515995025635, "learning_rate": 3.087349397590362e-05, "loss": 0.2352, "step": 6150 }, { "epoch": 37.07831325301205, "grad_norm": 2.0432772636413574, "learning_rate": 3.089859437751004e-05, "loss": 0.2107, "step": 6155 }, { "epoch": 37.10843373493976, "grad_norm": 2.9990720748901367, "learning_rate": 3.092369477911647e-05, "loss": 0.2566, "step": 6160 }, { "epoch": 37.13855421686747, "grad_norm": 2.3924663066864014, "learning_rate": 3.094879518072289e-05, "loss": 0.241, "step": 6165 }, { "epoch": 37.16867469879518, "grad_norm": 2.013744592666626, "learning_rate": 3.097389558232932e-05, "loss": 0.2483, "step": 6170 }, { "epoch": 37.19879518072289, "grad_norm": 1.6174145936965942, "learning_rate": 3.0998995983935744e-05, "loss": 0.2591, "step": 6175 }, { "epoch": 37.2289156626506, "grad_norm": 2.5364930629730225, "learning_rate": 3.102409638554217e-05, "loss": 0.2525, "step": 6180 }, { "epoch": 37.25903614457831, "grad_norm": 2.143467664718628, "learning_rate": 3.1049196787148595e-05, "loss": 0.2601, "step": 6185 }, { "epoch": 37.28915662650602, "grad_norm": 2.03132963180542, "learning_rate": 3.107429718875502e-05, "loss": 0.2342, "step": 6190 }, { "epoch": 37.31927710843374, "grad_norm": 2.004775285720825, "learning_rate": 3.1099397590361447e-05, "loss": 0.2839, "step": 6195 }, { "epoch": 37.34939759036145, "grad_norm": 2.106445074081421, "learning_rate": 3.112449799196787e-05, "loss": 0.241, "step": 6200 }, { "epoch": 37.37951807228916, "grad_norm": 2.17498779296875, "learning_rate": 3.11495983935743e-05, "loss": 0.2339, "step": 6205 }, { "epoch": 37.40963855421687, "grad_norm": 2.7526564598083496, "learning_rate": 3.117469879518072e-05, "loss": 0.2675, "step": 6210 }, { "epoch": 37.43975903614458, "grad_norm": 2.4853410720825195, "learning_rate": 3.119979919678715e-05, "loss": 0.2863, "step": 6215 }, { "epoch": 37.46987951807229, "grad_norm": 2.1971094608306885, "learning_rate": 3.122489959839358e-05, "loss": 0.2694, "step": 6220 }, { "epoch": 37.5, "grad_norm": 2.1958532333374023, "learning_rate": 3.125e-05, "loss": 0.2826, "step": 6225 }, { "epoch": 37.53012048192771, "grad_norm": 1.6764627695083618, "learning_rate": 3.127510040160643e-05, "loss": 0.2419, "step": 6230 }, { "epoch": 37.56024096385542, "grad_norm": 1.8171449899673462, "learning_rate": 3.130020080321285e-05, "loss": 0.2418, "step": 6235 }, { "epoch": 37.59036144578313, "grad_norm": 1.5661555528640747, "learning_rate": 3.132530120481928e-05, "loss": 0.2646, "step": 6240 }, { "epoch": 37.62048192771084, "grad_norm": 1.699576497077942, "learning_rate": 3.13504016064257e-05, "loss": 0.2558, "step": 6245 }, { "epoch": 37.65060240963855, "grad_norm": 2.271219253540039, "learning_rate": 3.137550200803213e-05, "loss": 0.2487, "step": 6250 }, { "epoch": 37.68072289156626, "grad_norm": 2.469144105911255, "learning_rate": 3.1400602409638555e-05, "loss": 0.2311, "step": 6255 }, { "epoch": 37.71084337349397, "grad_norm": 1.8686410188674927, "learning_rate": 3.1425702811244984e-05, "loss": 0.2417, "step": 6260 }, { "epoch": 37.74096385542169, "grad_norm": 2.3052306175231934, "learning_rate": 3.145080321285141e-05, "loss": 0.2551, "step": 6265 }, { "epoch": 37.7710843373494, "grad_norm": 2.1152069568634033, "learning_rate": 3.147590361445783e-05, "loss": 0.2644, "step": 6270 }, { "epoch": 37.80120481927711, "grad_norm": 3.332885265350342, "learning_rate": 3.150100401606426e-05, "loss": 0.2368, "step": 6275 }, { "epoch": 37.83132530120482, "grad_norm": 3.286970615386963, "learning_rate": 3.152610441767068e-05, "loss": 0.2925, "step": 6280 }, { "epoch": 37.86144578313253, "grad_norm": 2.1198184490203857, "learning_rate": 3.155120481927711e-05, "loss": 0.2001, "step": 6285 }, { "epoch": 37.89156626506024, "grad_norm": 2.578740119934082, "learning_rate": 3.157630522088353e-05, "loss": 0.2602, "step": 6290 }, { "epoch": 37.92168674698795, "grad_norm": 2.738973617553711, "learning_rate": 3.160140562248996e-05, "loss": 0.2542, "step": 6295 }, { "epoch": 37.95180722891566, "grad_norm": 2.4956698417663574, "learning_rate": 3.162650602409639e-05, "loss": 0.2792, "step": 6300 }, { "epoch": 37.98192771084337, "grad_norm": 2.3061397075653076, "learning_rate": 3.165160642570281e-05, "loss": 0.2746, "step": 6305 }, { "epoch": 38.0, "eval_accuracy": 0.9182068423122296, "eval_auc": 0.9663975054103738, "eval_f1": 0.8737864077669902, "eval_loss": 0.2299535572528839, "eval_precision": 0.933852140077821, "eval_recall": 0.8209806157354618, "eval_runtime": 17.2374, "eval_samples_per_second": 147.528, "eval_steps_per_second": 0.754, "step": 6308 }, { "epoch": 38.01204819277108, "grad_norm": 2.216925859451294, "learning_rate": 3.167670682730924e-05, "loss": 0.228, "step": 6310 }, { "epoch": 38.04216867469879, "grad_norm": 1.9454985857009888, "learning_rate": 3.170180722891566e-05, "loss": 0.2047, "step": 6315 }, { "epoch": 38.0722891566265, "grad_norm": 2.274045467376709, "learning_rate": 3.172690763052209e-05, "loss": 0.2281, "step": 6320 }, { "epoch": 38.102409638554214, "grad_norm": 2.2698090076446533, "learning_rate": 3.1752008032128514e-05, "loss": 0.2617, "step": 6325 }, { "epoch": 38.13253012048193, "grad_norm": 2.3226888179779053, "learning_rate": 3.177710843373494e-05, "loss": 0.2442, "step": 6330 }, { "epoch": 38.16265060240964, "grad_norm": 2.456148147583008, "learning_rate": 3.1802208835341365e-05, "loss": 0.2716, "step": 6335 }, { "epoch": 38.19277108433735, "grad_norm": 2.4330947399139404, "learning_rate": 3.1827309236947795e-05, "loss": 0.2403, "step": 6340 }, { "epoch": 38.22289156626506, "grad_norm": 3.385854959487915, "learning_rate": 3.1852409638554224e-05, "loss": 0.2505, "step": 6345 }, { "epoch": 38.25301204819277, "grad_norm": 2.735837936401367, "learning_rate": 3.1877510040160646e-05, "loss": 0.2294, "step": 6350 }, { "epoch": 38.28313253012048, "grad_norm": 3.903822660446167, "learning_rate": 3.1902610441767075e-05, "loss": 0.225, "step": 6355 }, { "epoch": 38.31325301204819, "grad_norm": 2.4196770191192627, "learning_rate": 3.192771084337349e-05, "loss": 0.2276, "step": 6360 }, { "epoch": 38.3433734939759, "grad_norm": 2.6809000968933105, "learning_rate": 3.195281124497992e-05, "loss": 0.2765, "step": 6365 }, { "epoch": 38.373493975903614, "grad_norm": 2.375967025756836, "learning_rate": 3.197791164658634e-05, "loss": 0.2218, "step": 6370 }, { "epoch": 38.403614457831324, "grad_norm": 2.497779130935669, "learning_rate": 3.200301204819277e-05, "loss": 0.2772, "step": 6375 }, { "epoch": 38.433734939759034, "grad_norm": 3.284944534301758, "learning_rate": 3.20281124497992e-05, "loss": 0.2785, "step": 6380 }, { "epoch": 38.463855421686745, "grad_norm": 2.3392996788024902, "learning_rate": 3.205321285140562e-05, "loss": 0.2187, "step": 6385 }, { "epoch": 38.493975903614455, "grad_norm": 2.683300495147705, "learning_rate": 3.207831325301205e-05, "loss": 0.2323, "step": 6390 }, { "epoch": 38.524096385542165, "grad_norm": 2.324141502380371, "learning_rate": 3.2103413654618474e-05, "loss": 0.2644, "step": 6395 }, { "epoch": 38.55421686746988, "grad_norm": 1.9178905487060547, "learning_rate": 3.21285140562249e-05, "loss": 0.2284, "step": 6400 }, { "epoch": 38.58433734939759, "grad_norm": 2.290726661682129, "learning_rate": 3.2153614457831325e-05, "loss": 0.2214, "step": 6405 }, { "epoch": 38.6144578313253, "grad_norm": 2.622251510620117, "learning_rate": 3.2178714859437754e-05, "loss": 0.2897, "step": 6410 }, { "epoch": 38.644578313253014, "grad_norm": 2.4100887775421143, "learning_rate": 3.2203815261044176e-05, "loss": 0.2199, "step": 6415 }, { "epoch": 38.674698795180724, "grad_norm": 2.782029151916504, "learning_rate": 3.2228915662650605e-05, "loss": 0.2315, "step": 6420 }, { "epoch": 38.704819277108435, "grad_norm": 2.4892172813415527, "learning_rate": 3.2254016064257034e-05, "loss": 0.2177, "step": 6425 }, { "epoch": 38.734939759036145, "grad_norm": 3.1388111114501953, "learning_rate": 3.227911646586346e-05, "loss": 0.2011, "step": 6430 }, { "epoch": 38.765060240963855, "grad_norm": 2.3266518115997314, "learning_rate": 3.2304216867469886e-05, "loss": 0.2693, "step": 6435 }, { "epoch": 38.795180722891565, "grad_norm": 2.3568663597106934, "learning_rate": 3.232931726907631e-05, "loss": 0.2237, "step": 6440 }, { "epoch": 38.825301204819276, "grad_norm": 2.961787462234497, "learning_rate": 3.235441767068274e-05, "loss": 0.2548, "step": 6445 }, { "epoch": 38.855421686746986, "grad_norm": 3.2425358295440674, "learning_rate": 3.237951807228915e-05, "loss": 0.2585, "step": 6450 }, { "epoch": 38.8855421686747, "grad_norm": 2.586047410964966, "learning_rate": 3.240461847389558e-05, "loss": 0.2849, "step": 6455 }, { "epoch": 38.91566265060241, "grad_norm": 1.9064887762069702, "learning_rate": 3.242971887550201e-05, "loss": 0.2631, "step": 6460 }, { "epoch": 38.94578313253012, "grad_norm": 2.6929991245269775, "learning_rate": 3.245481927710843e-05, "loss": 0.285, "step": 6465 }, { "epoch": 38.975903614457835, "grad_norm": 2.097921133041382, "learning_rate": 3.247991967871486e-05, "loss": 0.2464, "step": 6470 }, { "epoch": 39.0, "eval_accuracy": 0.913094769956744, "eval_auc": 0.9603937356014242, "eval_f1": 0.8709865732632808, "eval_loss": 0.23605474829673767, "eval_precision": 0.8923444976076556, "eval_recall": 0.8506271379703535, "eval_runtime": 18.2756, "eval_samples_per_second": 139.147, "eval_steps_per_second": 0.711, "step": 6474 }, { "epoch": 39.006024096385545, "grad_norm": 2.050915002822876, "learning_rate": 3.2505020080321284e-05, "loss": 0.2261, "step": 6475 }, { "epoch": 39.036144578313255, "grad_norm": 2.389162302017212, "learning_rate": 3.253012048192771e-05, "loss": 0.2349, "step": 6480 }, { "epoch": 39.066265060240966, "grad_norm": 2.4876036643981934, "learning_rate": 3.2555220883534136e-05, "loss": 0.222, "step": 6485 }, { "epoch": 39.096385542168676, "grad_norm": 2.4081146717071533, "learning_rate": 3.2580321285140565e-05, "loss": 0.2289, "step": 6490 }, { "epoch": 39.126506024096386, "grad_norm": 2.9568965435028076, "learning_rate": 3.260542168674699e-05, "loss": 0.2343, "step": 6495 }, { "epoch": 39.1566265060241, "grad_norm": 2.8140311241149902, "learning_rate": 3.2630522088353416e-05, "loss": 0.2473, "step": 6500 }, { "epoch": 39.18674698795181, "grad_norm": 3.025899648666382, "learning_rate": 3.265562248995984e-05, "loss": 0.2317, "step": 6505 }, { "epoch": 39.21686746987952, "grad_norm": 3.627990245819092, "learning_rate": 3.268072289156627e-05, "loss": 0.2362, "step": 6510 }, { "epoch": 39.24698795180723, "grad_norm": 1.3940659761428833, "learning_rate": 3.2705823293172696e-05, "loss": 0.2056, "step": 6515 }, { "epoch": 39.27710843373494, "grad_norm": 2.9245965480804443, "learning_rate": 3.273092369477912e-05, "loss": 0.2311, "step": 6520 }, { "epoch": 39.30722891566265, "grad_norm": 2.4287238121032715, "learning_rate": 3.275602409638555e-05, "loss": 0.2385, "step": 6525 }, { "epoch": 39.33734939759036, "grad_norm": 2.023843765258789, "learning_rate": 3.278112449799197e-05, "loss": 0.2409, "step": 6530 }, { "epoch": 39.36746987951807, "grad_norm": 1.6753655672073364, "learning_rate": 3.28062248995984e-05, "loss": 0.2207, "step": 6535 }, { "epoch": 39.397590361445786, "grad_norm": 2.5869171619415283, "learning_rate": 3.283132530120482e-05, "loss": 0.2518, "step": 6540 }, { "epoch": 39.4277108433735, "grad_norm": 1.6591705083847046, "learning_rate": 3.2856425702811244e-05, "loss": 0.2157, "step": 6545 }, { "epoch": 39.45783132530121, "grad_norm": 1.6560969352722168, "learning_rate": 3.288152610441767e-05, "loss": 0.2458, "step": 6550 }, { "epoch": 39.48795180722892, "grad_norm": 2.0736494064331055, "learning_rate": 3.2906626506024095e-05, "loss": 0.2072, "step": 6555 }, { "epoch": 39.51807228915663, "grad_norm": 2.9606733322143555, "learning_rate": 3.2931726907630524e-05, "loss": 0.2252, "step": 6560 }, { "epoch": 39.54819277108434, "grad_norm": 2.837667226791382, "learning_rate": 3.2956827309236946e-05, "loss": 0.2588, "step": 6565 }, { "epoch": 39.57831325301205, "grad_norm": 2.7487363815307617, "learning_rate": 3.2981927710843376e-05, "loss": 0.269, "step": 6570 }, { "epoch": 39.60843373493976, "grad_norm": 2.1929149627685547, "learning_rate": 3.30070281124498e-05, "loss": 0.2282, "step": 6575 }, { "epoch": 39.63855421686747, "grad_norm": 3.401123046875, "learning_rate": 3.303212851405623e-05, "loss": 0.2388, "step": 6580 }, { "epoch": 39.66867469879518, "grad_norm": 2.7502694129943848, "learning_rate": 3.305722891566265e-05, "loss": 0.2215, "step": 6585 }, { "epoch": 39.69879518072289, "grad_norm": 2.4091484546661377, "learning_rate": 3.308232931726908e-05, "loss": 0.2476, "step": 6590 }, { "epoch": 39.7289156626506, "grad_norm": 2.429182767868042, "learning_rate": 3.310742971887551e-05, "loss": 0.2619, "step": 6595 }, { "epoch": 39.75903614457831, "grad_norm": 2.5643534660339355, "learning_rate": 3.313253012048193e-05, "loss": 0.2515, "step": 6600 }, { "epoch": 39.78915662650603, "grad_norm": 1.7576724290847778, "learning_rate": 3.315763052208836e-05, "loss": 0.2011, "step": 6605 }, { "epoch": 39.81927710843374, "grad_norm": 2.449651002883911, "learning_rate": 3.318273092369478e-05, "loss": 0.2431, "step": 6610 }, { "epoch": 39.84939759036145, "grad_norm": 2.387871265411377, "learning_rate": 3.320783132530121e-05, "loss": 0.2842, "step": 6615 }, { "epoch": 39.87951807228916, "grad_norm": 1.8376022577285767, "learning_rate": 3.323293172690763e-05, "loss": 0.2144, "step": 6620 }, { "epoch": 39.90963855421687, "grad_norm": 3.066810131072998, "learning_rate": 3.325803212851406e-05, "loss": 0.2541, "step": 6625 }, { "epoch": 39.93975903614458, "grad_norm": 2.554441213607788, "learning_rate": 3.3283132530120484e-05, "loss": 0.2243, "step": 6630 }, { "epoch": 39.96987951807229, "grad_norm": 2.362246036529541, "learning_rate": 3.3308232931726906e-05, "loss": 0.2236, "step": 6635 }, { "epoch": 40.0, "grad_norm": 2.932560682296753, "learning_rate": 3.3333333333333335e-05, "loss": 0.3024, "step": 6640 }, { "epoch": 40.0, "eval_accuracy": 0.9142744789618561, "eval_auc": 0.9637652096186251, "eval_f1": 0.874133949191686, "eval_loss": 0.22493180632591248, "eval_precision": 0.8853801169590644, "eval_recall": 0.863169897377423, "eval_runtime": 20.6328, "eval_samples_per_second": 123.25, "eval_steps_per_second": 0.63, "step": 6640 }, { "epoch": 40.03012048192771, "grad_norm": 2.6266329288482666, "learning_rate": 3.335843373493976e-05, "loss": 0.2429, "step": 6645 }, { "epoch": 40.06024096385542, "grad_norm": 1.4775539636611938, "learning_rate": 3.3383534136546186e-05, "loss": 0.2261, "step": 6650 }, { "epoch": 40.09036144578313, "grad_norm": 2.8822290897369385, "learning_rate": 3.340863453815261e-05, "loss": 0.2474, "step": 6655 }, { "epoch": 40.12048192771084, "grad_norm": 2.0075509548187256, "learning_rate": 3.343373493975904e-05, "loss": 0.2164, "step": 6660 }, { "epoch": 40.15060240963855, "grad_norm": 1.9930087327957153, "learning_rate": 3.345883534136546e-05, "loss": 0.2185, "step": 6665 }, { "epoch": 40.18072289156626, "grad_norm": 1.7394765615463257, "learning_rate": 3.348393574297189e-05, "loss": 0.2029, "step": 6670 }, { "epoch": 40.21084337349398, "grad_norm": 2.443556785583496, "learning_rate": 3.350903614457832e-05, "loss": 0.2051, "step": 6675 }, { "epoch": 40.24096385542169, "grad_norm": 2.26416015625, "learning_rate": 3.353413654618474e-05, "loss": 0.2313, "step": 6680 }, { "epoch": 40.2710843373494, "grad_norm": 2.6214842796325684, "learning_rate": 3.355923694779117e-05, "loss": 0.2137, "step": 6685 }, { "epoch": 40.30120481927711, "grad_norm": 3.211836814880371, "learning_rate": 3.358433734939759e-05, "loss": 0.2527, "step": 6690 }, { "epoch": 40.33132530120482, "grad_norm": 2.4180943965911865, "learning_rate": 3.360943775100402e-05, "loss": 0.2209, "step": 6695 }, { "epoch": 40.36144578313253, "grad_norm": 3.3650221824645996, "learning_rate": 3.363453815261044e-05, "loss": 0.2426, "step": 6700 }, { "epoch": 40.39156626506024, "grad_norm": 2.210585832595825, "learning_rate": 3.365963855421687e-05, "loss": 0.216, "step": 6705 }, { "epoch": 40.42168674698795, "grad_norm": 2.640681743621826, "learning_rate": 3.3684738955823294e-05, "loss": 0.2473, "step": 6710 }, { "epoch": 40.45180722891566, "grad_norm": 2.018688917160034, "learning_rate": 3.370983935742972e-05, "loss": 0.2067, "step": 6715 }, { "epoch": 40.48192771084337, "grad_norm": 2.212965965270996, "learning_rate": 3.3734939759036146e-05, "loss": 0.2308, "step": 6720 }, { "epoch": 40.51204819277108, "grad_norm": 1.9449925422668457, "learning_rate": 3.376004016064257e-05, "loss": 0.2535, "step": 6725 }, { "epoch": 40.54216867469879, "grad_norm": 2.9005157947540283, "learning_rate": 3.3785140562249e-05, "loss": 0.231, "step": 6730 }, { "epoch": 40.5722891566265, "grad_norm": 2.57045578956604, "learning_rate": 3.381024096385542e-05, "loss": 0.2938, "step": 6735 }, { "epoch": 40.602409638554214, "grad_norm": 2.8470585346221924, "learning_rate": 3.383534136546185e-05, "loss": 0.2453, "step": 6740 }, { "epoch": 40.63253012048193, "grad_norm": 2.2170114517211914, "learning_rate": 3.386044176706827e-05, "loss": 0.2318, "step": 6745 }, { "epoch": 40.66265060240964, "grad_norm": 2.3872134685516357, "learning_rate": 3.38855421686747e-05, "loss": 0.2196, "step": 6750 }, { "epoch": 40.69277108433735, "grad_norm": 1.892056941986084, "learning_rate": 3.391064257028113e-05, "loss": 0.2559, "step": 6755 }, { "epoch": 40.72289156626506, "grad_norm": 1.9791001081466675, "learning_rate": 3.393574297188755e-05, "loss": 0.2077, "step": 6760 }, { "epoch": 40.75301204819277, "grad_norm": 1.5290873050689697, "learning_rate": 3.396084337349398e-05, "loss": 0.2508, "step": 6765 }, { "epoch": 40.78313253012048, "grad_norm": 3.184123992919922, "learning_rate": 3.39859437751004e-05, "loss": 0.2223, "step": 6770 }, { "epoch": 40.81325301204819, "grad_norm": 3.206636428833008, "learning_rate": 3.401104417670683e-05, "loss": 0.2824, "step": 6775 }, { "epoch": 40.8433734939759, "grad_norm": 1.9404165744781494, "learning_rate": 3.4036144578313254e-05, "loss": 0.2262, "step": 6780 }, { "epoch": 40.873493975903614, "grad_norm": 2.239152431488037, "learning_rate": 3.406124497991968e-05, "loss": 0.2436, "step": 6785 }, { "epoch": 40.903614457831324, "grad_norm": 3.0163211822509766, "learning_rate": 3.4086345381526105e-05, "loss": 0.2324, "step": 6790 }, { "epoch": 40.933734939759034, "grad_norm": 2.691326141357422, "learning_rate": 3.4111445783132534e-05, "loss": 0.2229, "step": 6795 }, { "epoch": 40.963855421686745, "grad_norm": 2.370250940322876, "learning_rate": 3.413654618473896e-05, "loss": 0.2414, "step": 6800 }, { "epoch": 40.993975903614455, "grad_norm": 1.2468866109848022, "learning_rate": 3.416164658634538e-05, "loss": 0.2063, "step": 6805 }, { "epoch": 41.0, "eval_accuracy": 0.906409752261109, "eval_auc": 0.9662151063390008, "eval_f1": 0.8491761723700887, "eval_loss": 0.2846982181072235, "eval_precision": 0.9557774607703281, "eval_recall": 0.7639680729760547, "eval_runtime": 16.9377, "eval_samples_per_second": 150.138, "eval_steps_per_second": 0.768, "step": 6806 }, { "epoch": 41.024096385542165, "grad_norm": 3.1601436138153076, "learning_rate": 3.418674698795181e-05, "loss": 0.2414, "step": 6810 }, { "epoch": 41.05421686746988, "grad_norm": 2.834940195083618, "learning_rate": 3.421184738955823e-05, "loss": 0.1988, "step": 6815 }, { "epoch": 41.08433734939759, "grad_norm": 2.317030668258667, "learning_rate": 3.423694779116466e-05, "loss": 0.2078, "step": 6820 }, { "epoch": 41.1144578313253, "grad_norm": 2.0118911266326904, "learning_rate": 3.426204819277108e-05, "loss": 0.2099, "step": 6825 }, { "epoch": 41.144578313253014, "grad_norm": 1.7952815294265747, "learning_rate": 3.428714859437751e-05, "loss": 0.2267, "step": 6830 }, { "epoch": 41.174698795180724, "grad_norm": 3.234902858734131, "learning_rate": 3.431224899598394e-05, "loss": 0.2371, "step": 6835 }, { "epoch": 41.204819277108435, "grad_norm": 2.2863574028015137, "learning_rate": 3.433734939759036e-05, "loss": 0.2259, "step": 6840 }, { "epoch": 41.234939759036145, "grad_norm": 2.8692567348480225, "learning_rate": 3.436244979919679e-05, "loss": 0.2237, "step": 6845 }, { "epoch": 41.265060240963855, "grad_norm": 2.949395179748535, "learning_rate": 3.438755020080321e-05, "loss": 0.2644, "step": 6850 }, { "epoch": 41.295180722891565, "grad_norm": 2.0119643211364746, "learning_rate": 3.441265060240964e-05, "loss": 0.2103, "step": 6855 }, { "epoch": 41.325301204819276, "grad_norm": 2.5554463863372803, "learning_rate": 3.4437751004016065e-05, "loss": 0.2422, "step": 6860 }, { "epoch": 41.355421686746986, "grad_norm": 2.153459310531616, "learning_rate": 3.4462851405622494e-05, "loss": 0.251, "step": 6865 }, { "epoch": 41.3855421686747, "grad_norm": 3.219743013381958, "learning_rate": 3.4487951807228916e-05, "loss": 0.2173, "step": 6870 }, { "epoch": 41.41566265060241, "grad_norm": 2.026254892349243, "learning_rate": 3.4513052208835345e-05, "loss": 0.236, "step": 6875 }, { "epoch": 41.44578313253012, "grad_norm": 3.212785482406616, "learning_rate": 3.4538152610441774e-05, "loss": 0.2829, "step": 6880 }, { "epoch": 41.475903614457835, "grad_norm": 2.520616292953491, "learning_rate": 3.4563253012048196e-05, "loss": 0.2315, "step": 6885 }, { "epoch": 41.506024096385545, "grad_norm": 2.249114513397217, "learning_rate": 3.4588353413654625e-05, "loss": 0.254, "step": 6890 }, { "epoch": 41.536144578313255, "grad_norm": 2.5036075115203857, "learning_rate": 3.461345381526104e-05, "loss": 0.2379, "step": 6895 }, { "epoch": 41.566265060240966, "grad_norm": 2.1759984493255615, "learning_rate": 3.463855421686747e-05, "loss": 0.2192, "step": 6900 }, { "epoch": 41.596385542168676, "grad_norm": 3.1835083961486816, "learning_rate": 3.466365461847389e-05, "loss": 0.2439, "step": 6905 }, { "epoch": 41.626506024096386, "grad_norm": 1.9755939245224, "learning_rate": 3.468875502008032e-05, "loss": 0.218, "step": 6910 }, { "epoch": 41.6566265060241, "grad_norm": 2.0728402137756348, "learning_rate": 3.471385542168675e-05, "loss": 0.2295, "step": 6915 }, { "epoch": 41.68674698795181, "grad_norm": 2.078345775604248, "learning_rate": 3.473895582329317e-05, "loss": 0.2214, "step": 6920 }, { "epoch": 41.71686746987952, "grad_norm": 2.592635154724121, "learning_rate": 3.47640562248996e-05, "loss": 0.2701, "step": 6925 }, { "epoch": 41.74698795180723, "grad_norm": 2.844268321990967, "learning_rate": 3.4789156626506024e-05, "loss": 0.2432, "step": 6930 }, { "epoch": 41.77710843373494, "grad_norm": 2.249371290206909, "learning_rate": 3.481425702811245e-05, "loss": 0.2246, "step": 6935 }, { "epoch": 41.80722891566265, "grad_norm": 2.175766944885254, "learning_rate": 3.4839357429718875e-05, "loss": 0.2312, "step": 6940 }, { "epoch": 41.83734939759036, "grad_norm": 2.340944766998291, "learning_rate": 3.4864457831325304e-05, "loss": 0.2527, "step": 6945 }, { "epoch": 41.86746987951807, "grad_norm": 2.598799705505371, "learning_rate": 3.488955823293173e-05, "loss": 0.2205, "step": 6950 }, { "epoch": 41.897590361445786, "grad_norm": 2.0303046703338623, "learning_rate": 3.4914658634538156e-05, "loss": 0.2323, "step": 6955 }, { "epoch": 41.9277108433735, "grad_norm": 2.12449049949646, "learning_rate": 3.4939759036144585e-05, "loss": 0.2037, "step": 6960 }, { "epoch": 41.95783132530121, "grad_norm": 2.0875773429870605, "learning_rate": 3.496485943775101e-05, "loss": 0.1861, "step": 6965 }, { "epoch": 41.98795180722892, "grad_norm": 1.7724308967590332, "learning_rate": 3.4989959839357436e-05, "loss": 0.2293, "step": 6970 }, { "epoch": 42.0, "eval_accuracy": 0.9182068423122296, "eval_auc": 0.9686663719079422, "eval_f1": 0.8751500600240096, "eval_loss": 0.22627486288547516, "eval_precision": 0.9239543726235742, "eval_recall": 0.8312428734321551, "eval_runtime": 17.6652, "eval_samples_per_second": 143.956, "eval_steps_per_second": 0.736, "step": 6972 }, { "epoch": 42.01807228915663, "grad_norm": 3.5082550048828125, "learning_rate": 3.501506024096386e-05, "loss": 0.2477, "step": 6975 }, { "epoch": 42.04819277108434, "grad_norm": 2.38348388671875, "learning_rate": 3.504016064257029e-05, "loss": 0.1945, "step": 6980 }, { "epoch": 42.07831325301205, "grad_norm": 2.430093765258789, "learning_rate": 3.50652610441767e-05, "loss": 0.2482, "step": 6985 }, { "epoch": 42.10843373493976, "grad_norm": 2.023977279663086, "learning_rate": 3.509036144578313e-05, "loss": 0.2388, "step": 6990 }, { "epoch": 42.13855421686747, "grad_norm": 2.185081720352173, "learning_rate": 3.511546184738956e-05, "loss": 0.2147, "step": 6995 }, { "epoch": 42.16867469879518, "grad_norm": 2.2125322818756104, "learning_rate": 3.5140562248995983e-05, "loss": 0.2178, "step": 7000 }, { "epoch": 42.19879518072289, "grad_norm": 1.4559276103973389, "learning_rate": 3.516566265060241e-05, "loss": 0.2033, "step": 7005 }, { "epoch": 42.2289156626506, "grad_norm": 1.966293454170227, "learning_rate": 3.5190763052208835e-05, "loss": 0.2295, "step": 7010 }, { "epoch": 42.25903614457831, "grad_norm": 2.518218994140625, "learning_rate": 3.5215863453815264e-05, "loss": 0.2385, "step": 7015 }, { "epoch": 42.28915662650602, "grad_norm": 3.4173643589019775, "learning_rate": 3.5240963855421686e-05, "loss": 0.2373, "step": 7020 }, { "epoch": 42.31927710843374, "grad_norm": 1.9910823106765747, "learning_rate": 3.5266064257028115e-05, "loss": 0.2021, "step": 7025 }, { "epoch": 42.34939759036145, "grad_norm": 2.9118754863739014, "learning_rate": 3.529116465863454e-05, "loss": 0.2254, "step": 7030 }, { "epoch": 42.37951807228916, "grad_norm": 1.894925832748413, "learning_rate": 3.5316265060240967e-05, "loss": 0.1968, "step": 7035 }, { "epoch": 42.40963855421687, "grad_norm": 2.5969414710998535, "learning_rate": 3.534136546184739e-05, "loss": 0.2411, "step": 7040 }, { "epoch": 42.43975903614458, "grad_norm": 3.139529228210449, "learning_rate": 3.536646586345382e-05, "loss": 0.2214, "step": 7045 }, { "epoch": 42.46987951807229, "grad_norm": 2.6836605072021484, "learning_rate": 3.539156626506025e-05, "loss": 0.2248, "step": 7050 }, { "epoch": 42.5, "grad_norm": 1.436519742012024, "learning_rate": 3.541666666666667e-05, "loss": 0.2297, "step": 7055 }, { "epoch": 42.53012048192771, "grad_norm": 1.9998631477355957, "learning_rate": 3.54417670682731e-05, "loss": 0.2392, "step": 7060 }, { "epoch": 42.56024096385542, "grad_norm": 1.3359869718551636, "learning_rate": 3.546686746987952e-05, "loss": 0.2042, "step": 7065 }, { "epoch": 42.59036144578313, "grad_norm": 2.925895929336548, "learning_rate": 3.549196787148594e-05, "loss": 0.2243, "step": 7070 }, { "epoch": 42.62048192771084, "grad_norm": 2.230924606323242, "learning_rate": 3.551706827309237e-05, "loss": 0.2574, "step": 7075 }, { "epoch": 42.65060240963855, "grad_norm": 1.888053059577942, "learning_rate": 3.5542168674698794e-05, "loss": 0.255, "step": 7080 }, { "epoch": 42.68072289156626, "grad_norm": 3.2984719276428223, "learning_rate": 3.556726907630522e-05, "loss": 0.2314, "step": 7085 }, { "epoch": 42.71084337349397, "grad_norm": 2.741931200027466, "learning_rate": 3.5592369477911646e-05, "loss": 0.2333, "step": 7090 }, { "epoch": 42.74096385542169, "grad_norm": 2.12164044380188, "learning_rate": 3.5617469879518075e-05, "loss": 0.2533, "step": 7095 }, { "epoch": 42.7710843373494, "grad_norm": 2.296083688735962, "learning_rate": 3.56425702811245e-05, "loss": 0.2438, "step": 7100 }, { "epoch": 42.80120481927711, "grad_norm": 2.5614187717437744, "learning_rate": 3.5667670682730926e-05, "loss": 0.2952, "step": 7105 }, { "epoch": 42.83132530120482, "grad_norm": 3.353217363357544, "learning_rate": 3.569277108433735e-05, "loss": 0.2304, "step": 7110 }, { "epoch": 42.86144578313253, "grad_norm": 1.8721473217010498, "learning_rate": 3.571787148594378e-05, "loss": 0.2587, "step": 7115 }, { "epoch": 42.89156626506024, "grad_norm": 2.8771235942840576, "learning_rate": 3.57429718875502e-05, "loss": 0.2924, "step": 7120 }, { "epoch": 42.92168674698795, "grad_norm": 2.3226511478424072, "learning_rate": 3.576807228915663e-05, "loss": 0.2435, "step": 7125 }, { "epoch": 42.95180722891566, "grad_norm": 2.316760540008545, "learning_rate": 3.579317269076306e-05, "loss": 0.2272, "step": 7130 }, { "epoch": 42.98192771084337, "grad_norm": 2.159874439239502, "learning_rate": 3.581827309236948e-05, "loss": 0.2165, "step": 7135 }, { "epoch": 43.0, "eval_accuracy": 0.9071962249311837, "eval_auc": 0.959778780383305, "eval_f1": 0.8567961165048543, "eval_loss": 0.26574328541755676, "eval_precision": 0.9156939040207522, "eval_recall": 0.8050171037628279, "eval_runtime": 17.2547, "eval_samples_per_second": 147.38, "eval_steps_per_second": 0.753, "step": 7138 }, { "epoch": 43.01204819277108, "grad_norm": 1.6516984701156616, "learning_rate": 3.584337349397591e-05, "loss": 0.2088, "step": 7140 }, { "epoch": 43.04216867469879, "grad_norm": 2.384612798690796, "learning_rate": 3.586847389558233e-05, "loss": 0.2276, "step": 7145 }, { "epoch": 43.0722891566265, "grad_norm": 1.8337193727493286, "learning_rate": 3.589357429718876e-05, "loss": 0.1962, "step": 7150 }, { "epoch": 43.102409638554214, "grad_norm": 1.7397912740707397, "learning_rate": 3.591867469879518e-05, "loss": 0.2244, "step": 7155 }, { "epoch": 43.13253012048193, "grad_norm": 2.2589216232299805, "learning_rate": 3.5943775100401605e-05, "loss": 0.2308, "step": 7160 }, { "epoch": 43.16265060240964, "grad_norm": 1.7657392024993896, "learning_rate": 3.5968875502008034e-05, "loss": 0.2065, "step": 7165 }, { "epoch": 43.19277108433735, "grad_norm": 2.2116403579711914, "learning_rate": 3.5993975903614456e-05, "loss": 0.1994, "step": 7170 }, { "epoch": 43.22289156626506, "grad_norm": 2.4515957832336426, "learning_rate": 3.6019076305220885e-05, "loss": 0.2301, "step": 7175 }, { "epoch": 43.25301204819277, "grad_norm": 2.409895181655884, "learning_rate": 3.604417670682731e-05, "loss": 0.2628, "step": 7180 }, { "epoch": 43.28313253012048, "grad_norm": 1.354292869567871, "learning_rate": 3.606927710843374e-05, "loss": 0.1978, "step": 7185 }, { "epoch": 43.31325301204819, "grad_norm": 2.4475462436676025, "learning_rate": 3.609437751004016e-05, "loss": 0.234, "step": 7190 }, { "epoch": 43.3433734939759, "grad_norm": 1.8721057176589966, "learning_rate": 3.611947791164659e-05, "loss": 0.1964, "step": 7195 }, { "epoch": 43.373493975903614, "grad_norm": 1.691802740097046, "learning_rate": 3.614457831325301e-05, "loss": 0.2253, "step": 7200 }, { "epoch": 43.403614457831324, "grad_norm": 2.6589553356170654, "learning_rate": 3.616967871485944e-05, "loss": 0.2063, "step": 7205 }, { "epoch": 43.433734939759034, "grad_norm": 3.7984652519226074, "learning_rate": 3.619477911646587e-05, "loss": 0.2669, "step": 7210 }, { "epoch": 43.463855421686745, "grad_norm": 2.075361728668213, "learning_rate": 3.621987951807229e-05, "loss": 0.2199, "step": 7215 }, { "epoch": 43.493975903614455, "grad_norm": 2.824955940246582, "learning_rate": 3.624497991967872e-05, "loss": 0.2655, "step": 7220 }, { "epoch": 43.524096385542165, "grad_norm": 2.5480446815490723, "learning_rate": 3.627008032128514e-05, "loss": 0.2485, "step": 7225 }, { "epoch": 43.55421686746988, "grad_norm": 1.8091715574264526, "learning_rate": 3.629518072289157e-05, "loss": 0.2128, "step": 7230 }, { "epoch": 43.58433734939759, "grad_norm": 1.4671165943145752, "learning_rate": 3.6320281124497993e-05, "loss": 0.2471, "step": 7235 }, { "epoch": 43.6144578313253, "grad_norm": 2.3526570796966553, "learning_rate": 3.634538152610442e-05, "loss": 0.2348, "step": 7240 }, { "epoch": 43.644578313253014, "grad_norm": 2.2521297931671143, "learning_rate": 3.6370481927710845e-05, "loss": 0.1954, "step": 7245 }, { "epoch": 43.674698795180724, "grad_norm": 2.4531917572021484, "learning_rate": 3.639558232931727e-05, "loss": 0.268, "step": 7250 }, { "epoch": 43.704819277108435, "grad_norm": 1.406149983406067, "learning_rate": 3.6420682730923696e-05, "loss": 0.2, "step": 7255 }, { "epoch": 43.734939759036145, "grad_norm": 1.7887499332427979, "learning_rate": 3.644578313253012e-05, "loss": 0.183, "step": 7260 }, { "epoch": 43.765060240963855, "grad_norm": 3.3391366004943848, "learning_rate": 3.647088353413655e-05, "loss": 0.2299, "step": 7265 }, { "epoch": 43.795180722891565, "grad_norm": 2.490065574645996, "learning_rate": 3.649598393574297e-05, "loss": 0.2554, "step": 7270 }, { "epoch": 43.825301204819276, "grad_norm": 2.691765069961548, "learning_rate": 3.65210843373494e-05, "loss": 0.2079, "step": 7275 }, { "epoch": 43.855421686746986, "grad_norm": 1.9577827453613281, "learning_rate": 3.654618473895582e-05, "loss": 0.2438, "step": 7280 }, { "epoch": 43.8855421686747, "grad_norm": 2.4634768962860107, "learning_rate": 3.657128514056225e-05, "loss": 0.2022, "step": 7285 }, { "epoch": 43.91566265060241, "grad_norm": 1.8126288652420044, "learning_rate": 3.659638554216868e-05, "loss": 0.2265, "step": 7290 }, { "epoch": 43.94578313253012, "grad_norm": 3.2564098834991455, "learning_rate": 3.66214859437751e-05, "loss": 0.2232, "step": 7295 }, { "epoch": 43.975903614457835, "grad_norm": 1.973140835762024, "learning_rate": 3.664658634538153e-05, "loss": 0.262, "step": 7300 }, { "epoch": 44.0, "eval_accuracy": 0.9134880062917814, "eval_auc": 0.965799318587184, "eval_f1": 0.8784530386740331, "eval_loss": 0.23760782182216644, "eval_precision": 0.8520900321543409, "eval_recall": 0.9064994298745724, "eval_runtime": 18.138, "eval_samples_per_second": 140.203, "eval_steps_per_second": 0.717, "step": 7304 }, { "epoch": 44.006024096385545, "grad_norm": 3.11803936958313, "learning_rate": 3.667168674698795e-05, "loss": 0.2606, "step": 7305 }, { "epoch": 44.036144578313255, "grad_norm": 1.4260988235473633, "learning_rate": 3.669678714859438e-05, "loss": 0.1956, "step": 7310 }, { "epoch": 44.066265060240966, "grad_norm": 2.298755168914795, "learning_rate": 3.6721887550200804e-05, "loss": 0.221, "step": 7315 }, { "epoch": 44.096385542168676, "grad_norm": 2.2086496353149414, "learning_rate": 3.674698795180723e-05, "loss": 0.2416, "step": 7320 }, { "epoch": 44.126506024096386, "grad_norm": 2.1699838638305664, "learning_rate": 3.6772088353413656e-05, "loss": 0.2039, "step": 7325 }, { "epoch": 44.1566265060241, "grad_norm": 2.87320613861084, "learning_rate": 3.6797188755020085e-05, "loss": 0.2373, "step": 7330 }, { "epoch": 44.18674698795181, "grad_norm": 2.5652034282684326, "learning_rate": 3.6822289156626514e-05, "loss": 0.2004, "step": 7335 }, { "epoch": 44.21686746987952, "grad_norm": 3.0507736206054688, "learning_rate": 3.684738955823293e-05, "loss": 0.2416, "step": 7340 }, { "epoch": 44.24698795180723, "grad_norm": 2.3488194942474365, "learning_rate": 3.687248995983936e-05, "loss": 0.2158, "step": 7345 }, { "epoch": 44.27710843373494, "grad_norm": 1.9353269338607788, "learning_rate": 3.689759036144578e-05, "loss": 0.2115, "step": 7350 }, { "epoch": 44.30722891566265, "grad_norm": 2.5070390701293945, "learning_rate": 3.692269076305221e-05, "loss": 0.2172, "step": 7355 }, { "epoch": 44.33734939759036, "grad_norm": 2.123479127883911, "learning_rate": 3.694779116465863e-05, "loss": 0.213, "step": 7360 }, { "epoch": 44.36746987951807, "grad_norm": 1.4485774040222168, "learning_rate": 3.697289156626506e-05, "loss": 0.1941, "step": 7365 }, { "epoch": 44.397590361445786, "grad_norm": 1.6342570781707764, "learning_rate": 3.699799196787149e-05, "loss": 0.2088, "step": 7370 }, { "epoch": 44.4277108433735, "grad_norm": 1.9521421194076538, "learning_rate": 3.702309236947791e-05, "loss": 0.2017, "step": 7375 }, { "epoch": 44.45783132530121, "grad_norm": 2.1190085411071777, "learning_rate": 3.704819277108434e-05, "loss": 0.2221, "step": 7380 }, { "epoch": 44.48795180722892, "grad_norm": 2.233778953552246, "learning_rate": 3.7073293172690764e-05, "loss": 0.1708, "step": 7385 }, { "epoch": 44.51807228915663, "grad_norm": 1.870052456855774, "learning_rate": 3.709839357429719e-05, "loss": 0.2155, "step": 7390 }, { "epoch": 44.54819277108434, "grad_norm": 2.6740262508392334, "learning_rate": 3.7123493975903615e-05, "loss": 0.2393, "step": 7395 }, { "epoch": 44.57831325301205, "grad_norm": 1.830087423324585, "learning_rate": 3.7148594377510044e-05, "loss": 0.2365, "step": 7400 }, { "epoch": 44.60843373493976, "grad_norm": 3.084540367126465, "learning_rate": 3.7173694779116466e-05, "loss": 0.2712, "step": 7405 }, { "epoch": 44.63855421686747, "grad_norm": 1.879089593887329, "learning_rate": 3.7198795180722895e-05, "loss": 0.228, "step": 7410 }, { "epoch": 44.66867469879518, "grad_norm": 2.8133385181427, "learning_rate": 3.7223895582329324e-05, "loss": 0.2436, "step": 7415 }, { "epoch": 44.69879518072289, "grad_norm": 2.9523673057556152, "learning_rate": 3.724899598393575e-05, "loss": 0.2195, "step": 7420 }, { "epoch": 44.7289156626506, "grad_norm": 2.7336349487304688, "learning_rate": 3.7274096385542176e-05, "loss": 0.2222, "step": 7425 }, { "epoch": 44.75903614457831, "grad_norm": 1.8540711402893066, "learning_rate": 3.729919678714859e-05, "loss": 0.2195, "step": 7430 }, { "epoch": 44.78915662650603, "grad_norm": 1.6824413537979126, "learning_rate": 3.732429718875502e-05, "loss": 0.2417, "step": 7435 }, { "epoch": 44.81927710843374, "grad_norm": 2.8414008617401123, "learning_rate": 3.734939759036144e-05, "loss": 0.2428, "step": 7440 }, { "epoch": 44.84939759036145, "grad_norm": 2.3256123065948486, "learning_rate": 3.737449799196787e-05, "loss": 0.2146, "step": 7445 }, { "epoch": 44.87951807228916, "grad_norm": 2.0897772312164307, "learning_rate": 3.73995983935743e-05, "loss": 0.258, "step": 7450 }, { "epoch": 44.90963855421687, "grad_norm": 1.9921648502349854, "learning_rate": 3.742469879518072e-05, "loss": 0.1894, "step": 7455 }, { "epoch": 44.93975903614458, "grad_norm": 1.5796568393707275, "learning_rate": 3.744979919678715e-05, "loss": 0.2267, "step": 7460 }, { "epoch": 44.96987951807229, "grad_norm": 2.159181594848633, "learning_rate": 3.7474899598393574e-05, "loss": 0.2554, "step": 7465 }, { "epoch": 45.0, "grad_norm": 2.91159725189209, "learning_rate": 3.7500000000000003e-05, "loss": 0.2056, "step": 7470 }, { "epoch": 45.0, "eval_accuracy": 0.9154541879669682, "eval_auc": 0.9655703786645787, "eval_f1": 0.8743424897720631, "eval_loss": 0.23049841821193695, "eval_precision": 0.8968824940047961, "eval_recall": 0.8529076396807298, "eval_runtime": 17.0235, "eval_samples_per_second": 149.381, "eval_steps_per_second": 0.764, "step": 7470 }, { "epoch": 45.03012048192771, "grad_norm": 2.273345708847046, "learning_rate": 3.7525100401606426e-05, "loss": 0.2279, "step": 7475 }, { "epoch": 45.06024096385542, "grad_norm": 1.93620765209198, "learning_rate": 3.7550200803212855e-05, "loss": 0.2348, "step": 7480 }, { "epoch": 45.09036144578313, "grad_norm": 2.0208992958068848, "learning_rate": 3.757530120481928e-05, "loss": 0.2378, "step": 7485 }, { "epoch": 45.12048192771084, "grad_norm": 1.7389707565307617, "learning_rate": 3.7600401606425706e-05, "loss": 0.2, "step": 7490 }, { "epoch": 45.15060240963855, "grad_norm": 2.7836644649505615, "learning_rate": 3.7625502008032135e-05, "loss": 0.2091, "step": 7495 }, { "epoch": 45.18072289156626, "grad_norm": 1.9033209085464478, "learning_rate": 3.765060240963856e-05, "loss": 0.1727, "step": 7500 }, { "epoch": 45.21084337349398, "grad_norm": 2.405941963195801, "learning_rate": 3.7675702811244987e-05, "loss": 0.2401, "step": 7505 }, { "epoch": 45.24096385542169, "grad_norm": 1.9806922674179077, "learning_rate": 3.770080321285141e-05, "loss": 0.2247, "step": 7510 }, { "epoch": 45.2710843373494, "grad_norm": 2.2501771450042725, "learning_rate": 3.772590361445783e-05, "loss": 0.211, "step": 7515 }, { "epoch": 45.30120481927711, "grad_norm": 2.0017693042755127, "learning_rate": 3.7751004016064253e-05, "loss": 0.2372, "step": 7520 }, { "epoch": 45.33132530120482, "grad_norm": 1.948451280593872, "learning_rate": 3.777610441767068e-05, "loss": 0.208, "step": 7525 }, { "epoch": 45.36144578313253, "grad_norm": 2.418311357498169, "learning_rate": 3.780120481927711e-05, "loss": 0.1952, "step": 7530 }, { "epoch": 45.39156626506024, "grad_norm": 2.2427890300750732, "learning_rate": 3.7826305220883534e-05, "loss": 0.2271, "step": 7535 }, { "epoch": 45.42168674698795, "grad_norm": 2.134998083114624, "learning_rate": 3.785140562248996e-05, "loss": 0.2197, "step": 7540 }, { "epoch": 45.45180722891566, "grad_norm": 1.6235309839248657, "learning_rate": 3.7876506024096385e-05, "loss": 0.2216, "step": 7545 }, { "epoch": 45.48192771084337, "grad_norm": 2.081052780151367, "learning_rate": 3.7901606425702814e-05, "loss": 0.2205, "step": 7550 }, { "epoch": 45.51204819277108, "grad_norm": 2.2003960609436035, "learning_rate": 3.7926706827309237e-05, "loss": 0.1913, "step": 7555 }, { "epoch": 45.54216867469879, "grad_norm": 2.2834722995758057, "learning_rate": 3.7951807228915666e-05, "loss": 0.2239, "step": 7560 }, { "epoch": 45.5722891566265, "grad_norm": 2.1172125339508057, "learning_rate": 3.797690763052209e-05, "loss": 0.1776, "step": 7565 }, { "epoch": 45.602409638554214, "grad_norm": 2.7145802974700928, "learning_rate": 3.800200803212852e-05, "loss": 0.228, "step": 7570 }, { "epoch": 45.63253012048193, "grad_norm": 2.9677984714508057, "learning_rate": 3.802710843373494e-05, "loss": 0.1937, "step": 7575 }, { "epoch": 45.66265060240964, "grad_norm": 2.65963077545166, "learning_rate": 3.805220883534137e-05, "loss": 0.192, "step": 7580 }, { "epoch": 45.69277108433735, "grad_norm": 2.723327398300171, "learning_rate": 3.80773092369478e-05, "loss": 0.2364, "step": 7585 }, { "epoch": 45.72289156626506, "grad_norm": 2.2919390201568604, "learning_rate": 3.810240963855422e-05, "loss": 0.2316, "step": 7590 }, { "epoch": 45.75301204819277, "grad_norm": 2.0183589458465576, "learning_rate": 3.812751004016065e-05, "loss": 0.1891, "step": 7595 }, { "epoch": 45.78313253012048, "grad_norm": 2.712099552154541, "learning_rate": 3.815261044176707e-05, "loss": 0.2174, "step": 7600 }, { "epoch": 45.81325301204819, "grad_norm": 3.526670217514038, "learning_rate": 3.817771084337349e-05, "loss": 0.2729, "step": 7605 }, { "epoch": 45.8433734939759, "grad_norm": 2.002732515335083, "learning_rate": 3.820281124497992e-05, "loss": 0.2192, "step": 7610 }, { "epoch": 45.873493975903614, "grad_norm": 3.051781177520752, "learning_rate": 3.8227911646586345e-05, "loss": 0.2627, "step": 7615 }, { "epoch": 45.903614457831324, "grad_norm": 2.3048646450042725, "learning_rate": 3.8253012048192774e-05, "loss": 0.223, "step": 7620 }, { "epoch": 45.933734939759034, "grad_norm": 2.2568962574005127, "learning_rate": 3.8278112449799196e-05, "loss": 0.2222, "step": 7625 }, { "epoch": 45.963855421686745, "grad_norm": 2.023124933242798, "learning_rate": 3.8303212851405625e-05, "loss": 0.2063, "step": 7630 }, { "epoch": 45.993975903614455, "grad_norm": 1.6241674423217773, "learning_rate": 3.832831325301205e-05, "loss": 0.1744, "step": 7635 }, { "epoch": 46.0, "eval_accuracy": 0.918600078647267, "eval_auc": 0.9651871010661962, "eval_f1": 0.8743169398907104, "eval_loss": 0.25582775473594666, "eval_precision": 0.935064935064935, "eval_recall": 0.8209806157354618, "eval_runtime": 17.1337, "eval_samples_per_second": 148.421, "eval_steps_per_second": 0.759, "step": 7636 }, { "epoch": 46.024096385542165, "grad_norm": 2.8567662239074707, "learning_rate": 3.8353413654618476e-05, "loss": 0.2164, "step": 7640 }, { "epoch": 46.05421686746988, "grad_norm": 2.854685068130493, "learning_rate": 3.83785140562249e-05, "loss": 0.2242, "step": 7645 }, { "epoch": 46.08433734939759, "grad_norm": 2.8304920196533203, "learning_rate": 3.840361445783133e-05, "loss": 0.1927, "step": 7650 }, { "epoch": 46.1144578313253, "grad_norm": 1.38346529006958, "learning_rate": 3.842871485943775e-05, "loss": 0.1966, "step": 7655 }, { "epoch": 46.144578313253014, "grad_norm": 2.5765368938446045, "learning_rate": 3.845381526104418e-05, "loss": 0.1855, "step": 7660 }, { "epoch": 46.174698795180724, "grad_norm": 1.645381212234497, "learning_rate": 3.847891566265061e-05, "loss": 0.1507, "step": 7665 }, { "epoch": 46.204819277108435, "grad_norm": 2.0395102500915527, "learning_rate": 3.850401606425703e-05, "loss": 0.2106, "step": 7670 }, { "epoch": 46.234939759036145, "grad_norm": 2.4283220767974854, "learning_rate": 3.852911646586346e-05, "loss": 0.2174, "step": 7675 }, { "epoch": 46.265060240963855, "grad_norm": 2.396634340286255, "learning_rate": 3.855421686746988e-05, "loss": 0.2345, "step": 7680 }, { "epoch": 46.295180722891565, "grad_norm": 3.298989772796631, "learning_rate": 3.857931726907631e-05, "loss": 0.227, "step": 7685 }, { "epoch": 46.325301204819276, "grad_norm": 2.302367687225342, "learning_rate": 3.860441767068273e-05, "loss": 0.2717, "step": 7690 }, { "epoch": 46.355421686746986, "grad_norm": 1.4514416456222534, "learning_rate": 3.8629518072289155e-05, "loss": 0.2327, "step": 7695 }, { "epoch": 46.3855421686747, "grad_norm": 1.3491817712783813, "learning_rate": 3.8654618473895584e-05, "loss": 0.213, "step": 7700 }, { "epoch": 46.41566265060241, "grad_norm": 2.660956382751465, "learning_rate": 3.867971887550201e-05, "loss": 0.2314, "step": 7705 }, { "epoch": 46.44578313253012, "grad_norm": 2.8108885288238525, "learning_rate": 3.8704819277108436e-05, "loss": 0.2305, "step": 7710 }, { "epoch": 46.475903614457835, "grad_norm": 2.0226879119873047, "learning_rate": 3.872991967871486e-05, "loss": 0.2256, "step": 7715 }, { "epoch": 46.506024096385545, "grad_norm": 2.303884506225586, "learning_rate": 3.875502008032129e-05, "loss": 0.2363, "step": 7720 }, { "epoch": 46.536144578313255, "grad_norm": 1.635791540145874, "learning_rate": 3.878012048192771e-05, "loss": 0.2129, "step": 7725 }, { "epoch": 46.566265060240966, "grad_norm": 1.936125636100769, "learning_rate": 3.880522088353414e-05, "loss": 0.2077, "step": 7730 }, { "epoch": 46.596385542168676, "grad_norm": 2.263965368270874, "learning_rate": 3.883032128514056e-05, "loss": 0.2505, "step": 7735 }, { "epoch": 46.626506024096386, "grad_norm": 2.2494423389434814, "learning_rate": 3.885542168674699e-05, "loss": 0.229, "step": 7740 }, { "epoch": 46.6566265060241, "grad_norm": 2.2650575637817383, "learning_rate": 3.888052208835342e-05, "loss": 0.1995, "step": 7745 }, { "epoch": 46.68674698795181, "grad_norm": 2.2266595363616943, "learning_rate": 3.890562248995984e-05, "loss": 0.2237, "step": 7750 }, { "epoch": 46.71686746987952, "grad_norm": 1.8683754205703735, "learning_rate": 3.893072289156627e-05, "loss": 0.222, "step": 7755 }, { "epoch": 46.74698795180723, "grad_norm": 1.9006563425064087, "learning_rate": 3.895582329317269e-05, "loss": 0.1921, "step": 7760 }, { "epoch": 46.77710843373494, "grad_norm": 3.2155590057373047, "learning_rate": 3.898092369477912e-05, "loss": 0.2461, "step": 7765 }, { "epoch": 46.80722891566265, "grad_norm": 1.617855191230774, "learning_rate": 3.9006024096385544e-05, "loss": 0.2145, "step": 7770 }, { "epoch": 46.83734939759036, "grad_norm": 3.959197759628296, "learning_rate": 3.903112449799197e-05, "loss": 0.2727, "step": 7775 }, { "epoch": 46.86746987951807, "grad_norm": 3.9538347721099854, "learning_rate": 3.9056224899598395e-05, "loss": 0.2684, "step": 7780 }, { "epoch": 46.897590361445786, "grad_norm": 2.1486432552337646, "learning_rate": 3.908132530120482e-05, "loss": 0.2024, "step": 7785 }, { "epoch": 46.9277108433735, "grad_norm": 2.487645387649536, "learning_rate": 3.9106425702811247e-05, "loss": 0.2405, "step": 7790 }, { "epoch": 46.95783132530121, "grad_norm": 2.512543201446533, "learning_rate": 3.913152610441767e-05, "loss": 0.2371, "step": 7795 }, { "epoch": 46.98795180722892, "grad_norm": 1.5896553993225098, "learning_rate": 3.91566265060241e-05, "loss": 0.2009, "step": 7800 }, { "epoch": 47.0, "eval_accuracy": 0.9174203696421549, "eval_auc": 0.9671657716678461, "eval_f1": 0.8770491803278688, "eval_loss": 0.2341395765542984, "eval_precision": 0.901323706377858, "eval_recall": 0.8540478905359179, "eval_runtime": 17.092, "eval_samples_per_second": 148.783, "eval_steps_per_second": 0.761, "step": 7802 }, { "epoch": 47.01807228915663, "grad_norm": 2.3858580589294434, "learning_rate": 3.918172690763052e-05, "loss": 0.182, "step": 7805 }, { "epoch": 47.04819277108434, "grad_norm": 1.9192538261413574, "learning_rate": 3.920682730923695e-05, "loss": 0.2144, "step": 7810 }, { "epoch": 47.07831325301205, "grad_norm": 3.998718023300171, "learning_rate": 3.923192771084337e-05, "loss": 0.22, "step": 7815 }, { "epoch": 47.10843373493976, "grad_norm": 2.8246681690216064, "learning_rate": 3.92570281124498e-05, "loss": 0.2237, "step": 7820 }, { "epoch": 47.13855421686747, "grad_norm": 1.7214534282684326, "learning_rate": 3.928212851405623e-05, "loss": 0.2247, "step": 7825 }, { "epoch": 47.16867469879518, "grad_norm": 1.522997260093689, "learning_rate": 3.930722891566265e-05, "loss": 0.2075, "step": 7830 }, { "epoch": 47.19879518072289, "grad_norm": 1.877416729927063, "learning_rate": 3.933232931726908e-05, "loss": 0.1689, "step": 7835 }, { "epoch": 47.2289156626506, "grad_norm": 2.4283390045166016, "learning_rate": 3.93574297188755e-05, "loss": 0.2184, "step": 7840 }, { "epoch": 47.25903614457831, "grad_norm": 2.131427526473999, "learning_rate": 3.938253012048193e-05, "loss": 0.1668, "step": 7845 }, { "epoch": 47.28915662650602, "grad_norm": 2.9161033630371094, "learning_rate": 3.9407630522088355e-05, "loss": 0.2252, "step": 7850 }, { "epoch": 47.31927710843374, "grad_norm": 3.200554609298706, "learning_rate": 3.9432730923694784e-05, "loss": 0.2335, "step": 7855 }, { "epoch": 47.34939759036145, "grad_norm": 2.120020627975464, "learning_rate": 3.9457831325301206e-05, "loss": 0.2366, "step": 7860 }, { "epoch": 47.37951807228916, "grad_norm": 1.453628659248352, "learning_rate": 3.9482931726907635e-05, "loss": 0.1861, "step": 7865 }, { "epoch": 47.40963855421687, "grad_norm": 1.8164753913879395, "learning_rate": 3.9508032128514064e-05, "loss": 0.1764, "step": 7870 }, { "epoch": 47.43975903614458, "grad_norm": 2.0672473907470703, "learning_rate": 3.953313253012048e-05, "loss": 0.2708, "step": 7875 }, { "epoch": 47.46987951807229, "grad_norm": 3.7126882076263428, "learning_rate": 3.955823293172691e-05, "loss": 0.2495, "step": 7880 }, { "epoch": 47.5, "grad_norm": 3.2293787002563477, "learning_rate": 3.958333333333333e-05, "loss": 0.2388, "step": 7885 }, { "epoch": 47.53012048192771, "grad_norm": 2.1597959995269775, "learning_rate": 3.960843373493976e-05, "loss": 0.2125, "step": 7890 }, { "epoch": 47.56024096385542, "grad_norm": 2.026249408721924, "learning_rate": 3.963353413654618e-05, "loss": 0.191, "step": 7895 }, { "epoch": 47.59036144578313, "grad_norm": 2.365387439727783, "learning_rate": 3.965863453815261e-05, "loss": 0.2275, "step": 7900 }, { "epoch": 47.62048192771084, "grad_norm": 1.6983262300491333, "learning_rate": 3.968373493975904e-05, "loss": 0.1965, "step": 7905 }, { "epoch": 47.65060240963855, "grad_norm": 2.45988392829895, "learning_rate": 3.970883534136546e-05, "loss": 0.266, "step": 7910 }, { "epoch": 47.68072289156626, "grad_norm": 2.1438827514648438, "learning_rate": 3.973393574297189e-05, "loss": 0.2275, "step": 7915 }, { "epoch": 47.71084337349397, "grad_norm": 1.9596291780471802, "learning_rate": 3.9759036144578314e-05, "loss": 0.2521, "step": 7920 }, { "epoch": 47.74096385542169, "grad_norm": 2.0082342624664307, "learning_rate": 3.978413654618474e-05, "loss": 0.2345, "step": 7925 }, { "epoch": 47.7710843373494, "grad_norm": 2.714073419570923, "learning_rate": 3.9809236947791165e-05, "loss": 0.2403, "step": 7930 }, { "epoch": 47.80120481927711, "grad_norm": 1.8705098628997803, "learning_rate": 3.9834337349397595e-05, "loss": 0.2063, "step": 7935 }, { "epoch": 47.83132530120482, "grad_norm": 1.9358417987823486, "learning_rate": 3.985943775100402e-05, "loss": 0.2141, "step": 7940 }, { "epoch": 47.86144578313253, "grad_norm": 2.227191686630249, "learning_rate": 3.9884538152610446e-05, "loss": 0.2179, "step": 7945 }, { "epoch": 47.89156626506024, "grad_norm": 2.8488807678222656, "learning_rate": 3.9909638554216875e-05, "loss": 0.2186, "step": 7950 }, { "epoch": 47.92168674698795, "grad_norm": 1.6021779775619507, "learning_rate": 3.99347389558233e-05, "loss": 0.2329, "step": 7955 }, { "epoch": 47.95180722891566, "grad_norm": 2.1023037433624268, "learning_rate": 3.995983935742972e-05, "loss": 0.2615, "step": 7960 }, { "epoch": 47.98192771084337, "grad_norm": 2.3818461894989014, "learning_rate": 3.998493975903614e-05, "loss": 0.2356, "step": 7965 }, { "epoch": 48.0, "eval_accuracy": 0.9213527329925285, "eval_auc": 0.968722494699134, "eval_f1": 0.882491186839013, "eval_loss": 0.21619462966918945, "eval_precision": 0.9103030303030303, "eval_recall": 0.8563283922462942, "eval_runtime": 17.2277, "eval_samples_per_second": 147.611, "eval_steps_per_second": 0.755, "step": 7968 }, { "epoch": 48.01204819277108, "grad_norm": 1.3949185609817505, "learning_rate": 4.001004016064257e-05, "loss": 0.2073, "step": 7970 }, { "epoch": 48.04216867469879, "grad_norm": 1.8420413732528687, "learning_rate": 4.003514056224899e-05, "loss": 0.2128, "step": 7975 }, { "epoch": 48.0722891566265, "grad_norm": 1.9469246864318848, "learning_rate": 4.006024096385542e-05, "loss": 0.1887, "step": 7980 }, { "epoch": 48.102409638554214, "grad_norm": 2.362492084503174, "learning_rate": 4.008534136546185e-05, "loss": 0.2148, "step": 7985 }, { "epoch": 48.13253012048193, "grad_norm": 1.5186575651168823, "learning_rate": 4.0110441767068274e-05, "loss": 0.2052, "step": 7990 }, { "epoch": 48.16265060240964, "grad_norm": 2.3660972118377686, "learning_rate": 4.01355421686747e-05, "loss": 0.2274, "step": 7995 }, { "epoch": 48.19277108433735, "grad_norm": 1.7655103206634521, "learning_rate": 4.0160642570281125e-05, "loss": 0.1772, "step": 8000 }, { "epoch": 48.22289156626506, "grad_norm": 2.0450825691223145, "learning_rate": 4.0185742971887554e-05, "loss": 0.2016, "step": 8005 }, { "epoch": 48.25301204819277, "grad_norm": 2.284454107284546, "learning_rate": 4.0210843373493976e-05, "loss": 0.2097, "step": 8010 }, { "epoch": 48.28313253012048, "grad_norm": 2.565540075302124, "learning_rate": 4.0235943775100405e-05, "loss": 0.2301, "step": 8015 }, { "epoch": 48.31325301204819, "grad_norm": 2.9623656272888184, "learning_rate": 4.026104417670683e-05, "loss": 0.1995, "step": 8020 }, { "epoch": 48.3433734939759, "grad_norm": 2.499049663543701, "learning_rate": 4.028614457831326e-05, "loss": 0.2371, "step": 8025 }, { "epoch": 48.373493975903614, "grad_norm": 2.517566680908203, "learning_rate": 4.031124497991968e-05, "loss": 0.2569, "step": 8030 }, { "epoch": 48.403614457831324, "grad_norm": 1.6054986715316772, "learning_rate": 4.033634538152611e-05, "loss": 0.2037, "step": 8035 }, { "epoch": 48.433734939759034, "grad_norm": 2.0692198276519775, "learning_rate": 4.036144578313254e-05, "loss": 0.2151, "step": 8040 }, { "epoch": 48.463855421686745, "grad_norm": 1.7841137647628784, "learning_rate": 4.038654618473896e-05, "loss": 0.2078, "step": 8045 }, { "epoch": 48.493975903614455, "grad_norm": 1.7544052600860596, "learning_rate": 4.041164658634538e-05, "loss": 0.1812, "step": 8050 }, { "epoch": 48.524096385542165, "grad_norm": 2.5118513107299805, "learning_rate": 4.0436746987951804e-05, "loss": 0.2402, "step": 8055 }, { "epoch": 48.55421686746988, "grad_norm": 2.230863094329834, "learning_rate": 4.046184738955823e-05, "loss": 0.183, "step": 8060 }, { "epoch": 48.58433734939759, "grad_norm": 2.237218141555786, "learning_rate": 4.048694779116466e-05, "loss": 0.2277, "step": 8065 }, { "epoch": 48.6144578313253, "grad_norm": 3.2372775077819824, "learning_rate": 4.0512048192771084e-05, "loss": 0.2322, "step": 8070 }, { "epoch": 48.644578313253014, "grad_norm": 3.1102917194366455, "learning_rate": 4.053714859437751e-05, "loss": 0.2296, "step": 8075 }, { "epoch": 48.674698795180724, "grad_norm": 2.165656566619873, "learning_rate": 4.0562248995983936e-05, "loss": 0.239, "step": 8080 }, { "epoch": 48.704819277108435, "grad_norm": 1.6019127368927002, "learning_rate": 4.0587349397590365e-05, "loss": 0.2006, "step": 8085 }, { "epoch": 48.734939759036145, "grad_norm": 2.381197929382324, "learning_rate": 4.061244979919679e-05, "loss": 0.2113, "step": 8090 }, { "epoch": 48.765060240963855, "grad_norm": 2.4255783557891846, "learning_rate": 4.0637550200803216e-05, "loss": 0.2358, "step": 8095 }, { "epoch": 48.795180722891565, "grad_norm": 1.3936980962753296, "learning_rate": 4.066265060240964e-05, "loss": 0.2086, "step": 8100 }, { "epoch": 48.825301204819276, "grad_norm": 2.304527997970581, "learning_rate": 4.068775100401607e-05, "loss": 0.2188, "step": 8105 }, { "epoch": 48.855421686746986, "grad_norm": 2.2357938289642334, "learning_rate": 4.071285140562249e-05, "loss": 0.1996, "step": 8110 }, { "epoch": 48.8855421686747, "grad_norm": 2.678866386413574, "learning_rate": 4.073795180722892e-05, "loss": 0.2467, "step": 8115 }, { "epoch": 48.91566265060241, "grad_norm": 2.4957497119903564, "learning_rate": 4.076305220883535e-05, "loss": 0.2704, "step": 8120 }, { "epoch": 48.94578313253012, "grad_norm": 2.6663835048675537, "learning_rate": 4.078815261044177e-05, "loss": 0.2316, "step": 8125 }, { "epoch": 48.975903614457835, "grad_norm": 2.898577928543091, "learning_rate": 4.08132530120482e-05, "loss": 0.2556, "step": 8130 }, { "epoch": 49.0, "eval_accuracy": 0.9237121510027526, "eval_auc": 0.9678888659226519, "eval_f1": 0.8864168618266979, "eval_loss": 0.2191127985715866, "eval_precision": 0.910950661853189, "eval_recall": 0.863169897377423, "eval_runtime": 17.0353, "eval_samples_per_second": 149.279, "eval_steps_per_second": 0.763, "step": 8134 }, { "epoch": 49.006024096385545, "grad_norm": 2.100454092025757, "learning_rate": 4.083835341365462e-05, "loss": 0.1929, "step": 8135 }, { "epoch": 49.036144578313255, "grad_norm": 1.9053326845169067, "learning_rate": 4.0863453815261044e-05, "loss": 0.1936, "step": 8140 }, { "epoch": 49.066265060240966, "grad_norm": 1.8544409275054932, "learning_rate": 4.088855421686747e-05, "loss": 0.1797, "step": 8145 }, { "epoch": 49.096385542168676, "grad_norm": 2.9682719707489014, "learning_rate": 4.0913654618473895e-05, "loss": 0.2269, "step": 8150 }, { "epoch": 49.126506024096386, "grad_norm": 2.228851795196533, "learning_rate": 4.0938755020080324e-05, "loss": 0.2095, "step": 8155 }, { "epoch": 49.1566265060241, "grad_norm": 2.6955111026763916, "learning_rate": 4.0963855421686746e-05, "loss": 0.1792, "step": 8160 }, { "epoch": 49.18674698795181, "grad_norm": 1.7635419368743896, "learning_rate": 4.0988955823293175e-05, "loss": 0.1999, "step": 8165 }, { "epoch": 49.21686746987952, "grad_norm": 2.2137768268585205, "learning_rate": 4.10140562248996e-05, "loss": 0.2154, "step": 8170 }, { "epoch": 49.24698795180723, "grad_norm": 2.5191023349761963, "learning_rate": 4.103915662650603e-05, "loss": 0.2557, "step": 8175 }, { "epoch": 49.27710843373494, "grad_norm": 2.286573886871338, "learning_rate": 4.106425702811245e-05, "loss": 0.2602, "step": 8180 }, { "epoch": 49.30722891566265, "grad_norm": 2.6866259574890137, "learning_rate": 4.108935742971888e-05, "loss": 0.21, "step": 8185 }, { "epoch": 49.33734939759036, "grad_norm": 2.017263412475586, "learning_rate": 4.11144578313253e-05, "loss": 0.1535, "step": 8190 }, { "epoch": 49.36746987951807, "grad_norm": 2.0354788303375244, "learning_rate": 4.113955823293173e-05, "loss": 0.2021, "step": 8195 }, { "epoch": 49.397590361445786, "grad_norm": 1.6226084232330322, "learning_rate": 4.116465863453816e-05, "loss": 0.258, "step": 8200 }, { "epoch": 49.4277108433735, "grad_norm": 2.9974417686462402, "learning_rate": 4.118975903614458e-05, "loss": 0.2214, "step": 8205 }, { "epoch": 49.45783132530121, "grad_norm": 2.164848566055298, "learning_rate": 4.121485943775101e-05, "loss": 0.2227, "step": 8210 }, { "epoch": 49.48795180722892, "grad_norm": 1.9364224672317505, "learning_rate": 4.123995983935743e-05, "loss": 0.2389, "step": 8215 }, { "epoch": 49.51807228915663, "grad_norm": 1.5729063749313354, "learning_rate": 4.126506024096386e-05, "loss": 0.2055, "step": 8220 }, { "epoch": 49.54819277108434, "grad_norm": 2.0302486419677734, "learning_rate": 4.1290160642570284e-05, "loss": 0.2079, "step": 8225 }, { "epoch": 49.57831325301205, "grad_norm": 2.086446523666382, "learning_rate": 4.1315261044176706e-05, "loss": 0.1869, "step": 8230 }, { "epoch": 49.60843373493976, "grad_norm": 2.3346996307373047, "learning_rate": 4.1340361445783135e-05, "loss": 0.2259, "step": 8235 }, { "epoch": 49.63855421686747, "grad_norm": 1.7249573469161987, "learning_rate": 4.136546184738956e-05, "loss": 0.1928, "step": 8240 }, { "epoch": 49.66867469879518, "grad_norm": 2.7298545837402344, "learning_rate": 4.1390562248995986e-05, "loss": 0.2336, "step": 8245 }, { "epoch": 49.69879518072289, "grad_norm": 3.0118720531463623, "learning_rate": 4.141566265060241e-05, "loss": 0.2221, "step": 8250 }, { "epoch": 49.7289156626506, "grad_norm": 2.4571619033813477, "learning_rate": 4.144076305220884e-05, "loss": 0.1981, "step": 8255 }, { "epoch": 49.75903614457831, "grad_norm": 2.2176711559295654, "learning_rate": 4.146586345381526e-05, "loss": 0.2094, "step": 8260 }, { "epoch": 49.78915662650603, "grad_norm": 2.423710584640503, "learning_rate": 4.149096385542169e-05, "loss": 0.2158, "step": 8265 }, { "epoch": 49.81927710843374, "grad_norm": 2.3995325565338135, "learning_rate": 4.151606425702811e-05, "loss": 0.2123, "step": 8270 }, { "epoch": 49.84939759036145, "grad_norm": 1.716963529586792, "learning_rate": 4.154116465863454e-05, "loss": 0.2062, "step": 8275 }, { "epoch": 49.87951807228916, "grad_norm": 2.483274459838867, "learning_rate": 4.156626506024097e-05, "loss": 0.1824, "step": 8280 }, { "epoch": 49.90963855421687, "grad_norm": 1.5357404947280884, "learning_rate": 4.159136546184739e-05, "loss": 0.1938, "step": 8285 }, { "epoch": 49.93975903614458, "grad_norm": 2.8210079669952393, "learning_rate": 4.161646586345382e-05, "loss": 0.2232, "step": 8290 }, { "epoch": 49.96987951807229, "grad_norm": 2.644474744796753, "learning_rate": 4.164156626506024e-05, "loss": 0.2048, "step": 8295 }, { "epoch": 50.0, "grad_norm": 2.154975175857544, "learning_rate": 4.166666666666667e-05, "loss": 0.1875, "step": 8300 }, { "epoch": 50.0, "eval_accuracy": 0.9197797876523791, "eval_auc": 0.9642733946486234, "eval_f1": 0.8802816901408451, "eval_loss": 0.25391778349876404, "eval_precision": 0.9068923821039904, "eval_recall": 0.855188141391106, "eval_runtime": 17.7018, "eval_samples_per_second": 143.658, "eval_steps_per_second": 0.734, "step": 8300 }, { "epoch": 50.03012048192771, "grad_norm": 2.3240296840667725, "learning_rate": 4.1691767068273094e-05, "loss": 0.1923, "step": 8305 }, { "epoch": 50.06024096385542, "grad_norm": 2.1551833152770996, "learning_rate": 4.1716867469879523e-05, "loss": 0.2121, "step": 8310 }, { "epoch": 50.09036144578313, "grad_norm": 2.293663501739502, "learning_rate": 4.1741967871485946e-05, "loss": 0.1768, "step": 8315 }, { "epoch": 50.12048192771084, "grad_norm": 2.1318576335906982, "learning_rate": 4.176706827309237e-05, "loss": 0.1981, "step": 8320 }, { "epoch": 50.15060240963855, "grad_norm": 2.407623052597046, "learning_rate": 4.17921686746988e-05, "loss": 0.2072, "step": 8325 }, { "epoch": 50.18072289156626, "grad_norm": 2.1320385932922363, "learning_rate": 4.181726907630522e-05, "loss": 0.2063, "step": 8330 }, { "epoch": 50.21084337349398, "grad_norm": 2.106816530227661, "learning_rate": 4.184236947791165e-05, "loss": 0.1472, "step": 8335 }, { "epoch": 50.24096385542169, "grad_norm": 2.103788137435913, "learning_rate": 4.186746987951807e-05, "loss": 0.2001, "step": 8340 }, { "epoch": 50.2710843373494, "grad_norm": 2.0746145248413086, "learning_rate": 4.18925702811245e-05, "loss": 0.206, "step": 8345 }, { "epoch": 50.30120481927711, "grad_norm": 2.4276602268218994, "learning_rate": 4.191767068273092e-05, "loss": 0.261, "step": 8350 }, { "epoch": 50.33132530120482, "grad_norm": 2.2360825538635254, "learning_rate": 4.194277108433735e-05, "loss": 0.2205, "step": 8355 }, { "epoch": 50.36144578313253, "grad_norm": 2.1541240215301514, "learning_rate": 4.196787148594378e-05, "loss": 0.2181, "step": 8360 }, { "epoch": 50.39156626506024, "grad_norm": 3.0024476051330566, "learning_rate": 4.19929718875502e-05, "loss": 0.2141, "step": 8365 }, { "epoch": 50.42168674698795, "grad_norm": 2.068899631500244, "learning_rate": 4.201807228915663e-05, "loss": 0.1967, "step": 8370 }, { "epoch": 50.45180722891566, "grad_norm": 1.6858114004135132, "learning_rate": 4.2043172690763054e-05, "loss": 0.1823, "step": 8375 }, { "epoch": 50.48192771084337, "grad_norm": 2.16697359085083, "learning_rate": 4.206827309236948e-05, "loss": 0.1633, "step": 8380 }, { "epoch": 50.51204819277108, "grad_norm": 2.2981698513031006, "learning_rate": 4.2093373493975905e-05, "loss": 0.2043, "step": 8385 }, { "epoch": 50.54216867469879, "grad_norm": 1.885445475578308, "learning_rate": 4.2118473895582334e-05, "loss": 0.1958, "step": 8390 }, { "epoch": 50.5722891566265, "grad_norm": 2.8808133602142334, "learning_rate": 4.2143574297188756e-05, "loss": 0.2118, "step": 8395 }, { "epoch": 50.602409638554214, "grad_norm": 1.648272156715393, "learning_rate": 4.2168674698795186e-05, "loss": 0.2369, "step": 8400 }, { "epoch": 50.63253012048193, "grad_norm": 2.7153096199035645, "learning_rate": 4.219377510040161e-05, "loss": 0.1965, "step": 8405 }, { "epoch": 50.66265060240964, "grad_norm": 2.3628122806549072, "learning_rate": 4.221887550200803e-05, "loss": 0.2291, "step": 8410 }, { "epoch": 50.69277108433735, "grad_norm": 2.9337639808654785, "learning_rate": 4.224397590361446e-05, "loss": 0.2315, "step": 8415 }, { "epoch": 50.72289156626506, "grad_norm": 1.9613662958145142, "learning_rate": 4.226907630522088e-05, "loss": 0.2519, "step": 8420 }, { "epoch": 50.75301204819277, "grad_norm": 1.7669310569763184, "learning_rate": 4.229417670682731e-05, "loss": 0.2419, "step": 8425 }, { "epoch": 50.78313253012048, "grad_norm": 1.8924133777618408, "learning_rate": 4.231927710843373e-05, "loss": 0.2282, "step": 8430 }, { "epoch": 50.81325301204819, "grad_norm": 2.1653175354003906, "learning_rate": 4.234437751004016e-05, "loss": 0.1903, "step": 8435 }, { "epoch": 50.8433734939759, "grad_norm": 2.1018800735473633, "learning_rate": 4.236947791164659e-05, "loss": 0.2456, "step": 8440 }, { "epoch": 50.873493975903614, "grad_norm": 1.8597257137298584, "learning_rate": 4.239457831325301e-05, "loss": 0.1953, "step": 8445 }, { "epoch": 50.903614457831324, "grad_norm": 1.8078064918518066, "learning_rate": 4.241967871485944e-05, "loss": 0.1778, "step": 8450 }, { "epoch": 50.933734939759034, "grad_norm": 2.412869691848755, "learning_rate": 4.2444779116465865e-05, "loss": 0.2315, "step": 8455 }, { "epoch": 50.963855421686745, "grad_norm": 1.3655896186828613, "learning_rate": 4.2469879518072294e-05, "loss": 0.2115, "step": 8460 }, { "epoch": 50.993975903614455, "grad_norm": 2.445127487182617, "learning_rate": 4.2494979919678716e-05, "loss": 0.2252, "step": 8465 }, { "epoch": 51.0, "eval_accuracy": 0.9280377506881636, "eval_auc": 0.9689456170153352, "eval_f1": 0.8940359004053272, "eval_loss": 0.22296485304832458, "eval_precision": 0.908235294117647, "eval_recall": 0.8802736602052451, "eval_runtime": 17.0589, "eval_samples_per_second": 149.072, "eval_steps_per_second": 0.762, "step": 8466 }, { "epoch": 51.024096385542165, "grad_norm": 1.491262435913086, "learning_rate": 4.2520080321285145e-05, "loss": 0.1959, "step": 8470 }, { "epoch": 51.05421686746988, "grad_norm": 2.2632458209991455, "learning_rate": 4.254518072289157e-05, "loss": 0.2049, "step": 8475 }, { "epoch": 51.08433734939759, "grad_norm": 2.91318416595459, "learning_rate": 4.2570281124497996e-05, "loss": 0.1958, "step": 8480 }, { "epoch": 51.1144578313253, "grad_norm": 2.273866891860962, "learning_rate": 4.2595381526104425e-05, "loss": 0.2061, "step": 8485 }, { "epoch": 51.144578313253014, "grad_norm": 2.750541925430298, "learning_rate": 4.262048192771085e-05, "loss": 0.1655, "step": 8490 }, { "epoch": 51.174698795180724, "grad_norm": 3.153205633163452, "learning_rate": 4.264558232931727e-05, "loss": 0.1921, "step": 8495 }, { "epoch": 51.204819277108435, "grad_norm": 3.316248893737793, "learning_rate": 4.267068273092369e-05, "loss": 0.2232, "step": 8500 }, { "epoch": 51.234939759036145, "grad_norm": 2.0477054119110107, "learning_rate": 4.269578313253012e-05, "loss": 0.1949, "step": 8505 }, { "epoch": 51.265060240963855, "grad_norm": 1.9106863737106323, "learning_rate": 4.2720883534136544e-05, "loss": 0.1902, "step": 8510 }, { "epoch": 51.295180722891565, "grad_norm": 2.0968832969665527, "learning_rate": 4.274598393574297e-05, "loss": 0.1758, "step": 8515 }, { "epoch": 51.325301204819276, "grad_norm": 1.4065873622894287, "learning_rate": 4.27710843373494e-05, "loss": 0.2099, "step": 8520 }, { "epoch": 51.355421686746986, "grad_norm": 2.735079765319824, "learning_rate": 4.2796184738955824e-05, "loss": 0.1967, "step": 8525 }, { "epoch": 51.3855421686747, "grad_norm": 1.4214318990707397, "learning_rate": 4.282128514056225e-05, "loss": 0.1991, "step": 8530 }, { "epoch": 51.41566265060241, "grad_norm": 1.6565520763397217, "learning_rate": 4.2846385542168675e-05, "loss": 0.2003, "step": 8535 }, { "epoch": 51.44578313253012, "grad_norm": 1.5868991613388062, "learning_rate": 4.2871485943775104e-05, "loss": 0.1788, "step": 8540 }, { "epoch": 51.475903614457835, "grad_norm": 2.1586837768554688, "learning_rate": 4.289658634538153e-05, "loss": 0.219, "step": 8545 }, { "epoch": 51.506024096385545, "grad_norm": 2.678581953048706, "learning_rate": 4.2921686746987956e-05, "loss": 0.1606, "step": 8550 }, { "epoch": 51.536144578313255, "grad_norm": 2.364051580429077, "learning_rate": 4.294678714859438e-05, "loss": 0.2155, "step": 8555 }, { "epoch": 51.566265060240966, "grad_norm": 1.729448676109314, "learning_rate": 4.297188755020081e-05, "loss": 0.1741, "step": 8560 }, { "epoch": 51.596385542168676, "grad_norm": 2.4447762966156006, "learning_rate": 4.299698795180723e-05, "loss": 0.2083, "step": 8565 }, { "epoch": 51.626506024096386, "grad_norm": 1.8021255731582642, "learning_rate": 4.302208835341366e-05, "loss": 0.2093, "step": 8570 }, { "epoch": 51.6566265060241, "grad_norm": 3.061856508255005, "learning_rate": 4.304718875502009e-05, "loss": 0.1992, "step": 8575 }, { "epoch": 51.68674698795181, "grad_norm": 1.822127342224121, "learning_rate": 4.307228915662651e-05, "loss": 0.2208, "step": 8580 }, { "epoch": 51.71686746987952, "grad_norm": 2.0909178256988525, "learning_rate": 4.309738955823293e-05, "loss": 0.2104, "step": 8585 }, { "epoch": 51.74698795180723, "grad_norm": 1.498009204864502, "learning_rate": 4.3122489959839354e-05, "loss": 0.2083, "step": 8590 }, { "epoch": 51.77710843373494, "grad_norm": 2.5373308658599854, "learning_rate": 4.3147590361445783e-05, "loss": 0.2074, "step": 8595 }, { "epoch": 51.80722891566265, "grad_norm": 2.005295991897583, "learning_rate": 4.317269076305221e-05, "loss": 0.2242, "step": 8600 }, { "epoch": 51.83734939759036, "grad_norm": 2.867748737335205, "learning_rate": 4.3197791164658635e-05, "loss": 0.2493, "step": 8605 }, { "epoch": 51.86746987951807, "grad_norm": 3.365629196166992, "learning_rate": 4.3222891566265064e-05, "loss": 0.1988, "step": 8610 }, { "epoch": 51.897590361445786, "grad_norm": 1.7968188524246216, "learning_rate": 4.3247991967871486e-05, "loss": 0.2058, "step": 8615 }, { "epoch": 51.9277108433735, "grad_norm": 1.8287718296051025, "learning_rate": 4.3273092369477915e-05, "loss": 0.1895, "step": 8620 }, { "epoch": 51.95783132530121, "grad_norm": 2.291860342025757, "learning_rate": 4.329819277108434e-05, "loss": 0.2157, "step": 8625 }, { "epoch": 51.98795180722892, "grad_norm": 2.7147622108459473, "learning_rate": 4.3323293172690766e-05, "loss": 0.1993, "step": 8630 }, { "epoch": 52.0, "eval_accuracy": 0.9303971686983877, "eval_auc": 0.9749829236141434, "eval_f1": 0.8908081431215299, "eval_loss": 0.22926868498325348, "eval_precision": 0.9704301075268817, "eval_recall": 0.8232611174458381, "eval_runtime": 19.2801, "eval_samples_per_second": 131.897, "eval_steps_per_second": 0.674, "step": 8632 }, { "epoch": 52.01807228915663, "grad_norm": 1.3713630437850952, "learning_rate": 4.334839357429719e-05, "loss": 0.1696, "step": 8635 }, { "epoch": 52.04819277108434, "grad_norm": 2.2953884601593018, "learning_rate": 4.337349397590362e-05, "loss": 0.2015, "step": 8640 }, { "epoch": 52.07831325301205, "grad_norm": 1.6782729625701904, "learning_rate": 4.339859437751004e-05, "loss": 0.2073, "step": 8645 }, { "epoch": 52.10843373493976, "grad_norm": 2.056797504425049, "learning_rate": 4.342369477911647e-05, "loss": 0.1909, "step": 8650 }, { "epoch": 52.13855421686747, "grad_norm": 1.9719815254211426, "learning_rate": 4.34487951807229e-05, "loss": 0.2102, "step": 8655 }, { "epoch": 52.16867469879518, "grad_norm": 1.7055913209915161, "learning_rate": 4.347389558232932e-05, "loss": 0.1694, "step": 8660 }, { "epoch": 52.19879518072289, "grad_norm": 1.97348952293396, "learning_rate": 4.349899598393575e-05, "loss": 0.188, "step": 8665 }, { "epoch": 52.2289156626506, "grad_norm": 2.388824701309204, "learning_rate": 4.352409638554217e-05, "loss": 0.1471, "step": 8670 }, { "epoch": 52.25903614457831, "grad_norm": 2.0131945610046387, "learning_rate": 4.3549196787148594e-05, "loss": 0.1753, "step": 8675 }, { "epoch": 52.28915662650602, "grad_norm": 2.1807432174682617, "learning_rate": 4.357429718875502e-05, "loss": 0.1792, "step": 8680 }, { "epoch": 52.31927710843374, "grad_norm": 2.6073501110076904, "learning_rate": 4.3599397590361446e-05, "loss": 0.27, "step": 8685 }, { "epoch": 52.34939759036145, "grad_norm": 2.618926525115967, "learning_rate": 4.3624497991967875e-05, "loss": 0.193, "step": 8690 }, { "epoch": 52.37951807228916, "grad_norm": 1.5784326791763306, "learning_rate": 4.36495983935743e-05, "loss": 0.2148, "step": 8695 }, { "epoch": 52.40963855421687, "grad_norm": 1.9290697574615479, "learning_rate": 4.3674698795180726e-05, "loss": 0.2036, "step": 8700 }, { "epoch": 52.43975903614458, "grad_norm": 2.2212750911712646, "learning_rate": 4.369979919678715e-05, "loss": 0.1845, "step": 8705 }, { "epoch": 52.46987951807229, "grad_norm": 1.0794906616210938, "learning_rate": 4.372489959839358e-05, "loss": 0.1834, "step": 8710 }, { "epoch": 52.5, "grad_norm": 1.8574261665344238, "learning_rate": 4.375e-05, "loss": 0.196, "step": 8715 }, { "epoch": 52.53012048192771, "grad_norm": 1.8506181240081787, "learning_rate": 4.377510040160643e-05, "loss": 0.1983, "step": 8720 }, { "epoch": 52.56024096385542, "grad_norm": 1.2905638217926025, "learning_rate": 4.380020080321285e-05, "loss": 0.2053, "step": 8725 }, { "epoch": 52.59036144578313, "grad_norm": 2.2692477703094482, "learning_rate": 4.382530120481928e-05, "loss": 0.2398, "step": 8730 }, { "epoch": 52.62048192771084, "grad_norm": 1.1828069686889648, "learning_rate": 4.385040160642571e-05, "loss": 0.189, "step": 8735 }, { "epoch": 52.65060240963855, "grad_norm": 2.9759180545806885, "learning_rate": 4.387550200803213e-05, "loss": 0.2304, "step": 8740 }, { "epoch": 52.68072289156626, "grad_norm": 1.6620732545852661, "learning_rate": 4.390060240963856e-05, "loss": 0.2039, "step": 8745 }, { "epoch": 52.71084337349397, "grad_norm": 2.220284938812256, "learning_rate": 4.392570281124498e-05, "loss": 0.1902, "step": 8750 }, { "epoch": 52.74096385542169, "grad_norm": 2.7123513221740723, "learning_rate": 4.395080321285141e-05, "loss": 0.187, "step": 8755 }, { "epoch": 52.7710843373494, "grad_norm": 1.9920449256896973, "learning_rate": 4.3975903614457834e-05, "loss": 0.1733, "step": 8760 }, { "epoch": 52.80120481927711, "grad_norm": 1.8670777082443237, "learning_rate": 4.4001004016064256e-05, "loss": 0.2132, "step": 8765 }, { "epoch": 52.83132530120482, "grad_norm": 2.346907138824463, "learning_rate": 4.4026104417670685e-05, "loss": 0.2032, "step": 8770 }, { "epoch": 52.86144578313253, "grad_norm": 2.630713939666748, "learning_rate": 4.405120481927711e-05, "loss": 0.2217, "step": 8775 }, { "epoch": 52.89156626506024, "grad_norm": 1.692564845085144, "learning_rate": 4.407630522088354e-05, "loss": 0.2216, "step": 8780 }, { "epoch": 52.92168674698795, "grad_norm": 2.8092164993286133, "learning_rate": 4.410140562248996e-05, "loss": 0.1729, "step": 8785 }, { "epoch": 52.95180722891566, "grad_norm": 2.36733341217041, "learning_rate": 4.412650602409639e-05, "loss": 0.1809, "step": 8790 }, { "epoch": 52.98192771084337, "grad_norm": 2.001338005065918, "learning_rate": 4.415160642570281e-05, "loss": 0.2081, "step": 8795 }, { "epoch": 53.0, "eval_accuracy": 0.9307904050334251, "eval_auc": 0.9701998245136139, "eval_f1": 0.8935912938331319, "eval_loss": 0.2352074831724167, "eval_precision": 0.9510939510939511, "eval_recall": 0.8426453819840365, "eval_runtime": 19.9178, "eval_samples_per_second": 127.675, "eval_steps_per_second": 0.653, "step": 8798 }, { "epoch": 53.01204819277108, "grad_norm": 1.9767760038375854, "learning_rate": 4.417670682730924e-05, "loss": 0.193, "step": 8800 }, { "epoch": 53.04216867469879, "grad_norm": 2.481797695159912, "learning_rate": 4.420180722891566e-05, "loss": 0.1708, "step": 8805 }, { "epoch": 53.0722891566265, "grad_norm": 2.7323923110961914, "learning_rate": 4.422690763052209e-05, "loss": 0.1834, "step": 8810 }, { "epoch": 53.102409638554214, "grad_norm": 2.4297308921813965, "learning_rate": 4.425200803212852e-05, "loss": 0.2178, "step": 8815 }, { "epoch": 53.13253012048193, "grad_norm": 3.1860032081604004, "learning_rate": 4.427710843373494e-05, "loss": 0.2285, "step": 8820 }, { "epoch": 53.16265060240964, "grad_norm": 1.8464399576187134, "learning_rate": 4.430220883534137e-05, "loss": 0.2045, "step": 8825 }, { "epoch": 53.19277108433735, "grad_norm": 1.474560022354126, "learning_rate": 4.4327309236947793e-05, "loss": 0.1837, "step": 8830 }, { "epoch": 53.22289156626506, "grad_norm": 2.9821767807006836, "learning_rate": 4.435240963855422e-05, "loss": 0.2869, "step": 8835 }, { "epoch": 53.25301204819277, "grad_norm": 2.518110752105713, "learning_rate": 4.4377510040160645e-05, "loss": 0.1892, "step": 8840 }, { "epoch": 53.28313253012048, "grad_norm": 2.1917898654937744, "learning_rate": 4.4402610441767074e-05, "loss": 0.1843, "step": 8845 }, { "epoch": 53.31325301204819, "grad_norm": 2.0914201736450195, "learning_rate": 4.4427710843373496e-05, "loss": 0.1934, "step": 8850 }, { "epoch": 53.3433734939759, "grad_norm": 2.946645498275757, "learning_rate": 4.445281124497992e-05, "loss": 0.2093, "step": 8855 }, { "epoch": 53.373493975903614, "grad_norm": 2.390066385269165, "learning_rate": 4.447791164658635e-05, "loss": 0.1918, "step": 8860 }, { "epoch": 53.403614457831324, "grad_norm": 1.206371784210205, "learning_rate": 4.450301204819277e-05, "loss": 0.1915, "step": 8865 }, { "epoch": 53.433734939759034, "grad_norm": 2.3323042392730713, "learning_rate": 4.45281124497992e-05, "loss": 0.1999, "step": 8870 }, { "epoch": 53.463855421686745, "grad_norm": 1.4416083097457886, "learning_rate": 4.455321285140562e-05, "loss": 0.2099, "step": 8875 }, { "epoch": 53.493975903614455, "grad_norm": 1.983001470565796, "learning_rate": 4.457831325301205e-05, "loss": 0.203, "step": 8880 }, { "epoch": 53.524096385542165, "grad_norm": 1.6679518222808838, "learning_rate": 4.460341365461847e-05, "loss": 0.1928, "step": 8885 }, { "epoch": 53.55421686746988, "grad_norm": 2.298336982727051, "learning_rate": 4.46285140562249e-05, "loss": 0.2264, "step": 8890 }, { "epoch": 53.58433734939759, "grad_norm": 1.883910894393921, "learning_rate": 4.465361445783133e-05, "loss": 0.1952, "step": 8895 }, { "epoch": 53.6144578313253, "grad_norm": 2.8218042850494385, "learning_rate": 4.467871485943775e-05, "loss": 0.2028, "step": 8900 }, { "epoch": 53.644578313253014, "grad_norm": 2.3828554153442383, "learning_rate": 4.470381526104418e-05, "loss": 0.1979, "step": 8905 }, { "epoch": 53.674698795180724, "grad_norm": 3.0916848182678223, "learning_rate": 4.4728915662650604e-05, "loss": 0.1986, "step": 8910 }, { "epoch": 53.704819277108435, "grad_norm": 3.2898638248443604, "learning_rate": 4.475401606425703e-05, "loss": 0.2263, "step": 8915 }, { "epoch": 53.734939759036145, "grad_norm": 1.38592529296875, "learning_rate": 4.4779116465863456e-05, "loss": 0.2121, "step": 8920 }, { "epoch": 53.765060240963855, "grad_norm": 2.301682710647583, "learning_rate": 4.4804216867469885e-05, "loss": 0.2023, "step": 8925 }, { "epoch": 53.795180722891565, "grad_norm": 1.671717882156372, "learning_rate": 4.482931726907631e-05, "loss": 0.1967, "step": 8930 }, { "epoch": 53.825301204819276, "grad_norm": 1.9744724035263062, "learning_rate": 4.4854417670682736e-05, "loss": 0.1752, "step": 8935 }, { "epoch": 53.855421686746986, "grad_norm": 1.1811193227767944, "learning_rate": 4.487951807228916e-05, "loss": 0.1688, "step": 8940 }, { "epoch": 53.8855421686747, "grad_norm": 2.8911030292510986, "learning_rate": 4.490461847389558e-05, "loss": 0.2126, "step": 8945 }, { "epoch": 53.91566265060241, "grad_norm": 2.2526655197143555, "learning_rate": 4.492971887550201e-05, "loss": 0.233, "step": 8950 }, { "epoch": 53.94578313253012, "grad_norm": 2.8007659912109375, "learning_rate": 4.495481927710843e-05, "loss": 0.2219, "step": 8955 }, { "epoch": 53.975903614457835, "grad_norm": 3.165022373199463, "learning_rate": 4.497991967871486e-05, "loss": 0.2496, "step": 8960 }, { "epoch": 54.0, "eval_accuracy": 0.935116004718836, "eval_auc": 0.9742937083613377, "eval_f1": 0.9021932424422051, "eval_loss": 0.1958591490983963, "eval_precision": 0.9395061728395062, "eval_recall": 0.8677309007981756, "eval_runtime": 19.5495, "eval_samples_per_second": 130.08, "eval_steps_per_second": 0.665, "step": 8964 }, { "epoch": 54.006024096385545, "grad_norm": 1.5866248607635498, "learning_rate": 4.500502008032128e-05, "loss": 0.1758, "step": 8965 }, { "epoch": 54.036144578313255, "grad_norm": 2.1163887977600098, "learning_rate": 4.503012048192771e-05, "loss": 0.2267, "step": 8970 }, { "epoch": 54.066265060240966, "grad_norm": 1.9508090019226074, "learning_rate": 4.505522088353414e-05, "loss": 0.1804, "step": 8975 }, { "epoch": 54.096385542168676, "grad_norm": 1.2839763164520264, "learning_rate": 4.5080321285140564e-05, "loss": 0.1979, "step": 8980 }, { "epoch": 54.126506024096386, "grad_norm": 1.4487197399139404, "learning_rate": 4.510542168674699e-05, "loss": 0.1666, "step": 8985 }, { "epoch": 54.1566265060241, "grad_norm": 1.8158890008926392, "learning_rate": 4.5130522088353415e-05, "loss": 0.1926, "step": 8990 }, { "epoch": 54.18674698795181, "grad_norm": 1.6030244827270508, "learning_rate": 4.5155622489959844e-05, "loss": 0.1505, "step": 8995 }, { "epoch": 54.21686746987952, "grad_norm": 1.469895362854004, "learning_rate": 4.5180722891566266e-05, "loss": 0.1798, "step": 9000 }, { "epoch": 54.24698795180723, "grad_norm": 2.093759536743164, "learning_rate": 4.5205823293172695e-05, "loss": 0.2196, "step": 9005 }, { "epoch": 54.27710843373494, "grad_norm": 1.9970499277114868, "learning_rate": 4.523092369477912e-05, "loss": 0.1971, "step": 9010 }, { "epoch": 54.30722891566265, "grad_norm": 1.7573943138122559, "learning_rate": 4.525602409638555e-05, "loss": 0.1899, "step": 9015 }, { "epoch": 54.33734939759036, "grad_norm": 1.4472109079360962, "learning_rate": 4.5281124497991976e-05, "loss": 0.1951, "step": 9020 }, { "epoch": 54.36746987951807, "grad_norm": 2.5054283142089844, "learning_rate": 4.53062248995984e-05, "loss": 0.2062, "step": 9025 }, { "epoch": 54.397590361445786, "grad_norm": 2.5859766006469727, "learning_rate": 4.533132530120482e-05, "loss": 0.1788, "step": 9030 }, { "epoch": 54.4277108433735, "grad_norm": 2.444463014602661, "learning_rate": 4.535642570281124e-05, "loss": 0.1626, "step": 9035 }, { "epoch": 54.45783132530121, "grad_norm": 1.884308099746704, "learning_rate": 4.538152610441767e-05, "loss": 0.1794, "step": 9040 }, { "epoch": 54.48795180722892, "grad_norm": 2.135911703109741, "learning_rate": 4.5406626506024094e-05, "loss": 0.1992, "step": 9045 }, { "epoch": 54.51807228915663, "grad_norm": 2.4031646251678467, "learning_rate": 4.543172690763052e-05, "loss": 0.2086, "step": 9050 }, { "epoch": 54.54819277108434, "grad_norm": 1.498276948928833, "learning_rate": 4.545682730923695e-05, "loss": 0.1702, "step": 9055 }, { "epoch": 54.57831325301205, "grad_norm": 2.10369873046875, "learning_rate": 4.5481927710843374e-05, "loss": 0.1828, "step": 9060 }, { "epoch": 54.60843373493976, "grad_norm": 1.6820237636566162, "learning_rate": 4.5507028112449803e-05, "loss": 0.198, "step": 9065 }, { "epoch": 54.63855421686747, "grad_norm": 1.806857943534851, "learning_rate": 4.5532128514056226e-05, "loss": 0.1937, "step": 9070 }, { "epoch": 54.66867469879518, "grad_norm": 2.2759451866149902, "learning_rate": 4.5557228915662655e-05, "loss": 0.1971, "step": 9075 }, { "epoch": 54.69879518072289, "grad_norm": 1.972905158996582, "learning_rate": 4.558232931726908e-05, "loss": 0.1738, "step": 9080 }, { "epoch": 54.7289156626506, "grad_norm": 2.6859445571899414, "learning_rate": 4.5607429718875506e-05, "loss": 0.1905, "step": 9085 }, { "epoch": 54.75903614457831, "grad_norm": 1.83362877368927, "learning_rate": 4.563253012048193e-05, "loss": 0.2235, "step": 9090 }, { "epoch": 54.78915662650603, "grad_norm": 1.7969437837600708, "learning_rate": 4.565763052208836e-05, "loss": 0.1856, "step": 9095 }, { "epoch": 54.81927710843374, "grad_norm": 2.4315059185028076, "learning_rate": 4.568273092369478e-05, "loss": 0.2234, "step": 9100 }, { "epoch": 54.84939759036145, "grad_norm": 2.577702045440674, "learning_rate": 4.570783132530121e-05, "loss": 0.235, "step": 9105 }, { "epoch": 54.87951807228916, "grad_norm": 1.6569136381149292, "learning_rate": 4.573293172690764e-05, "loss": 0.1639, "step": 9110 }, { "epoch": 54.90963855421687, "grad_norm": 1.192121148109436, "learning_rate": 4.575803212851406e-05, "loss": 0.1791, "step": 9115 }, { "epoch": 54.93975903614458, "grad_norm": 2.4576196670532227, "learning_rate": 4.578313253012048e-05, "loss": 0.1732, "step": 9120 }, { "epoch": 54.96987951807229, "grad_norm": 2.1103174686431885, "learning_rate": 4.5808232931726905e-05, "loss": 0.1979, "step": 9125 }, { "epoch": 55.0, "grad_norm": 2.6083672046661377, "learning_rate": 4.5833333333333334e-05, "loss": 0.2174, "step": 9130 }, { "epoch": 55.0, "eval_accuracy": 0.9178136059771923, "eval_auc": 0.9665703225417875, "eval_f1": 0.8707482993197279, "eval_loss": 0.30000755190849304, "eval_precision": 0.9513513513513514, "eval_recall": 0.8027366020524516, "eval_runtime": 19.6456, "eval_samples_per_second": 129.443, "eval_steps_per_second": 0.662, "step": 9130 }, { "epoch": 55.03012048192771, "grad_norm": 1.9286142587661743, "learning_rate": 4.585843373493976e-05, "loss": 0.2294, "step": 9135 }, { "epoch": 55.06024096385542, "grad_norm": 2.960637092590332, "learning_rate": 4.5883534136546185e-05, "loss": 0.1984, "step": 9140 }, { "epoch": 55.09036144578313, "grad_norm": 2.789501905441284, "learning_rate": 4.5908634538152614e-05, "loss": 0.2446, "step": 9145 }, { "epoch": 55.12048192771084, "grad_norm": 2.2632017135620117, "learning_rate": 4.5933734939759037e-05, "loss": 0.1907, "step": 9150 }, { "epoch": 55.15060240963855, "grad_norm": 2.261414051055908, "learning_rate": 4.5958835341365466e-05, "loss": 0.2248, "step": 9155 }, { "epoch": 55.18072289156626, "grad_norm": 1.362753987312317, "learning_rate": 4.598393574297189e-05, "loss": 0.2021, "step": 9160 }, { "epoch": 55.21084337349398, "grad_norm": 1.8857378959655762, "learning_rate": 4.600903614457832e-05, "loss": 0.1757, "step": 9165 }, { "epoch": 55.24096385542169, "grad_norm": 1.6326899528503418, "learning_rate": 4.603413654618474e-05, "loss": 0.1731, "step": 9170 }, { "epoch": 55.2710843373494, "grad_norm": 2.264676809310913, "learning_rate": 4.605923694779117e-05, "loss": 0.1987, "step": 9175 }, { "epoch": 55.30120481927711, "grad_norm": 1.5106638669967651, "learning_rate": 4.608433734939759e-05, "loss": 0.1774, "step": 9180 }, { "epoch": 55.33132530120482, "grad_norm": 1.6665120124816895, "learning_rate": 4.610943775100402e-05, "loss": 0.189, "step": 9185 }, { "epoch": 55.36144578313253, "grad_norm": 1.8084211349487305, "learning_rate": 4.613453815261045e-05, "loss": 0.2032, "step": 9190 }, { "epoch": 55.39156626506024, "grad_norm": 1.4120748043060303, "learning_rate": 4.615963855421687e-05, "loss": 0.1611, "step": 9195 }, { "epoch": 55.42168674698795, "grad_norm": 1.8941086530685425, "learning_rate": 4.61847389558233e-05, "loss": 0.1983, "step": 9200 }, { "epoch": 55.45180722891566, "grad_norm": 2.4490437507629395, "learning_rate": 4.6209839357429716e-05, "loss": 0.1884, "step": 9205 }, { "epoch": 55.48192771084337, "grad_norm": 1.140552282333374, "learning_rate": 4.6234939759036145e-05, "loss": 0.2301, "step": 9210 }, { "epoch": 55.51204819277108, "grad_norm": 2.700495958328247, "learning_rate": 4.6260040160642574e-05, "loss": 0.1932, "step": 9215 }, { "epoch": 55.54216867469879, "grad_norm": 1.8277531862258911, "learning_rate": 4.6285140562248996e-05, "loss": 0.2021, "step": 9220 }, { "epoch": 55.5722891566265, "grad_norm": 1.8265845775604248, "learning_rate": 4.6310240963855425e-05, "loss": 0.1984, "step": 9225 }, { "epoch": 55.602409638554214, "grad_norm": 2.271312952041626, "learning_rate": 4.633534136546185e-05, "loss": 0.2141, "step": 9230 }, { "epoch": 55.63253012048193, "grad_norm": 1.8416059017181396, "learning_rate": 4.6360441767068276e-05, "loss": 0.1959, "step": 9235 }, { "epoch": 55.66265060240964, "grad_norm": 1.6881704330444336, "learning_rate": 4.63855421686747e-05, "loss": 0.2153, "step": 9240 }, { "epoch": 55.69277108433735, "grad_norm": 1.877532958984375, "learning_rate": 4.641064257028113e-05, "loss": 0.1944, "step": 9245 }, { "epoch": 55.72289156626506, "grad_norm": 2.533802032470703, "learning_rate": 4.643574297188755e-05, "loss": 0.2371, "step": 9250 }, { "epoch": 55.75301204819277, "grad_norm": 1.2995754480361938, "learning_rate": 4.646084337349398e-05, "loss": 0.2245, "step": 9255 }, { "epoch": 55.78313253012048, "grad_norm": 2.5549495220184326, "learning_rate": 4.64859437751004e-05, "loss": 0.1839, "step": 9260 }, { "epoch": 55.81325301204819, "grad_norm": 3.4564034938812256, "learning_rate": 4.651104417670683e-05, "loss": 0.1955, "step": 9265 }, { "epoch": 55.8433734939759, "grad_norm": 1.7227681875228882, "learning_rate": 4.653614457831326e-05, "loss": 0.1779, "step": 9270 }, { "epoch": 55.873493975903614, "grad_norm": 1.8119142055511475, "learning_rate": 4.656124497991968e-05, "loss": 0.1914, "step": 9275 }, { "epoch": 55.903614457831324, "grad_norm": 1.8299942016601562, "learning_rate": 4.658634538152611e-05, "loss": 0.1888, "step": 9280 }, { "epoch": 55.933734939759034, "grad_norm": 2.523125648498535, "learning_rate": 4.661144578313253e-05, "loss": 0.195, "step": 9285 }, { "epoch": 55.963855421686745, "grad_norm": 3.1764206886291504, "learning_rate": 4.663654618473896e-05, "loss": 0.2003, "step": 9290 }, { "epoch": 55.993975903614455, "grad_norm": 1.5687943696975708, "learning_rate": 4.6661646586345384e-05, "loss": 0.2186, "step": 9295 }, { "epoch": 56.0, "eval_accuracy": 0.9087691702713331, "eval_auc": 0.9684418807431753, "eval_f1": 0.8533501896333755, "eval_loss": 0.29460248351097107, "eval_precision": 0.9574468085106383, "eval_recall": 0.7696693272519954, "eval_runtime": 18.3085, "eval_samples_per_second": 138.897, "eval_steps_per_second": 0.71, "step": 9296 }, { "epoch": 56.024096385542165, "grad_norm": 1.6623972654342651, "learning_rate": 4.668674698795181e-05, "loss": 0.1697, "step": 9300 }, { "epoch": 56.05421686746988, "grad_norm": 2.72373104095459, "learning_rate": 4.6711847389558236e-05, "loss": 0.2215, "step": 9305 }, { "epoch": 56.08433734939759, "grad_norm": 1.5855753421783447, "learning_rate": 4.673694779116466e-05, "loss": 0.1525, "step": 9310 }, { "epoch": 56.1144578313253, "grad_norm": 1.6013188362121582, "learning_rate": 4.676204819277109e-05, "loss": 0.2053, "step": 9315 }, { "epoch": 56.144578313253014, "grad_norm": 2.0472776889801025, "learning_rate": 4.678714859437751e-05, "loss": 0.1781, "step": 9320 }, { "epoch": 56.174698795180724, "grad_norm": 2.218830108642578, "learning_rate": 4.681224899598394e-05, "loss": 0.2152, "step": 9325 }, { "epoch": 56.204819277108435, "grad_norm": 1.771793246269226, "learning_rate": 4.683734939759036e-05, "loss": 0.1593, "step": 9330 }, { "epoch": 56.234939759036145, "grad_norm": 1.8977620601654053, "learning_rate": 4.686244979919679e-05, "loss": 0.1865, "step": 9335 }, { "epoch": 56.265060240963855, "grad_norm": 1.4574172496795654, "learning_rate": 4.688755020080321e-05, "loss": 0.1508, "step": 9340 }, { "epoch": 56.295180722891565, "grad_norm": 2.013129949569702, "learning_rate": 4.691265060240964e-05, "loss": 0.1881, "step": 9345 }, { "epoch": 56.325301204819276, "grad_norm": 1.7704567909240723, "learning_rate": 4.693775100401607e-05, "loss": 0.1996, "step": 9350 }, { "epoch": 56.355421686746986, "grad_norm": 1.887007713317871, "learning_rate": 4.696285140562249e-05, "loss": 0.1963, "step": 9355 }, { "epoch": 56.3855421686747, "grad_norm": 1.9213777780532837, "learning_rate": 4.698795180722892e-05, "loss": 0.1655, "step": 9360 }, { "epoch": 56.41566265060241, "grad_norm": 2.012073278427124, "learning_rate": 4.7013052208835344e-05, "loss": 0.138, "step": 9365 }, { "epoch": 56.44578313253012, "grad_norm": 2.9040980339050293, "learning_rate": 4.703815261044177e-05, "loss": 0.1949, "step": 9370 }, { "epoch": 56.475903614457835, "grad_norm": 1.981165885925293, "learning_rate": 4.7063253012048195e-05, "loss": 0.1696, "step": 9375 }, { "epoch": 56.506024096385545, "grad_norm": 2.3808298110961914, "learning_rate": 4.7088353413654624e-05, "loss": 0.2098, "step": 9380 }, { "epoch": 56.536144578313255, "grad_norm": 2.0263671875, "learning_rate": 4.7113453815261047e-05, "loss": 0.1811, "step": 9385 }, { "epoch": 56.566265060240966, "grad_norm": 1.884153127670288, "learning_rate": 4.713855421686747e-05, "loss": 0.2036, "step": 9390 }, { "epoch": 56.596385542168676, "grad_norm": 1.0312684774398804, "learning_rate": 4.71636546184739e-05, "loss": 0.1657, "step": 9395 }, { "epoch": 56.626506024096386, "grad_norm": 2.476505994796753, "learning_rate": 4.718875502008032e-05, "loss": 0.1784, "step": 9400 }, { "epoch": 56.6566265060241, "grad_norm": 3.1120269298553467, "learning_rate": 4.721385542168675e-05, "loss": 0.1962, "step": 9405 }, { "epoch": 56.68674698795181, "grad_norm": 2.2251052856445312, "learning_rate": 4.723895582329317e-05, "loss": 0.2168, "step": 9410 }, { "epoch": 56.71686746987952, "grad_norm": 1.7547193765640259, "learning_rate": 4.72640562248996e-05, "loss": 0.1842, "step": 9415 }, { "epoch": 56.74698795180723, "grad_norm": 2.9130563735961914, "learning_rate": 4.728915662650602e-05, "loss": 0.2157, "step": 9420 }, { "epoch": 56.77710843373494, "grad_norm": 2.223615884780884, "learning_rate": 4.731425702811245e-05, "loss": 0.1958, "step": 9425 }, { "epoch": 56.80722891566265, "grad_norm": 2.0641613006591797, "learning_rate": 4.733935742971888e-05, "loss": 0.1992, "step": 9430 }, { "epoch": 56.83734939759036, "grad_norm": 1.5671474933624268, "learning_rate": 4.73644578313253e-05, "loss": 0.1937, "step": 9435 }, { "epoch": 56.86746987951807, "grad_norm": 1.8134636878967285, "learning_rate": 4.738955823293173e-05, "loss": 0.2251, "step": 9440 }, { "epoch": 56.897590361445786, "grad_norm": 1.9263204336166382, "learning_rate": 4.7414658634538155e-05, "loss": 0.1919, "step": 9445 }, { "epoch": 56.9277108433735, "grad_norm": 1.2124485969543457, "learning_rate": 4.7439759036144584e-05, "loss": 0.172, "step": 9450 }, { "epoch": 56.95783132530121, "grad_norm": 1.6786320209503174, "learning_rate": 4.7464859437751006e-05, "loss": 0.1771, "step": 9455 }, { "epoch": 56.98795180722892, "grad_norm": 2.007603883743286, "learning_rate": 4.7489959839357435e-05, "loss": 0.1914, "step": 9460 }, { "epoch": 57.0, "eval_accuracy": 0.94494691309477, "eval_auc": 0.9780734414632444, "eval_f1": 0.9185098952270081, "eval_loss": 0.1861359030008316, "eval_precision": 0.9381688466111772, "eval_recall": 0.8996579247434435, "eval_runtime": 17.0537, "eval_samples_per_second": 149.117, "eval_steps_per_second": 0.762, "step": 9462 }, { "epoch": 57.01807228915663, "grad_norm": 1.681352972984314, "learning_rate": 4.751506024096386e-05, "loss": 0.1718, "step": 9465 }, { "epoch": 57.04819277108434, "grad_norm": 2.399091958999634, "learning_rate": 4.7540160642570286e-05, "loss": 0.171, "step": 9470 }, { "epoch": 57.07831325301205, "grad_norm": 1.1098817586898804, "learning_rate": 4.756526104417671e-05, "loss": 0.2415, "step": 9475 }, { "epoch": 57.10843373493976, "grad_norm": 3.2166264057159424, "learning_rate": 4.759036144578313e-05, "loss": 0.2354, "step": 9480 }, { "epoch": 57.13855421686747, "grad_norm": 2.6321725845336914, "learning_rate": 4.761546184738956e-05, "loss": 0.1686, "step": 9485 }, { "epoch": 57.16867469879518, "grad_norm": 1.4256139993667603, "learning_rate": 4.764056224899598e-05, "loss": 0.1904, "step": 9490 }, { "epoch": 57.19879518072289, "grad_norm": 1.756381630897522, "learning_rate": 4.766566265060241e-05, "loss": 0.2274, "step": 9495 }, { "epoch": 57.2289156626506, "grad_norm": 2.3295838832855225, "learning_rate": 4.7690763052208834e-05, "loss": 0.1896, "step": 9500 }, { "epoch": 57.25903614457831, "grad_norm": 1.5230826139450073, "learning_rate": 4.771586345381526e-05, "loss": 0.1804, "step": 9505 }, { "epoch": 57.28915662650602, "grad_norm": 2.0946292877197266, "learning_rate": 4.774096385542169e-05, "loss": 0.1768, "step": 9510 }, { "epoch": 57.31927710843374, "grad_norm": 2.077497959136963, "learning_rate": 4.7766064257028114e-05, "loss": 0.1733, "step": 9515 }, { "epoch": 57.34939759036145, "grad_norm": 2.689943313598633, "learning_rate": 4.779116465863454e-05, "loss": 0.1704, "step": 9520 }, { "epoch": 57.37951807228916, "grad_norm": 1.5451295375823975, "learning_rate": 4.7816265060240965e-05, "loss": 0.2037, "step": 9525 }, { "epoch": 57.40963855421687, "grad_norm": 2.226560115814209, "learning_rate": 4.7841365461847394e-05, "loss": 0.1868, "step": 9530 }, { "epoch": 57.43975903614458, "grad_norm": 1.874423861503601, "learning_rate": 4.786646586345382e-05, "loss": 0.1626, "step": 9535 }, { "epoch": 57.46987951807229, "grad_norm": 1.8824931383132935, "learning_rate": 4.7891566265060246e-05, "loss": 0.1873, "step": 9540 }, { "epoch": 57.5, "grad_norm": 2.245616912841797, "learning_rate": 4.791666666666667e-05, "loss": 0.1997, "step": 9545 }, { "epoch": 57.53012048192771, "grad_norm": 1.600521206855774, "learning_rate": 4.79417670682731e-05, "loss": 0.1621, "step": 9550 }, { "epoch": 57.56024096385542, "grad_norm": 1.4455299377441406, "learning_rate": 4.796686746987952e-05, "loss": 0.1607, "step": 9555 }, { "epoch": 57.59036144578313, "grad_norm": 2.1505472660064697, "learning_rate": 4.799196787148594e-05, "loss": 0.1754, "step": 9560 }, { "epoch": 57.62048192771084, "grad_norm": 2.4378881454467773, "learning_rate": 4.801706827309237e-05, "loss": 0.2255, "step": 9565 }, { "epoch": 57.65060240963855, "grad_norm": 3.2311930656433105, "learning_rate": 4.804216867469879e-05, "loss": 0.225, "step": 9570 }, { "epoch": 57.68072289156626, "grad_norm": 1.9619172811508179, "learning_rate": 4.806726907630522e-05, "loss": 0.2063, "step": 9575 }, { "epoch": 57.71084337349397, "grad_norm": 2.327970504760742, "learning_rate": 4.8092369477911644e-05, "loss": 0.2296, "step": 9580 }, { "epoch": 57.74096385542169, "grad_norm": 2.125100612640381, "learning_rate": 4.8117469879518074e-05, "loss": 0.161, "step": 9585 }, { "epoch": 57.7710843373494, "grad_norm": 1.8094075918197632, "learning_rate": 4.81425702811245e-05, "loss": 0.1591, "step": 9590 }, { "epoch": 57.80120481927711, "grad_norm": 2.7225515842437744, "learning_rate": 4.8167670682730925e-05, "loss": 0.2082, "step": 9595 }, { "epoch": 57.83132530120482, "grad_norm": 2.394782066345215, "learning_rate": 4.8192771084337354e-05, "loss": 0.1697, "step": 9600 }, { "epoch": 57.86144578313253, "grad_norm": 2.6002819538116455, "learning_rate": 4.8217871485943776e-05, "loss": 0.1993, "step": 9605 }, { "epoch": 57.89156626506024, "grad_norm": 2.59199857711792, "learning_rate": 4.8242971887550205e-05, "loss": 0.1996, "step": 9610 }, { "epoch": 57.92168674698795, "grad_norm": 1.7686564922332764, "learning_rate": 4.826807228915663e-05, "loss": 0.2004, "step": 9615 }, { "epoch": 57.95180722891566, "grad_norm": 1.7516053915023804, "learning_rate": 4.8293172690763057e-05, "loss": 0.1889, "step": 9620 }, { "epoch": 57.98192771084337, "grad_norm": 2.2338221073150635, "learning_rate": 4.831827309236948e-05, "loss": 0.195, "step": 9625 }, { "epoch": 58.0, "eval_accuracy": 0.9189933149823044, "eval_auc": 0.9644585314171279, "eval_f1": 0.8740831295843521, "eval_loss": 0.26911577582359314, "eval_precision": 0.9420289855072463, "eval_recall": 0.8152793614595211, "eval_runtime": 17.0243, "eval_samples_per_second": 149.374, "eval_steps_per_second": 0.764, "step": 9628 }, { "epoch": 58.01204819277108, "grad_norm": 0.897562563419342, "learning_rate": 4.834337349397591e-05, "loss": 0.1596, "step": 9630 }, { "epoch": 58.04216867469879, "grad_norm": 1.6326240301132202, "learning_rate": 4.836847389558233e-05, "loss": 0.1571, "step": 9635 }, { "epoch": 58.0722891566265, "grad_norm": 1.8651673793792725, "learning_rate": 4.839357429718876e-05, "loss": 0.1737, "step": 9640 }, { "epoch": 58.102409638554214, "grad_norm": 2.2550816535949707, "learning_rate": 4.841867469879519e-05, "loss": 0.1726, "step": 9645 }, { "epoch": 58.13253012048193, "grad_norm": 1.8229386806488037, "learning_rate": 4.8443775100401604e-05, "loss": 0.1772, "step": 9650 }, { "epoch": 58.16265060240964, "grad_norm": 2.511829376220703, "learning_rate": 4.846887550200803e-05, "loss": 0.1739, "step": 9655 }, { "epoch": 58.19277108433735, "grad_norm": 1.3670412302017212, "learning_rate": 4.8493975903614455e-05, "loss": 0.1759, "step": 9660 }, { "epoch": 58.22289156626506, "grad_norm": 1.1315114498138428, "learning_rate": 4.8519076305220884e-05, "loss": 0.177, "step": 9665 }, { "epoch": 58.25301204819277, "grad_norm": 1.233046293258667, "learning_rate": 4.854417670682731e-05, "loss": 0.1466, "step": 9670 }, { "epoch": 58.28313253012048, "grad_norm": 2.8945133686065674, "learning_rate": 4.8569277108433736e-05, "loss": 0.171, "step": 9675 }, { "epoch": 58.31325301204819, "grad_norm": 2.6257240772247314, "learning_rate": 4.8594377510040165e-05, "loss": 0.1765, "step": 9680 }, { "epoch": 58.3433734939759, "grad_norm": 2.419882297515869, "learning_rate": 4.861947791164659e-05, "loss": 0.1816, "step": 9685 }, { "epoch": 58.373493975903614, "grad_norm": 2.089569091796875, "learning_rate": 4.8644578313253016e-05, "loss": 0.1584, "step": 9690 }, { "epoch": 58.403614457831324, "grad_norm": 1.8195586204528809, "learning_rate": 4.866967871485944e-05, "loss": 0.1887, "step": 9695 }, { "epoch": 58.433734939759034, "grad_norm": 2.7612900733947754, "learning_rate": 4.869477911646587e-05, "loss": 0.1992, "step": 9700 }, { "epoch": 58.463855421686745, "grad_norm": 1.2157365083694458, "learning_rate": 4.871987951807229e-05, "loss": 0.196, "step": 9705 }, { "epoch": 58.493975903614455, "grad_norm": 1.898940086364746, "learning_rate": 4.874497991967872e-05, "loss": 0.1826, "step": 9710 }, { "epoch": 58.524096385542165, "grad_norm": 2.632331609725952, "learning_rate": 4.877008032128514e-05, "loss": 0.1896, "step": 9715 }, { "epoch": 58.55421686746988, "grad_norm": 1.7673542499542236, "learning_rate": 4.879518072289157e-05, "loss": 0.1877, "step": 9720 }, { "epoch": 58.58433734939759, "grad_norm": 2.230239152908325, "learning_rate": 4.8820281124498e-05, "loss": 0.2212, "step": 9725 }, { "epoch": 58.6144578313253, "grad_norm": 1.7951022386550903, "learning_rate": 4.884538152610442e-05, "loss": 0.2038, "step": 9730 }, { "epoch": 58.644578313253014, "grad_norm": 1.6010494232177734, "learning_rate": 4.887048192771085e-05, "loss": 0.1843, "step": 9735 }, { "epoch": 58.674698795180724, "grad_norm": 3.0509254932403564, "learning_rate": 4.8895582329317266e-05, "loss": 0.2334, "step": 9740 }, { "epoch": 58.704819277108435, "grad_norm": 2.3465235233306885, "learning_rate": 4.8920682730923695e-05, "loss": 0.2078, "step": 9745 }, { "epoch": 58.734939759036145, "grad_norm": 2.3988800048828125, "learning_rate": 4.8945783132530124e-05, "loss": 0.1669, "step": 9750 }, { "epoch": 58.765060240963855, "grad_norm": 2.1776366233825684, "learning_rate": 4.8970883534136546e-05, "loss": 0.2175, "step": 9755 }, { "epoch": 58.795180722891565, "grad_norm": 3.2049734592437744, "learning_rate": 4.8995983935742975e-05, "loss": 0.2254, "step": 9760 }, { "epoch": 58.825301204819276, "grad_norm": 2.559453010559082, "learning_rate": 4.90210843373494e-05, "loss": 0.2251, "step": 9765 }, { "epoch": 58.855421686746986, "grad_norm": 1.55681574344635, "learning_rate": 4.904618473895583e-05, "loss": 0.2225, "step": 9770 }, { "epoch": 58.8855421686747, "grad_norm": 2.9513471126556396, "learning_rate": 4.907128514056225e-05, "loss": 0.2351, "step": 9775 }, { "epoch": 58.91566265060241, "grad_norm": 1.4565945863723755, "learning_rate": 4.909638554216868e-05, "loss": 0.19, "step": 9780 }, { "epoch": 58.94578313253012, "grad_norm": 2.205860137939453, "learning_rate": 4.91214859437751e-05, "loss": 0.1828, "step": 9785 }, { "epoch": 58.975903614457835, "grad_norm": 1.4514609575271606, "learning_rate": 4.914658634538153e-05, "loss": 0.1996, "step": 9790 }, { "epoch": 59.0, "eval_accuracy": 0.9315768777034998, "eval_auc": 0.9756372332285252, "eval_f1": 0.8941605839416058, "eval_loss": 0.23759578168392181, "eval_precision": 0.9582790091264668, "eval_recall": 0.8380843785632839, "eval_runtime": 17.2202, "eval_samples_per_second": 147.676, "eval_steps_per_second": 0.755, "step": 9794 }, { "epoch": 59.006024096385545, "grad_norm": 1.3549339771270752, "learning_rate": 4.917168674698795e-05, "loss": 0.1943, "step": 9795 }, { "epoch": 59.036144578313255, "grad_norm": 1.6545672416687012, "learning_rate": 4.919678714859438e-05, "loss": 0.1442, "step": 9800 }, { "epoch": 59.066265060240966, "grad_norm": 3.495600461959839, "learning_rate": 4.922188755020081e-05, "loss": 0.1783, "step": 9805 }, { "epoch": 59.096385542168676, "grad_norm": 1.1288702487945557, "learning_rate": 4.924698795180723e-05, "loss": 0.1477, "step": 9810 }, { "epoch": 59.126506024096386, "grad_norm": 2.1384871006011963, "learning_rate": 4.927208835341366e-05, "loss": 0.2319, "step": 9815 }, { "epoch": 59.1566265060241, "grad_norm": 2.3611960411071777, "learning_rate": 4.9297188755020084e-05, "loss": 0.1767, "step": 9820 }, { "epoch": 59.18674698795181, "grad_norm": 2.8789052963256836, "learning_rate": 4.932228915662651e-05, "loss": 0.1613, "step": 9825 }, { "epoch": 59.21686746987952, "grad_norm": 1.6236155033111572, "learning_rate": 4.9347389558232935e-05, "loss": 0.2431, "step": 9830 }, { "epoch": 59.24698795180723, "grad_norm": 1.9372285604476929, "learning_rate": 4.937248995983936e-05, "loss": 0.1726, "step": 9835 }, { "epoch": 59.27710843373494, "grad_norm": 2.4922103881835938, "learning_rate": 4.9397590361445786e-05, "loss": 0.195, "step": 9840 }, { "epoch": 59.30722891566265, "grad_norm": 2.4770331382751465, "learning_rate": 4.942269076305221e-05, "loss": 0.1671, "step": 9845 }, { "epoch": 59.33734939759036, "grad_norm": 2.4791817665100098, "learning_rate": 4.944779116465864e-05, "loss": 0.1679, "step": 9850 }, { "epoch": 59.36746987951807, "grad_norm": 1.4872862100601196, "learning_rate": 4.947289156626506e-05, "loss": 0.148, "step": 9855 }, { "epoch": 59.397590361445786, "grad_norm": 1.551857829093933, "learning_rate": 4.949799196787149e-05, "loss": 0.1942, "step": 9860 }, { "epoch": 59.4277108433735, "grad_norm": 2.071345806121826, "learning_rate": 4.952309236947791e-05, "loss": 0.1539, "step": 9865 }, { "epoch": 59.45783132530121, "grad_norm": 2.5802876949310303, "learning_rate": 4.954819277108434e-05, "loss": 0.1834, "step": 9870 }, { "epoch": 59.48795180722892, "grad_norm": 1.9511005878448486, "learning_rate": 4.957329317269076e-05, "loss": 0.2367, "step": 9875 }, { "epoch": 59.51807228915663, "grad_norm": 2.43558669090271, "learning_rate": 4.959839357429719e-05, "loss": 0.1732, "step": 9880 }, { "epoch": 59.54819277108434, "grad_norm": 2.888108015060425, "learning_rate": 4.962349397590362e-05, "loss": 0.1688, "step": 9885 }, { "epoch": 59.57831325301205, "grad_norm": 1.2000738382339478, "learning_rate": 4.964859437751004e-05, "loss": 0.1844, "step": 9890 }, { "epoch": 59.60843373493976, "grad_norm": 1.5006636381149292, "learning_rate": 4.967369477911647e-05, "loss": 0.2022, "step": 9895 }, { "epoch": 59.63855421686747, "grad_norm": 1.8979883193969727, "learning_rate": 4.9698795180722894e-05, "loss": 0.1858, "step": 9900 }, { "epoch": 59.66867469879518, "grad_norm": 1.771418809890747, "learning_rate": 4.972389558232932e-05, "loss": 0.1665, "step": 9905 }, { "epoch": 59.69879518072289, "grad_norm": 1.7620320320129395, "learning_rate": 4.9748995983935746e-05, "loss": 0.1907, "step": 9910 }, { "epoch": 59.7289156626506, "grad_norm": 1.8053280115127563, "learning_rate": 4.9774096385542175e-05, "loss": 0.1822, "step": 9915 }, { "epoch": 59.75903614457831, "grad_norm": 1.4249768257141113, "learning_rate": 4.97991967871486e-05, "loss": 0.1966, "step": 9920 }, { "epoch": 59.78915662650603, "grad_norm": 1.8042192459106445, "learning_rate": 4.982429718875502e-05, "loss": 0.1919, "step": 9925 }, { "epoch": 59.81927710843374, "grad_norm": 1.9030768871307373, "learning_rate": 4.984939759036145e-05, "loss": 0.1794, "step": 9930 }, { "epoch": 59.84939759036145, "grad_norm": 1.8618736267089844, "learning_rate": 4.987449799196787e-05, "loss": 0.18, "step": 9935 }, { "epoch": 59.87951807228916, "grad_norm": 3.4829823970794678, "learning_rate": 4.98995983935743e-05, "loss": 0.1943, "step": 9940 }, { "epoch": 59.90963855421687, "grad_norm": 2.407097101211548, "learning_rate": 4.992469879518072e-05, "loss": 0.2142, "step": 9945 }, { "epoch": 59.93975903614458, "grad_norm": 1.5124931335449219, "learning_rate": 4.994979919678715e-05, "loss": 0.1798, "step": 9950 }, { "epoch": 59.96987951807229, "grad_norm": 1.4459714889526367, "learning_rate": 4.997489959839357e-05, "loss": 0.1883, "step": 9955 }, { "epoch": 60.0, "grad_norm": 1.3064192533493042, "learning_rate": 5e-05, "loss": 0.1891, "step": 9960 }, { "epoch": 60.0, "eval_accuracy": 0.9327565867086118, "eval_auc": 0.9759270869123019, "eval_f1": 0.8968014484007242, "eval_loss": 0.2146846204996109, "eval_precision": 0.9525641025641025, "eval_recall": 0.8472063854047891, "eval_runtime": 17.0585, "eval_samples_per_second": 149.075, "eval_steps_per_second": 0.762, "step": 9960 }, { "epoch": 60.03012048192771, "grad_norm": 1.8910998106002808, "learning_rate": 4.9997211066488176e-05, "loss": 0.1941, "step": 9965 }, { "epoch": 60.06024096385542, "grad_norm": 1.4629839658737183, "learning_rate": 4.999442213297636e-05, "loss": 0.1729, "step": 9970 }, { "epoch": 60.09036144578313, "grad_norm": 2.8674991130828857, "learning_rate": 4.9991633199464524e-05, "loss": 0.1982, "step": 9975 }, { "epoch": 60.12048192771084, "grad_norm": 2.215824842453003, "learning_rate": 4.99888442659527e-05, "loss": 0.187, "step": 9980 }, { "epoch": 60.15060240963855, "grad_norm": 1.5279314517974854, "learning_rate": 4.998605533244088e-05, "loss": 0.1858, "step": 9985 }, { "epoch": 60.18072289156626, "grad_norm": 2.4290390014648438, "learning_rate": 4.998326639892905e-05, "loss": 0.1554, "step": 9990 }, { "epoch": 60.21084337349398, "grad_norm": 1.4494363069534302, "learning_rate": 4.9980477465417226e-05, "loss": 0.1648, "step": 9995 }, { "epoch": 60.24096385542169, "grad_norm": 3.501488208770752, "learning_rate": 4.9977688531905406e-05, "loss": 0.304, "step": 10000 }, { "epoch": 60.2710843373494, "grad_norm": 1.657923936843872, "learning_rate": 4.997489959839357e-05, "loss": 0.1939, "step": 10005 }, { "epoch": 60.30120481927711, "grad_norm": 1.2460441589355469, "learning_rate": 4.997211066488175e-05, "loss": 0.1355, "step": 10010 }, { "epoch": 60.33132530120482, "grad_norm": 2.036921739578247, "learning_rate": 4.996932173136993e-05, "loss": 0.1821, "step": 10015 }, { "epoch": 60.36144578313253, "grad_norm": 2.1364831924438477, "learning_rate": 4.99665327978581e-05, "loss": 0.1886, "step": 10020 }, { "epoch": 60.39156626506024, "grad_norm": 1.8020092248916626, "learning_rate": 4.9963743864346275e-05, "loss": 0.197, "step": 10025 }, { "epoch": 60.42168674698795, "grad_norm": 1.8530651330947876, "learning_rate": 4.9960954930834456e-05, "loss": 0.1697, "step": 10030 }, { "epoch": 60.45180722891566, "grad_norm": 1.9424024820327759, "learning_rate": 4.995816599732262e-05, "loss": 0.164, "step": 10035 }, { "epoch": 60.48192771084337, "grad_norm": 1.4572926759719849, "learning_rate": 4.99553770638108e-05, "loss": 0.155, "step": 10040 }, { "epoch": 60.51204819277108, "grad_norm": 1.6664016246795654, "learning_rate": 4.995258813029898e-05, "loss": 0.1671, "step": 10045 }, { "epoch": 60.54216867469879, "grad_norm": 2.254645347595215, "learning_rate": 4.994979919678715e-05, "loss": 0.1583, "step": 10050 }, { "epoch": 60.5722891566265, "grad_norm": 2.37227725982666, "learning_rate": 4.9947010263275325e-05, "loss": 0.1317, "step": 10055 }, { "epoch": 60.602409638554214, "grad_norm": 1.5512620210647583, "learning_rate": 4.99442213297635e-05, "loss": 0.1791, "step": 10060 }, { "epoch": 60.63253012048193, "grad_norm": 2.3960814476013184, "learning_rate": 4.994143239625167e-05, "loss": 0.1995, "step": 10065 }, { "epoch": 60.66265060240964, "grad_norm": 2.3821523189544678, "learning_rate": 4.9938643462739846e-05, "loss": 0.2163, "step": 10070 }, { "epoch": 60.69277108433735, "grad_norm": 1.7303850650787354, "learning_rate": 4.993585452922803e-05, "loss": 0.2091, "step": 10075 }, { "epoch": 60.72289156626506, "grad_norm": 1.8073005676269531, "learning_rate": 4.99330655957162e-05, "loss": 0.1943, "step": 10080 }, { "epoch": 60.75301204819277, "grad_norm": 1.7040612697601318, "learning_rate": 4.9930276662204374e-05, "loss": 0.1603, "step": 10085 }, { "epoch": 60.78313253012048, "grad_norm": 1.420932650566101, "learning_rate": 4.992748772869255e-05, "loss": 0.1601, "step": 10090 }, { "epoch": 60.81325301204819, "grad_norm": 1.76956307888031, "learning_rate": 4.992469879518072e-05, "loss": 0.2245, "step": 10095 }, { "epoch": 60.8433734939759, "grad_norm": 1.641416311264038, "learning_rate": 4.99219098616689e-05, "loss": 0.1664, "step": 10100 }, { "epoch": 60.873493975903614, "grad_norm": 1.379858136177063, "learning_rate": 4.9919120928157076e-05, "loss": 0.141, "step": 10105 }, { "epoch": 60.903614457831324, "grad_norm": 1.7981139421463013, "learning_rate": 4.991633199464525e-05, "loss": 0.1728, "step": 10110 }, { "epoch": 60.933734939759034, "grad_norm": 1.6015158891677856, "learning_rate": 4.991354306113343e-05, "loss": 0.178, "step": 10115 }, { "epoch": 60.963855421686745, "grad_norm": 2.2964370250701904, "learning_rate": 4.99107541276216e-05, "loss": 0.1792, "step": 10120 }, { "epoch": 60.993975903614455, "grad_norm": 2.3170790672302246, "learning_rate": 4.990796519410977e-05, "loss": 0.1353, "step": 10125 }, { "epoch": 61.0, "eval_accuracy": 0.9158474243020055, "eval_auc": 0.9712860058504588, "eval_f1": 0.8654088050314466, "eval_loss": 0.29633164405822754, "eval_precision": 0.9649368863955119, "eval_recall": 0.7844925883694412, "eval_runtime": 16.6957, "eval_samples_per_second": 152.315, "eval_steps_per_second": 0.779, "step": 10126 }, { "epoch": 61.024096385542165, "grad_norm": 1.8975228071212769, "learning_rate": 4.990517626059795e-05, "loss": 0.1957, "step": 10130 }, { "epoch": 61.05421686746988, "grad_norm": 1.653012990951538, "learning_rate": 4.9902387327086126e-05, "loss": 0.1494, "step": 10135 }, { "epoch": 61.08433734939759, "grad_norm": 1.3063195943832397, "learning_rate": 4.98995983935743e-05, "loss": 0.1789, "step": 10140 }, { "epoch": 61.1144578313253, "grad_norm": 2.9485018253326416, "learning_rate": 4.989680946006248e-05, "loss": 0.1966, "step": 10145 }, { "epoch": 61.144578313253014, "grad_norm": 2.3813064098358154, "learning_rate": 4.989402052655065e-05, "loss": 0.1674, "step": 10150 }, { "epoch": 61.174698795180724, "grad_norm": 2.1475465297698975, "learning_rate": 4.989123159303882e-05, "loss": 0.1944, "step": 10155 }, { "epoch": 61.204819277108435, "grad_norm": 2.3563380241394043, "learning_rate": 4.9888442659527e-05, "loss": 0.2116, "step": 10160 }, { "epoch": 61.234939759036145, "grad_norm": 2.4149973392486572, "learning_rate": 4.9885653726015175e-05, "loss": 0.2233, "step": 10165 }, { "epoch": 61.265060240963855, "grad_norm": 1.2588717937469482, "learning_rate": 4.988286479250335e-05, "loss": 0.1677, "step": 10170 }, { "epoch": 61.295180722891565, "grad_norm": 1.472192645072937, "learning_rate": 4.988007585899152e-05, "loss": 0.1813, "step": 10175 }, { "epoch": 61.325301204819276, "grad_norm": 1.9705734252929688, "learning_rate": 4.98772869254797e-05, "loss": 0.1787, "step": 10180 }, { "epoch": 61.355421686746986, "grad_norm": 1.8915636539459229, "learning_rate": 4.987449799196787e-05, "loss": 0.1878, "step": 10185 }, { "epoch": 61.3855421686747, "grad_norm": 1.9380899667739868, "learning_rate": 4.987170905845605e-05, "loss": 0.1863, "step": 10190 }, { "epoch": 61.41566265060241, "grad_norm": 2.0398693084716797, "learning_rate": 4.9868920124944225e-05, "loss": 0.1687, "step": 10195 }, { "epoch": 61.44578313253012, "grad_norm": 1.4080811738967896, "learning_rate": 4.98661311914324e-05, "loss": 0.1652, "step": 10200 }, { "epoch": 61.475903614457835, "grad_norm": 2.3035523891448975, "learning_rate": 4.986334225792057e-05, "loss": 0.1546, "step": 10205 }, { "epoch": 61.506024096385545, "grad_norm": 2.251307964324951, "learning_rate": 4.9860553324408746e-05, "loss": 0.2048, "step": 10210 }, { "epoch": 61.536144578313255, "grad_norm": 1.9995403289794922, "learning_rate": 4.985776439089692e-05, "loss": 0.2228, "step": 10215 }, { "epoch": 61.566265060240966, "grad_norm": 1.5359236001968384, "learning_rate": 4.98549754573851e-05, "loss": 0.1831, "step": 10220 }, { "epoch": 61.596385542168676, "grad_norm": 2.048762083053589, "learning_rate": 4.9852186523873275e-05, "loss": 0.1967, "step": 10225 }, { "epoch": 61.626506024096386, "grad_norm": 1.4467693567276, "learning_rate": 4.984939759036145e-05, "loss": 0.2185, "step": 10230 }, { "epoch": 61.6566265060241, "grad_norm": 1.6843082904815674, "learning_rate": 4.984660865684962e-05, "loss": 0.1966, "step": 10235 }, { "epoch": 61.68674698795181, "grad_norm": 1.7061634063720703, "learning_rate": 4.9843819723337796e-05, "loss": 0.1718, "step": 10240 }, { "epoch": 61.71686746987952, "grad_norm": 1.6093111038208008, "learning_rate": 4.984103078982597e-05, "loss": 0.1719, "step": 10245 }, { "epoch": 61.74698795180723, "grad_norm": 1.2672582864761353, "learning_rate": 4.983824185631415e-05, "loss": 0.1679, "step": 10250 }, { "epoch": 61.77710843373494, "grad_norm": 1.7078900337219238, "learning_rate": 4.9835452922802324e-05, "loss": 0.1701, "step": 10255 }, { "epoch": 61.80722891566265, "grad_norm": 1.8595340251922607, "learning_rate": 4.98326639892905e-05, "loss": 0.155, "step": 10260 }, { "epoch": 61.83734939759036, "grad_norm": 2.7607834339141846, "learning_rate": 4.982987505577867e-05, "loss": 0.2112, "step": 10265 }, { "epoch": 61.86746987951807, "grad_norm": 2.582641839981079, "learning_rate": 4.9827086122266845e-05, "loss": 0.2258, "step": 10270 }, { "epoch": 61.897590361445786, "grad_norm": 1.8134492635726929, "learning_rate": 4.982429718875502e-05, "loss": 0.2046, "step": 10275 }, { "epoch": 61.9277108433735, "grad_norm": 1.6217050552368164, "learning_rate": 4.98215082552432e-05, "loss": 0.1539, "step": 10280 }, { "epoch": 61.95783132530121, "grad_norm": 2.1390674114227295, "learning_rate": 4.9818719321731374e-05, "loss": 0.1442, "step": 10285 }, { "epoch": 61.98795180722892, "grad_norm": 1.8469104766845703, "learning_rate": 4.981593038821955e-05, "loss": 0.1799, "step": 10290 }, { "epoch": 62.0, "eval_accuracy": 0.9264648053480141, "eval_auc": 0.9765399888575726, "eval_f1": 0.8856269113149847, "eval_loss": 0.24341483414173126, "eval_precision": 0.9551451187335093, "eval_recall": 0.8255416191562144, "eval_runtime": 19.8091, "eval_samples_per_second": 128.375, "eval_steps_per_second": 0.656, "step": 10292 }, { "epoch": 62.01807228915663, "grad_norm": 1.5650662183761597, "learning_rate": 4.981314145470772e-05, "loss": 0.172, "step": 10295 }, { "epoch": 62.04819277108434, "grad_norm": 1.2761918306350708, "learning_rate": 4.9810352521195895e-05, "loss": 0.1496, "step": 10300 }, { "epoch": 62.07831325301205, "grad_norm": 1.1782655715942383, "learning_rate": 4.980756358768407e-05, "loss": 0.1902, "step": 10305 }, { "epoch": 62.10843373493976, "grad_norm": 1.1633213758468628, "learning_rate": 4.980477465417225e-05, "loss": 0.1556, "step": 10310 }, { "epoch": 62.13855421686747, "grad_norm": 2.094839096069336, "learning_rate": 4.980198572066042e-05, "loss": 0.1414, "step": 10315 }, { "epoch": 62.16867469879518, "grad_norm": 2.7337560653686523, "learning_rate": 4.97991967871486e-05, "loss": 0.1442, "step": 10320 }, { "epoch": 62.19879518072289, "grad_norm": 1.796980381011963, "learning_rate": 4.979640785363677e-05, "loss": 0.1309, "step": 10325 }, { "epoch": 62.2289156626506, "grad_norm": 2.00614857673645, "learning_rate": 4.9793618920124945e-05, "loss": 0.1709, "step": 10330 }, { "epoch": 62.25903614457831, "grad_norm": 1.2246202230453491, "learning_rate": 4.979082998661312e-05, "loss": 0.2058, "step": 10335 }, { "epoch": 62.28915662650602, "grad_norm": 1.9987736940383911, "learning_rate": 4.97880410531013e-05, "loss": 0.1649, "step": 10340 }, { "epoch": 62.31927710843374, "grad_norm": 1.9422751665115356, "learning_rate": 4.978525211958947e-05, "loss": 0.1839, "step": 10345 }, { "epoch": 62.34939759036145, "grad_norm": 2.593979597091675, "learning_rate": 4.9782463186077647e-05, "loss": 0.1791, "step": 10350 }, { "epoch": 62.37951807228916, "grad_norm": 2.39090633392334, "learning_rate": 4.977967425256582e-05, "loss": 0.2201, "step": 10355 }, { "epoch": 62.40963855421687, "grad_norm": 1.6905205249786377, "learning_rate": 4.9776885319053994e-05, "loss": 0.1624, "step": 10360 }, { "epoch": 62.43975903614458, "grad_norm": 1.8943250179290771, "learning_rate": 4.9774096385542175e-05, "loss": 0.176, "step": 10365 }, { "epoch": 62.46987951807229, "grad_norm": 1.9889397621154785, "learning_rate": 4.977130745203035e-05, "loss": 0.1907, "step": 10370 }, { "epoch": 62.5, "grad_norm": 2.0285348892211914, "learning_rate": 4.976851851851852e-05, "loss": 0.2148, "step": 10375 }, { "epoch": 62.53012048192771, "grad_norm": 1.7169373035430908, "learning_rate": 4.9765729585006696e-05, "loss": 0.1899, "step": 10380 }, { "epoch": 62.56024096385542, "grad_norm": 2.2081243991851807, "learning_rate": 4.976294065149487e-05, "loss": 0.1959, "step": 10385 }, { "epoch": 62.59036144578313, "grad_norm": 1.8505403995513916, "learning_rate": 4.9760151717983044e-05, "loss": 0.1847, "step": 10390 }, { "epoch": 62.62048192771084, "grad_norm": 1.6287717819213867, "learning_rate": 4.9757362784471224e-05, "loss": 0.1766, "step": 10395 }, { "epoch": 62.65060240963855, "grad_norm": 2.9438302516937256, "learning_rate": 4.97545738509594e-05, "loss": 0.1632, "step": 10400 }, { "epoch": 62.68072289156626, "grad_norm": 2.115572214126587, "learning_rate": 4.975178491744757e-05, "loss": 0.1832, "step": 10405 }, { "epoch": 62.71084337349397, "grad_norm": 1.9674654006958008, "learning_rate": 4.9748995983935746e-05, "loss": 0.1474, "step": 10410 }, { "epoch": 62.74096385542169, "grad_norm": 1.9791538715362549, "learning_rate": 4.974620705042392e-05, "loss": 0.1703, "step": 10415 }, { "epoch": 62.7710843373494, "grad_norm": 1.506995439529419, "learning_rate": 4.974341811691209e-05, "loss": 0.1613, "step": 10420 }, { "epoch": 62.80120481927711, "grad_norm": 1.310712456703186, "learning_rate": 4.9740629183400274e-05, "loss": 0.204, "step": 10425 }, { "epoch": 62.83132530120482, "grad_norm": 2.5213351249694824, "learning_rate": 4.973784024988845e-05, "loss": 0.1617, "step": 10430 }, { "epoch": 62.86144578313253, "grad_norm": 0.9870374798774719, "learning_rate": 4.973505131637662e-05, "loss": 0.1408, "step": 10435 }, { "epoch": 62.89156626506024, "grad_norm": 2.704153060913086, "learning_rate": 4.9732262382864795e-05, "loss": 0.1841, "step": 10440 }, { "epoch": 62.92168674698795, "grad_norm": 1.7165454626083374, "learning_rate": 4.972947344935297e-05, "loss": 0.2026, "step": 10445 }, { "epoch": 62.95180722891566, "grad_norm": 3.0817670822143555, "learning_rate": 4.972668451584114e-05, "loss": 0.1867, "step": 10450 }, { "epoch": 62.98192771084337, "grad_norm": 2.2709290981292725, "learning_rate": 4.972389558232932e-05, "loss": 0.1682, "step": 10455 }, { "epoch": 63.0, "eval_accuracy": 0.936295713723948, "eval_auc": 0.9788920129055042, "eval_f1": 0.9027611044417767, "eval_loss": 0.20549307763576508, "eval_precision": 0.9531051964512041, "eval_recall": 0.8574686431014823, "eval_runtime": 19.5681, "eval_samples_per_second": 129.956, "eval_steps_per_second": 0.664, "step": 10458 }, { "epoch": 63.01204819277108, "grad_norm": 1.6144845485687256, "learning_rate": 4.97211066488175e-05, "loss": 0.1778, "step": 10460 }, { "epoch": 63.04216867469879, "grad_norm": 1.5116358995437622, "learning_rate": 4.971831771530567e-05, "loss": 0.1997, "step": 10465 }, { "epoch": 63.0722891566265, "grad_norm": 1.9036710262298584, "learning_rate": 4.9715528781793845e-05, "loss": 0.1625, "step": 10470 }, { "epoch": 63.102409638554214, "grad_norm": 1.6996465921401978, "learning_rate": 4.971273984828202e-05, "loss": 0.152, "step": 10475 }, { "epoch": 63.13253012048193, "grad_norm": 1.7426986694335938, "learning_rate": 4.970995091477019e-05, "loss": 0.1615, "step": 10480 }, { "epoch": 63.16265060240964, "grad_norm": 1.8857890367507935, "learning_rate": 4.970716198125837e-05, "loss": 0.2047, "step": 10485 }, { "epoch": 63.19277108433735, "grad_norm": 1.2623646259307861, "learning_rate": 4.970437304774655e-05, "loss": 0.1901, "step": 10490 }, { "epoch": 63.22289156626506, "grad_norm": 1.2678370475769043, "learning_rate": 4.970158411423472e-05, "loss": 0.1536, "step": 10495 }, { "epoch": 63.25301204819277, "grad_norm": 2.6455559730529785, "learning_rate": 4.9698795180722894e-05, "loss": 0.1528, "step": 10500 }, { "epoch": 63.28313253012048, "grad_norm": 2.2459418773651123, "learning_rate": 4.969600624721107e-05, "loss": 0.1432, "step": 10505 }, { "epoch": 63.31325301204819, "grad_norm": 2.285522222518921, "learning_rate": 4.969321731369924e-05, "loss": 0.1685, "step": 10510 }, { "epoch": 63.3433734939759, "grad_norm": 1.965384840965271, "learning_rate": 4.969042838018742e-05, "loss": 0.1829, "step": 10515 }, { "epoch": 63.373493975903614, "grad_norm": 1.9819344282150269, "learning_rate": 4.9687639446675596e-05, "loss": 0.1623, "step": 10520 }, { "epoch": 63.403614457831324, "grad_norm": 1.7619704008102417, "learning_rate": 4.968485051316376e-05, "loss": 0.2063, "step": 10525 }, { "epoch": 63.433734939759034, "grad_norm": 1.49800705909729, "learning_rate": 4.9682061579651944e-05, "loss": 0.16, "step": 10530 }, { "epoch": 63.463855421686745, "grad_norm": 1.3514907360076904, "learning_rate": 4.967927264614012e-05, "loss": 0.1473, "step": 10535 }, { "epoch": 63.493975903614455, "grad_norm": 2.645862579345703, "learning_rate": 4.967648371262829e-05, "loss": 0.2033, "step": 10540 }, { "epoch": 63.524096385542165, "grad_norm": 2.3720364570617676, "learning_rate": 4.967369477911647e-05, "loss": 0.1581, "step": 10545 }, { "epoch": 63.55421686746988, "grad_norm": 1.090379238128662, "learning_rate": 4.9670905845604646e-05, "loss": 0.1836, "step": 10550 }, { "epoch": 63.58433734939759, "grad_norm": 2.2446887493133545, "learning_rate": 4.966811691209281e-05, "loss": 0.1688, "step": 10555 }, { "epoch": 63.6144578313253, "grad_norm": 2.3685302734375, "learning_rate": 4.9665327978580993e-05, "loss": 0.1821, "step": 10560 }, { "epoch": 63.644578313253014, "grad_norm": 1.32175874710083, "learning_rate": 4.966253904506917e-05, "loss": 0.1941, "step": 10565 }, { "epoch": 63.674698795180724, "grad_norm": 2.90374493598938, "learning_rate": 4.965975011155734e-05, "loss": 0.249, "step": 10570 }, { "epoch": 63.704819277108435, "grad_norm": 1.3915349245071411, "learning_rate": 4.965696117804552e-05, "loss": 0.1556, "step": 10575 }, { "epoch": 63.734939759036145, "grad_norm": 1.482227087020874, "learning_rate": 4.9654172244533695e-05, "loss": 0.1871, "step": 10580 }, { "epoch": 63.765060240963855, "grad_norm": 2.8194668292999268, "learning_rate": 4.965138331102186e-05, "loss": 0.2093, "step": 10585 }, { "epoch": 63.795180722891565, "grad_norm": 2.2497358322143555, "learning_rate": 4.964859437751004e-05, "loss": 0.1644, "step": 10590 }, { "epoch": 63.825301204819276, "grad_norm": 1.5044275522232056, "learning_rate": 4.964580544399822e-05, "loss": 0.1752, "step": 10595 }, { "epoch": 63.855421686746986, "grad_norm": 2.974330186843872, "learning_rate": 4.964301651048639e-05, "loss": 0.1891, "step": 10600 }, { "epoch": 63.8855421686747, "grad_norm": 2.525432825088501, "learning_rate": 4.964022757697457e-05, "loss": 0.2096, "step": 10605 }, { "epoch": 63.91566265060241, "grad_norm": 1.7562264204025269, "learning_rate": 4.963743864346274e-05, "loss": 0.1663, "step": 10610 }, { "epoch": 63.94578313253012, "grad_norm": 1.7163867950439453, "learning_rate": 4.963464970995091e-05, "loss": 0.1809, "step": 10615 }, { "epoch": 63.975903614457835, "grad_norm": 2.554893732070923, "learning_rate": 4.963186077643909e-05, "loss": 0.1798, "step": 10620 }, { "epoch": 64.0, "eval_accuracy": 0.9272512780180888, "eval_auc": 0.9788444454178477, "eval_f1": 0.8862937922556853, "eval_loss": 0.23211508989334106, "eval_precision": 0.9613333333333334, "eval_recall": 0.82212086659065, "eval_runtime": 19.9559, "eval_samples_per_second": 127.431, "eval_steps_per_second": 0.651, "step": 10624 }, { "epoch": 64.00602409638554, "grad_norm": 1.369818925857544, "learning_rate": 4.9629071842927266e-05, "loss": 0.13, "step": 10625 }, { "epoch": 64.03614457831326, "grad_norm": 1.9708218574523926, "learning_rate": 4.962628290941545e-05, "loss": 0.1507, "step": 10630 }, { "epoch": 64.06626506024097, "grad_norm": 1.8631243705749512, "learning_rate": 4.962349397590362e-05, "loss": 0.1506, "step": 10635 }, { "epoch": 64.09638554216868, "grad_norm": 1.812243938446045, "learning_rate": 4.962070504239179e-05, "loss": 0.1948, "step": 10640 }, { "epoch": 64.12650602409639, "grad_norm": 1.4348286390304565, "learning_rate": 4.961791610887997e-05, "loss": 0.1647, "step": 10645 }, { "epoch": 64.1566265060241, "grad_norm": 2.407808780670166, "learning_rate": 4.961512717536814e-05, "loss": 0.1722, "step": 10650 }, { "epoch": 64.1867469879518, "grad_norm": 2.4277946949005127, "learning_rate": 4.9612338241856316e-05, "loss": 0.1609, "step": 10655 }, { "epoch": 64.21686746987952, "grad_norm": 1.4109902381896973, "learning_rate": 4.9609549308344496e-05, "loss": 0.1399, "step": 10660 }, { "epoch": 64.24698795180723, "grad_norm": 1.2755377292633057, "learning_rate": 4.960676037483267e-05, "loss": 0.1523, "step": 10665 }, { "epoch": 64.27710843373494, "grad_norm": 1.7213119268417358, "learning_rate": 4.960397144132084e-05, "loss": 0.1724, "step": 10670 }, { "epoch": 64.30722891566265, "grad_norm": 2.4596023559570312, "learning_rate": 4.960118250780902e-05, "loss": 0.1481, "step": 10675 }, { "epoch": 64.33734939759036, "grad_norm": 2.551539897918701, "learning_rate": 4.959839357429719e-05, "loss": 0.2016, "step": 10680 }, { "epoch": 64.36746987951807, "grad_norm": 2.1785504817962646, "learning_rate": 4.9595604640785365e-05, "loss": 0.1646, "step": 10685 }, { "epoch": 64.39759036144578, "grad_norm": 1.351625680923462, "learning_rate": 4.9592815707273546e-05, "loss": 0.1707, "step": 10690 }, { "epoch": 64.42771084337349, "grad_norm": 1.3918484449386597, "learning_rate": 4.959002677376171e-05, "loss": 0.1519, "step": 10695 }, { "epoch": 64.4578313253012, "grad_norm": 1.8310281038284302, "learning_rate": 4.958723784024989e-05, "loss": 0.2019, "step": 10700 }, { "epoch": 64.48795180722891, "grad_norm": 2.043283224105835, "learning_rate": 4.958444890673807e-05, "loss": 0.1472, "step": 10705 }, { "epoch": 64.51807228915662, "grad_norm": 1.7341029644012451, "learning_rate": 4.958165997322624e-05, "loss": 0.2088, "step": 10710 }, { "epoch": 64.54819277108433, "grad_norm": 1.8614193201065063, "learning_rate": 4.9578871039714415e-05, "loss": 0.1419, "step": 10715 }, { "epoch": 64.57831325301204, "grad_norm": 2.1345086097717285, "learning_rate": 4.9576082106202596e-05, "loss": 0.2617, "step": 10720 }, { "epoch": 64.60843373493977, "grad_norm": 1.1087085008621216, "learning_rate": 4.957329317269076e-05, "loss": 0.1431, "step": 10725 }, { "epoch": 64.63855421686748, "grad_norm": 1.2613799571990967, "learning_rate": 4.9570504239178936e-05, "loss": 0.1741, "step": 10730 }, { "epoch": 64.66867469879519, "grad_norm": 1.9829838275909424, "learning_rate": 4.956771530566712e-05, "loss": 0.1856, "step": 10735 }, { "epoch": 64.6987951807229, "grad_norm": 2.0306339263916016, "learning_rate": 4.956492637215529e-05, "loss": 0.1717, "step": 10740 }, { "epoch": 64.7289156626506, "grad_norm": 1.563502311706543, "learning_rate": 4.9562137438643465e-05, "loss": 0.1443, "step": 10745 }, { "epoch": 64.75903614457832, "grad_norm": 1.6432170867919922, "learning_rate": 4.9559348505131645e-05, "loss": 0.1986, "step": 10750 }, { "epoch": 64.78915662650603, "grad_norm": 2.0203120708465576, "learning_rate": 4.955655957161981e-05, "loss": 0.1864, "step": 10755 }, { "epoch": 64.81927710843374, "grad_norm": 1.6433354616165161, "learning_rate": 4.9553770638107986e-05, "loss": 0.1864, "step": 10760 }, { "epoch": 64.84939759036145, "grad_norm": 1.7345216274261475, "learning_rate": 4.9550981704596166e-05, "loss": 0.2027, "step": 10765 }, { "epoch": 64.87951807228916, "grad_norm": 1.6810556650161743, "learning_rate": 4.954819277108434e-05, "loss": 0.1628, "step": 10770 }, { "epoch": 64.90963855421687, "grad_norm": 2.2163572311401367, "learning_rate": 4.9545403837572514e-05, "loss": 0.17, "step": 10775 }, { "epoch": 64.93975903614458, "grad_norm": 1.1497232913970947, "learning_rate": 4.9542614904060695e-05, "loss": 0.1854, "step": 10780 }, { "epoch": 64.96987951807229, "grad_norm": 1.6407033205032349, "learning_rate": 4.953982597054886e-05, "loss": 0.1385, "step": 10785 }, { "epoch": 65.0, "grad_norm": 1.4199328422546387, "learning_rate": 4.9537037037037035e-05, "loss": 0.1668, "step": 10790 }, { "epoch": 65.0, "eval_accuracy": 0.9311836413684624, "eval_auc": 0.9761231744693317, "eval_f1": 0.8921749845964264, "eval_loss": 0.25203993916511536, "eval_precision": 0.9705093833780161, "eval_recall": 0.8255416191562144, "eval_runtime": 16.9177, "eval_samples_per_second": 150.316, "eval_steps_per_second": 0.768, "step": 10790 }, { "epoch": 65.03012048192771, "grad_norm": 1.2270861864089966, "learning_rate": 4.9534248103525216e-05, "loss": 0.1709, "step": 10795 }, { "epoch": 65.06024096385542, "grad_norm": 1.7802960872650146, "learning_rate": 4.953145917001339e-05, "loss": 0.1783, "step": 10800 }, { "epoch": 65.09036144578313, "grad_norm": 1.1382660865783691, "learning_rate": 4.9528670236501564e-05, "loss": 0.1685, "step": 10805 }, { "epoch": 65.12048192771084, "grad_norm": 1.8339431285858154, "learning_rate": 4.952588130298974e-05, "loss": 0.1886, "step": 10810 }, { "epoch": 65.15060240963855, "grad_norm": 2.0604560375213623, "learning_rate": 4.952309236947791e-05, "loss": 0.1906, "step": 10815 }, { "epoch": 65.18072289156626, "grad_norm": 1.6225755214691162, "learning_rate": 4.9520303435966085e-05, "loss": 0.1861, "step": 10820 }, { "epoch": 65.21084337349397, "grad_norm": 1.7505550384521484, "learning_rate": 4.9517514502454266e-05, "loss": 0.1536, "step": 10825 }, { "epoch": 65.24096385542168, "grad_norm": 1.8951244354248047, "learning_rate": 4.951472556894244e-05, "loss": 0.1856, "step": 10830 }, { "epoch": 65.2710843373494, "grad_norm": 2.078463077545166, "learning_rate": 4.951193663543061e-05, "loss": 0.1505, "step": 10835 }, { "epoch": 65.3012048192771, "grad_norm": 1.7262752056121826, "learning_rate": 4.950914770191879e-05, "loss": 0.1592, "step": 10840 }, { "epoch": 65.33132530120481, "grad_norm": 1.910554051399231, "learning_rate": 4.950635876840696e-05, "loss": 0.1633, "step": 10845 }, { "epoch": 65.36144578313252, "grad_norm": 1.8438043594360352, "learning_rate": 4.9503569834895135e-05, "loss": 0.1582, "step": 10850 }, { "epoch": 65.39156626506023, "grad_norm": 1.3439587354660034, "learning_rate": 4.9500780901383315e-05, "loss": 0.1395, "step": 10855 }, { "epoch": 65.42168674698796, "grad_norm": 2.09580135345459, "learning_rate": 4.949799196787149e-05, "loss": 0.1641, "step": 10860 }, { "epoch": 65.45180722891567, "grad_norm": 2.5676605701446533, "learning_rate": 4.949520303435966e-05, "loss": 0.1582, "step": 10865 }, { "epoch": 65.48192771084338, "grad_norm": 2.134660482406616, "learning_rate": 4.9492414100847837e-05, "loss": 0.1755, "step": 10870 }, { "epoch": 65.51204819277109, "grad_norm": 1.9631792306900024, "learning_rate": 4.948962516733601e-05, "loss": 0.1695, "step": 10875 }, { "epoch": 65.5421686746988, "grad_norm": 1.410919189453125, "learning_rate": 4.9486836233824184e-05, "loss": 0.1883, "step": 10880 }, { "epoch": 65.57228915662651, "grad_norm": 1.7867623567581177, "learning_rate": 4.9484047300312365e-05, "loss": 0.1514, "step": 10885 }, { "epoch": 65.60240963855422, "grad_norm": 1.1021161079406738, "learning_rate": 4.948125836680054e-05, "loss": 0.1328, "step": 10890 }, { "epoch": 65.63253012048193, "grad_norm": 2.3028223514556885, "learning_rate": 4.947846943328871e-05, "loss": 0.2114, "step": 10895 }, { "epoch": 65.66265060240964, "grad_norm": 1.3753268718719482, "learning_rate": 4.9475680499776886e-05, "loss": 0.1843, "step": 10900 }, { "epoch": 65.69277108433735, "grad_norm": 2.1384854316711426, "learning_rate": 4.947289156626506e-05, "loss": 0.1462, "step": 10905 }, { "epoch": 65.72289156626506, "grad_norm": 1.6459414958953857, "learning_rate": 4.947010263275324e-05, "loss": 0.1894, "step": 10910 }, { "epoch": 65.75301204819277, "grad_norm": 2.067556381225586, "learning_rate": 4.9467313699241414e-05, "loss": 0.1661, "step": 10915 }, { "epoch": 65.78313253012048, "grad_norm": 1.947026252746582, "learning_rate": 4.946452476572959e-05, "loss": 0.1738, "step": 10920 }, { "epoch": 65.8132530120482, "grad_norm": 2.365640640258789, "learning_rate": 4.946173583221776e-05, "loss": 0.2179, "step": 10925 }, { "epoch": 65.8433734939759, "grad_norm": 1.8187806606292725, "learning_rate": 4.9458946898705936e-05, "loss": 0.2086, "step": 10930 }, { "epoch": 65.87349397590361, "grad_norm": 1.6368297338485718, "learning_rate": 4.945615796519411e-05, "loss": 0.1925, "step": 10935 }, { "epoch": 65.90361445783132, "grad_norm": 1.8355932235717773, "learning_rate": 4.945336903168229e-05, "loss": 0.1627, "step": 10940 }, { "epoch": 65.93373493975903, "grad_norm": 0.8479874134063721, "learning_rate": 4.9450580098170464e-05, "loss": 0.1584, "step": 10945 }, { "epoch": 65.96385542168674, "grad_norm": 1.4935309886932373, "learning_rate": 4.944779116465864e-05, "loss": 0.1427, "step": 10950 }, { "epoch": 65.99397590361446, "grad_norm": 1.5013028383255005, "learning_rate": 4.944500223114681e-05, "loss": 0.188, "step": 10955 }, { "epoch": 66.0, "eval_accuracy": 0.9370821863940229, "eval_auc": 0.9763887310910682, "eval_f1": 0.9046483909415971, "eval_loss": 0.21822898089885712, "eval_precision": 0.947565543071161, "eval_recall": 0.8654503990877993, "eval_runtime": 17.1304, "eval_samples_per_second": 148.449, "eval_steps_per_second": 0.759, "step": 10956 }, { "epoch": 66.02409638554217, "grad_norm": 1.1407597064971924, "learning_rate": 4.9442213297634985e-05, "loss": 0.1436, "step": 10960 }, { "epoch": 66.05421686746988, "grad_norm": 1.0020407438278198, "learning_rate": 4.943942436412316e-05, "loss": 0.1508, "step": 10965 }, { "epoch": 66.08433734939759, "grad_norm": 2.138779401779175, "learning_rate": 4.943663543061134e-05, "loss": 0.1629, "step": 10970 }, { "epoch": 66.1144578313253, "grad_norm": 2.0138418674468994, "learning_rate": 4.943384649709951e-05, "loss": 0.1957, "step": 10975 }, { "epoch": 66.144578313253, "grad_norm": 1.890173316001892, "learning_rate": 4.943105756358769e-05, "loss": 0.1823, "step": 10980 }, { "epoch": 66.17469879518072, "grad_norm": 1.5144275426864624, "learning_rate": 4.942826863007586e-05, "loss": 0.189, "step": 10985 }, { "epoch": 66.20481927710843, "grad_norm": 1.6190605163574219, "learning_rate": 4.9425479696564035e-05, "loss": 0.1674, "step": 10990 }, { "epoch": 66.23493975903614, "grad_norm": 1.4035422801971436, "learning_rate": 4.942269076305221e-05, "loss": 0.1799, "step": 10995 }, { "epoch": 66.26506024096386, "grad_norm": 2.2024824619293213, "learning_rate": 4.941990182954039e-05, "loss": 0.1513, "step": 11000 }, { "epoch": 66.29518072289157, "grad_norm": 1.7090049982070923, "learning_rate": 4.941711289602856e-05, "loss": 0.2451, "step": 11005 }, { "epoch": 66.32530120481928, "grad_norm": 2.0364186763763428, "learning_rate": 4.941432396251674e-05, "loss": 0.1637, "step": 11010 }, { "epoch": 66.355421686747, "grad_norm": 1.7846271991729736, "learning_rate": 4.941153502900491e-05, "loss": 0.1448, "step": 11015 }, { "epoch": 66.3855421686747, "grad_norm": 2.3138959407806396, "learning_rate": 4.9408746095493084e-05, "loss": 0.2147, "step": 11020 }, { "epoch": 66.41566265060241, "grad_norm": 1.288283109664917, "learning_rate": 4.940595716198126e-05, "loss": 0.1407, "step": 11025 }, { "epoch": 66.44578313253012, "grad_norm": 1.8594008684158325, "learning_rate": 4.940316822846944e-05, "loss": 0.1456, "step": 11030 }, { "epoch": 66.47590361445783, "grad_norm": 1.7284523248672485, "learning_rate": 4.940037929495761e-05, "loss": 0.1282, "step": 11035 }, { "epoch": 66.50602409638554, "grad_norm": 2.2721471786499023, "learning_rate": 4.9397590361445786e-05, "loss": 0.1992, "step": 11040 }, { "epoch": 66.53614457831326, "grad_norm": 0.9815234541893005, "learning_rate": 4.939480142793396e-05, "loss": 0.1513, "step": 11045 }, { "epoch": 66.56626506024097, "grad_norm": 2.513406276702881, "learning_rate": 4.9392012494422134e-05, "loss": 0.1658, "step": 11050 }, { "epoch": 66.59638554216868, "grad_norm": 1.996604561805725, "learning_rate": 4.938922356091031e-05, "loss": 0.1563, "step": 11055 }, { "epoch": 66.62650602409639, "grad_norm": 1.9300235509872437, "learning_rate": 4.938643462739849e-05, "loss": 0.2027, "step": 11060 }, { "epoch": 66.6566265060241, "grad_norm": 2.3639075756073, "learning_rate": 4.938364569388666e-05, "loss": 0.1711, "step": 11065 }, { "epoch": 66.6867469879518, "grad_norm": 2.160346746444702, "learning_rate": 4.9380856760374836e-05, "loss": 0.1821, "step": 11070 }, { "epoch": 66.71686746987952, "grad_norm": 1.9300659894943237, "learning_rate": 4.937806782686301e-05, "loss": 0.171, "step": 11075 }, { "epoch": 66.74698795180723, "grad_norm": 1.5317730903625488, "learning_rate": 4.937527889335118e-05, "loss": 0.1387, "step": 11080 }, { "epoch": 66.77710843373494, "grad_norm": 1.837944746017456, "learning_rate": 4.937248995983936e-05, "loss": 0.1894, "step": 11085 }, { "epoch": 66.80722891566265, "grad_norm": 1.6817915439605713, "learning_rate": 4.936970102632754e-05, "loss": 0.2015, "step": 11090 }, { "epoch": 66.83734939759036, "grad_norm": 2.3749804496765137, "learning_rate": 4.936691209281571e-05, "loss": 0.1769, "step": 11095 }, { "epoch": 66.86746987951807, "grad_norm": 2.061269998550415, "learning_rate": 4.9364123159303885e-05, "loss": 0.1608, "step": 11100 }, { "epoch": 66.89759036144578, "grad_norm": 1.4038336277008057, "learning_rate": 4.936133422579206e-05, "loss": 0.1681, "step": 11105 }, { "epoch": 66.92771084337349, "grad_norm": 1.8982125520706177, "learning_rate": 4.935854529228023e-05, "loss": 0.2014, "step": 11110 }, { "epoch": 66.9578313253012, "grad_norm": 1.63504159450531, "learning_rate": 4.935575635876841e-05, "loss": 0.1569, "step": 11115 }, { "epoch": 66.98795180722891, "grad_norm": 1.2141400575637817, "learning_rate": 4.935296742525659e-05, "loss": 0.165, "step": 11120 }, { "epoch": 67.0, "eval_accuracy": 0.9311836413684624, "eval_auc": 0.976971176155753, "eval_f1": 0.8936170212765957, "eval_loss": 0.24524444341659546, "eval_precision": 0.95703125, "eval_recall": 0.8380843785632839, "eval_runtime": 16.6804, "eval_samples_per_second": 152.454, "eval_steps_per_second": 0.779, "step": 11122 }, { "epoch": 67.01807228915662, "grad_norm": 1.8608736991882324, "learning_rate": 4.935017849174476e-05, "loss": 0.1377, "step": 11125 }, { "epoch": 67.04819277108433, "grad_norm": 1.6621134281158447, "learning_rate": 4.9347389558232935e-05, "loss": 0.1636, "step": 11130 }, { "epoch": 67.07831325301204, "grad_norm": 2.7037923336029053, "learning_rate": 4.934460062472111e-05, "loss": 0.1416, "step": 11135 }, { "epoch": 67.10843373493977, "grad_norm": 1.8798949718475342, "learning_rate": 4.934181169120928e-05, "loss": 0.1838, "step": 11140 }, { "epoch": 67.13855421686748, "grad_norm": 2.596620559692383, "learning_rate": 4.9339022757697456e-05, "loss": 0.1955, "step": 11145 }, { "epoch": 67.16867469879519, "grad_norm": 2.7191731929779053, "learning_rate": 4.933623382418564e-05, "loss": 0.1783, "step": 11150 }, { "epoch": 67.1987951807229, "grad_norm": 0.9744617938995361, "learning_rate": 4.933344489067381e-05, "loss": 0.1811, "step": 11155 }, { "epoch": 67.2289156626506, "grad_norm": 2.0423519611358643, "learning_rate": 4.933065595716198e-05, "loss": 0.1685, "step": 11160 }, { "epoch": 67.25903614457832, "grad_norm": 1.806595802307129, "learning_rate": 4.932786702365016e-05, "loss": 0.2129, "step": 11165 }, { "epoch": 67.28915662650603, "grad_norm": 1.54165518283844, "learning_rate": 4.932507809013833e-05, "loss": 0.1548, "step": 11170 }, { "epoch": 67.31927710843374, "grad_norm": 0.8470763564109802, "learning_rate": 4.932228915662651e-05, "loss": 0.1466, "step": 11175 }, { "epoch": 67.34939759036145, "grad_norm": 0.8902156352996826, "learning_rate": 4.9319500223114686e-05, "loss": 0.1688, "step": 11180 }, { "epoch": 67.37951807228916, "grad_norm": 1.4657398462295532, "learning_rate": 4.931671128960286e-05, "loss": 0.182, "step": 11185 }, { "epoch": 67.40963855421687, "grad_norm": 1.6776376962661743, "learning_rate": 4.9313922356091034e-05, "loss": 0.1539, "step": 11190 }, { "epoch": 67.43975903614458, "grad_norm": 2.4649460315704346, "learning_rate": 4.931113342257921e-05, "loss": 0.1457, "step": 11195 }, { "epoch": 67.46987951807229, "grad_norm": 1.9362566471099854, "learning_rate": 4.930834448906738e-05, "loss": 0.1798, "step": 11200 }, { "epoch": 67.5, "grad_norm": 1.598980188369751, "learning_rate": 4.930555555555556e-05, "loss": 0.1737, "step": 11205 }, { "epoch": 67.53012048192771, "grad_norm": 2.1399192810058594, "learning_rate": 4.9302766622043736e-05, "loss": 0.1474, "step": 11210 }, { "epoch": 67.56024096385542, "grad_norm": 2.315030813217163, "learning_rate": 4.929997768853191e-05, "loss": 0.1898, "step": 11215 }, { "epoch": 67.59036144578313, "grad_norm": 1.8207380771636963, "learning_rate": 4.9297188755020084e-05, "loss": 0.1664, "step": 11220 }, { "epoch": 67.62048192771084, "grad_norm": 1.6651870012283325, "learning_rate": 4.929439982150826e-05, "loss": 0.1708, "step": 11225 }, { "epoch": 67.65060240963855, "grad_norm": 1.4469952583312988, "learning_rate": 4.929161088799643e-05, "loss": 0.1634, "step": 11230 }, { "epoch": 67.68072289156626, "grad_norm": 1.6479268074035645, "learning_rate": 4.928882195448461e-05, "loss": 0.1632, "step": 11235 }, { "epoch": 67.71084337349397, "grad_norm": 1.4887217283248901, "learning_rate": 4.9286033020972786e-05, "loss": 0.186, "step": 11240 }, { "epoch": 67.74096385542168, "grad_norm": 1.9350062608718872, "learning_rate": 4.928324408746095e-05, "loss": 0.1922, "step": 11245 }, { "epoch": 67.7710843373494, "grad_norm": 2.2155404090881348, "learning_rate": 4.928045515394913e-05, "loss": 0.1657, "step": 11250 }, { "epoch": 67.8012048192771, "grad_norm": 2.231834650039673, "learning_rate": 4.927766622043731e-05, "loss": 0.1776, "step": 11255 }, { "epoch": 67.83132530120481, "grad_norm": 1.9670336246490479, "learning_rate": 4.927487728692548e-05, "loss": 0.1964, "step": 11260 }, { "epoch": 67.86144578313252, "grad_norm": 1.4637037515640259, "learning_rate": 4.927208835341366e-05, "loss": 0.1729, "step": 11265 }, { "epoch": 67.89156626506023, "grad_norm": 1.7573186159133911, "learning_rate": 4.9269299419901835e-05, "loss": 0.1619, "step": 11270 }, { "epoch": 67.92168674698796, "grad_norm": 2.7523934841156006, "learning_rate": 4.926651048639e-05, "loss": 0.1985, "step": 11275 }, { "epoch": 67.95180722891567, "grad_norm": 2.1700620651245117, "learning_rate": 4.926372155287818e-05, "loss": 0.1618, "step": 11280 }, { "epoch": 67.98192771084338, "grad_norm": 1.83929443359375, "learning_rate": 4.9260932619366356e-05, "loss": 0.1571, "step": 11285 }, { "epoch": 68.0, "eval_accuracy": 0.9429807314195832, "eval_auc": 0.9793717943277653, "eval_f1": 0.9143532191376255, "eval_loss": 0.2007821649312973, "eval_precision": 0.9485294117647058, "eval_recall": 0.8825541619156214, "eval_runtime": 19.754, "eval_samples_per_second": 128.733, "eval_steps_per_second": 0.658, "step": 11288 }, { "epoch": 68.01204819277109, "grad_norm": 1.3829892873764038, "learning_rate": 4.925814368585453e-05, "loss": 0.1796, "step": 11290 }, { "epoch": 68.0421686746988, "grad_norm": 1.3722093105316162, "learning_rate": 4.925535475234271e-05, "loss": 0.1445, "step": 11295 }, { "epoch": 68.07228915662651, "grad_norm": 2.054945945739746, "learning_rate": 4.9252565818830885e-05, "loss": 0.1793, "step": 11300 }, { "epoch": 68.10240963855422, "grad_norm": 0.9681638479232788, "learning_rate": 4.924977688531905e-05, "loss": 0.1451, "step": 11305 }, { "epoch": 68.13253012048193, "grad_norm": 1.8551998138427734, "learning_rate": 4.924698795180723e-05, "loss": 0.1644, "step": 11310 }, { "epoch": 68.16265060240964, "grad_norm": 1.469698190689087, "learning_rate": 4.9244199018295406e-05, "loss": 0.1554, "step": 11315 }, { "epoch": 68.19277108433735, "grad_norm": 1.8895114660263062, "learning_rate": 4.924141008478358e-05, "loss": 0.1435, "step": 11320 }, { "epoch": 68.22289156626506, "grad_norm": 2.335062026977539, "learning_rate": 4.923862115127176e-05, "loss": 0.1786, "step": 11325 }, { "epoch": 68.25301204819277, "grad_norm": 1.1214507818222046, "learning_rate": 4.923583221775993e-05, "loss": 0.1738, "step": 11330 }, { "epoch": 68.28313253012048, "grad_norm": 2.89261794090271, "learning_rate": 4.92330432842481e-05, "loss": 0.2027, "step": 11335 }, { "epoch": 68.3132530120482, "grad_norm": 2.151472568511963, "learning_rate": 4.923025435073628e-05, "loss": 0.1594, "step": 11340 }, { "epoch": 68.3433734939759, "grad_norm": 1.9266464710235596, "learning_rate": 4.9227465417224456e-05, "loss": 0.1756, "step": 11345 }, { "epoch": 68.37349397590361, "grad_norm": 2.3010458946228027, "learning_rate": 4.922467648371263e-05, "loss": 0.1746, "step": 11350 }, { "epoch": 68.40361445783132, "grad_norm": 1.6794465780258179, "learning_rate": 4.922188755020081e-05, "loss": 0.1677, "step": 11355 }, { "epoch": 68.43373493975903, "grad_norm": 1.511877417564392, "learning_rate": 4.921909861668898e-05, "loss": 0.1727, "step": 11360 }, { "epoch": 68.46385542168674, "grad_norm": 2.0173590183258057, "learning_rate": 4.921630968317715e-05, "loss": 0.1544, "step": 11365 }, { "epoch": 68.49397590361446, "grad_norm": 2.6422698497772217, "learning_rate": 4.921352074966533e-05, "loss": 0.1244, "step": 11370 }, { "epoch": 68.52409638554217, "grad_norm": 1.666720986366272, "learning_rate": 4.9210731816153505e-05, "loss": 0.1348, "step": 11375 }, { "epoch": 68.55421686746988, "grad_norm": 1.0499719381332397, "learning_rate": 4.920794288264168e-05, "loss": 0.1697, "step": 11380 }, { "epoch": 68.58433734939759, "grad_norm": 1.7370655536651611, "learning_rate": 4.920515394912986e-05, "loss": 0.2181, "step": 11385 }, { "epoch": 68.6144578313253, "grad_norm": 2.2380411624908447, "learning_rate": 4.9202365015618026e-05, "loss": 0.1668, "step": 11390 }, { "epoch": 68.644578313253, "grad_norm": 1.3639131784439087, "learning_rate": 4.91995760821062e-05, "loss": 0.1411, "step": 11395 }, { "epoch": 68.67469879518072, "grad_norm": 2.3739328384399414, "learning_rate": 4.919678714859438e-05, "loss": 0.1631, "step": 11400 }, { "epoch": 68.70481927710843, "grad_norm": 1.231765866279602, "learning_rate": 4.9193998215082555e-05, "loss": 0.151, "step": 11405 }, { "epoch": 68.73493975903614, "grad_norm": 1.5961732864379883, "learning_rate": 4.919120928157073e-05, "loss": 0.1494, "step": 11410 }, { "epoch": 68.76506024096386, "grad_norm": 1.471126675605774, "learning_rate": 4.918842034805891e-05, "loss": 0.1733, "step": 11415 }, { "epoch": 68.79518072289157, "grad_norm": 1.7205326557159424, "learning_rate": 4.9185631414547076e-05, "loss": 0.1823, "step": 11420 }, { "epoch": 68.82530120481928, "grad_norm": 2.2642674446105957, "learning_rate": 4.918284248103525e-05, "loss": 0.2046, "step": 11425 }, { "epoch": 68.855421686747, "grad_norm": 1.4918270111083984, "learning_rate": 4.918005354752343e-05, "loss": 0.1705, "step": 11430 }, { "epoch": 68.8855421686747, "grad_norm": 1.3444596529006958, "learning_rate": 4.9177264614011604e-05, "loss": 0.1763, "step": 11435 }, { "epoch": 68.91566265060241, "grad_norm": 1.2696788311004639, "learning_rate": 4.9174475680499785e-05, "loss": 0.1538, "step": 11440 }, { "epoch": 68.94578313253012, "grad_norm": 1.9824987649917603, "learning_rate": 4.917168674698795e-05, "loss": 0.1859, "step": 11445 }, { "epoch": 68.97590361445783, "grad_norm": 1.523636817932129, "learning_rate": 4.9168897813476126e-05, "loss": 0.1697, "step": 11450 }, { "epoch": 69.0, "eval_accuracy": 0.9288242233582383, "eval_auc": 0.9759845785520594, "eval_f1": 0.8914217156568687, "eval_loss": 0.2424307018518448, "eval_precision": 0.9405063291139241, "eval_recall": 0.8472063854047891, "eval_runtime": 19.8135, "eval_samples_per_second": 128.347, "eval_steps_per_second": 0.656, "step": 11454 }, { "epoch": 69.00602409638554, "grad_norm": 1.9285058975219727, "learning_rate": 4.9166108879964306e-05, "loss": 0.1525, "step": 11455 }, { "epoch": 69.03614457831326, "grad_norm": 0.9996278882026672, "learning_rate": 4.916331994645248e-05, "loss": 0.177, "step": 11460 }, { "epoch": 69.06626506024097, "grad_norm": 1.2690097093582153, "learning_rate": 4.9160531012940654e-05, "loss": 0.1339, "step": 11465 }, { "epoch": 69.09638554216868, "grad_norm": 1.4849321842193604, "learning_rate": 4.9157742079428834e-05, "loss": 0.1809, "step": 11470 }, { "epoch": 69.12650602409639, "grad_norm": 1.2057774066925049, "learning_rate": 4.9154953145917e-05, "loss": 0.1179, "step": 11475 }, { "epoch": 69.1566265060241, "grad_norm": 2.3532774448394775, "learning_rate": 4.9152164212405175e-05, "loss": 0.1738, "step": 11480 }, { "epoch": 69.1867469879518, "grad_norm": 1.6331244707107544, "learning_rate": 4.9149375278893356e-05, "loss": 0.1651, "step": 11485 }, { "epoch": 69.21686746987952, "grad_norm": 1.5818071365356445, "learning_rate": 4.914658634538153e-05, "loss": 0.1389, "step": 11490 }, { "epoch": 69.24698795180723, "grad_norm": 1.0956642627716064, "learning_rate": 4.91437974118697e-05, "loss": 0.1587, "step": 11495 }, { "epoch": 69.27710843373494, "grad_norm": 1.2369745969772339, "learning_rate": 4.9141008478357884e-05, "loss": 0.1464, "step": 11500 }, { "epoch": 69.30722891566265, "grad_norm": 1.6326029300689697, "learning_rate": 4.913821954484605e-05, "loss": 0.1822, "step": 11505 }, { "epoch": 69.33734939759036, "grad_norm": 1.6316596269607544, "learning_rate": 4.9135430611334225e-05, "loss": 0.1119, "step": 11510 }, { "epoch": 69.36746987951807, "grad_norm": 0.746854305267334, "learning_rate": 4.9132641677822405e-05, "loss": 0.1604, "step": 11515 }, { "epoch": 69.39759036144578, "grad_norm": 2.0561535358428955, "learning_rate": 4.912985274431058e-05, "loss": 0.1949, "step": 11520 }, { "epoch": 69.42771084337349, "grad_norm": 1.2303466796875, "learning_rate": 4.912706381079875e-05, "loss": 0.1913, "step": 11525 }, { "epoch": 69.4578313253012, "grad_norm": 1.2517952919006348, "learning_rate": 4.912427487728693e-05, "loss": 0.1378, "step": 11530 }, { "epoch": 69.48795180722891, "grad_norm": 1.7751539945602417, "learning_rate": 4.91214859437751e-05, "loss": 0.171, "step": 11535 }, { "epoch": 69.51807228915662, "grad_norm": 2.344428539276123, "learning_rate": 4.9118697010263274e-05, "loss": 0.1763, "step": 11540 }, { "epoch": 69.54819277108433, "grad_norm": 1.3794399499893188, "learning_rate": 4.9115908076751455e-05, "loss": 0.1585, "step": 11545 }, { "epoch": 69.57831325301204, "grad_norm": 1.4221460819244385, "learning_rate": 4.911311914323963e-05, "loss": 0.1443, "step": 11550 }, { "epoch": 69.60843373493977, "grad_norm": 1.758724570274353, "learning_rate": 4.91103302097278e-05, "loss": 0.1969, "step": 11555 }, { "epoch": 69.63855421686748, "grad_norm": 1.4691616296768188, "learning_rate": 4.9107541276215976e-05, "loss": 0.142, "step": 11560 }, { "epoch": 69.66867469879519, "grad_norm": 2.206136703491211, "learning_rate": 4.910475234270415e-05, "loss": 0.2204, "step": 11565 }, { "epoch": 69.6987951807229, "grad_norm": 1.3491761684417725, "learning_rate": 4.9101963409192324e-05, "loss": 0.1707, "step": 11570 }, { "epoch": 69.7289156626506, "grad_norm": 1.2227740287780762, "learning_rate": 4.9099174475680504e-05, "loss": 0.1353, "step": 11575 }, { "epoch": 69.75903614457832, "grad_norm": 1.7694668769836426, "learning_rate": 4.909638554216868e-05, "loss": 0.1803, "step": 11580 }, { "epoch": 69.78915662650603, "grad_norm": 1.7386656999588013, "learning_rate": 4.909359660865685e-05, "loss": 0.1719, "step": 11585 }, { "epoch": 69.81927710843374, "grad_norm": 1.1455605030059814, "learning_rate": 4.9090807675145026e-05, "loss": 0.1606, "step": 11590 }, { "epoch": 69.84939759036145, "grad_norm": 2.165029287338257, "learning_rate": 4.90880187416332e-05, "loss": 0.1926, "step": 11595 }, { "epoch": 69.87951807228916, "grad_norm": 1.8540332317352295, "learning_rate": 4.908522980812137e-05, "loss": 0.201, "step": 11600 }, { "epoch": 69.90963855421687, "grad_norm": 1.1603822708129883, "learning_rate": 4.9082440874609554e-05, "loss": 0.155, "step": 11605 }, { "epoch": 69.93975903614458, "grad_norm": 0.9770561456680298, "learning_rate": 4.907965194109773e-05, "loss": 0.1622, "step": 11610 }, { "epoch": 69.96987951807229, "grad_norm": 1.5205050706863403, "learning_rate": 4.90768630075859e-05, "loss": 0.1565, "step": 11615 }, { "epoch": 70.0, "grad_norm": 2.5346810817718506, "learning_rate": 4.9074074074074075e-05, "loss": 0.211, "step": 11620 }, { "epoch": 70.0, "eval_accuracy": 0.9193865513173417, "eval_auc": 0.9755270409189901, "eval_f1": 0.8709880427942102, "eval_loss": 0.31897255778312683, "eval_precision": 0.9719101123595506, "eval_recall": 0.7890535917901939, "eval_runtime": 19.8528, "eval_samples_per_second": 128.093, "eval_steps_per_second": 0.655, "step": 11620 }, { "epoch": 70.03012048192771, "grad_norm": 2.514573812484741, "learning_rate": 4.907128514056225e-05, "loss": 0.1649, "step": 11625 }, { "epoch": 70.06024096385542, "grad_norm": 2.3070337772369385, "learning_rate": 4.906849620705042e-05, "loss": 0.1677, "step": 11630 }, { "epoch": 70.09036144578313, "grad_norm": 1.6494768857955933, "learning_rate": 4.9065707273538603e-05, "loss": 0.1683, "step": 11635 }, { "epoch": 70.12048192771084, "grad_norm": 1.5687743425369263, "learning_rate": 4.906291834002678e-05, "loss": 0.1636, "step": 11640 }, { "epoch": 70.15060240963855, "grad_norm": 1.975725769996643, "learning_rate": 4.906012940651495e-05, "loss": 0.1856, "step": 11645 }, { "epoch": 70.18072289156626, "grad_norm": 1.9229167699813843, "learning_rate": 4.9057340473003125e-05, "loss": 0.1753, "step": 11650 }, { "epoch": 70.21084337349397, "grad_norm": 2.0328242778778076, "learning_rate": 4.90545515394913e-05, "loss": 0.1452, "step": 11655 }, { "epoch": 70.24096385542168, "grad_norm": 2.073943614959717, "learning_rate": 4.905176260597947e-05, "loss": 0.1803, "step": 11660 }, { "epoch": 70.2710843373494, "grad_norm": 1.092247724533081, "learning_rate": 4.904897367246765e-05, "loss": 0.1511, "step": 11665 }, { "epoch": 70.3012048192771, "grad_norm": 0.8671462535858154, "learning_rate": 4.904618473895583e-05, "loss": 0.1468, "step": 11670 }, { "epoch": 70.33132530120481, "grad_norm": 1.3966654539108276, "learning_rate": 4.9043395805444e-05, "loss": 0.1574, "step": 11675 }, { "epoch": 70.36144578313252, "grad_norm": 1.2109041213989258, "learning_rate": 4.9040606871932174e-05, "loss": 0.1571, "step": 11680 }, { "epoch": 70.39156626506023, "grad_norm": 1.3622057437896729, "learning_rate": 4.903781793842035e-05, "loss": 0.1584, "step": 11685 }, { "epoch": 70.42168674698796, "grad_norm": 1.86616051197052, "learning_rate": 4.903502900490852e-05, "loss": 0.1633, "step": 11690 }, { "epoch": 70.45180722891567, "grad_norm": 1.5176563262939453, "learning_rate": 4.90322400713967e-05, "loss": 0.1258, "step": 11695 }, { "epoch": 70.48192771084338, "grad_norm": 1.8466945886611938, "learning_rate": 4.9029451137884876e-05, "loss": 0.1433, "step": 11700 }, { "epoch": 70.51204819277109, "grad_norm": 1.281245231628418, "learning_rate": 4.902666220437305e-05, "loss": 0.189, "step": 11705 }, { "epoch": 70.5421686746988, "grad_norm": 1.0728424787521362, "learning_rate": 4.9023873270861224e-05, "loss": 0.1588, "step": 11710 }, { "epoch": 70.57228915662651, "grad_norm": 1.2949163913726807, "learning_rate": 4.90210843373494e-05, "loss": 0.1213, "step": 11715 }, { "epoch": 70.60240963855422, "grad_norm": 1.456634521484375, "learning_rate": 4.901829540383758e-05, "loss": 0.172, "step": 11720 }, { "epoch": 70.63253012048193, "grad_norm": 2.1834113597869873, "learning_rate": 4.901550647032575e-05, "loss": 0.1694, "step": 11725 }, { "epoch": 70.66265060240964, "grad_norm": 2.4465279579162598, "learning_rate": 4.9012717536813926e-05, "loss": 0.1921, "step": 11730 }, { "epoch": 70.69277108433735, "grad_norm": 1.586053729057312, "learning_rate": 4.90099286033021e-05, "loss": 0.1699, "step": 11735 }, { "epoch": 70.72289156626506, "grad_norm": 1.616501808166504, "learning_rate": 4.9007139669790274e-05, "loss": 0.1403, "step": 11740 }, { "epoch": 70.75301204819277, "grad_norm": 1.987012267112732, "learning_rate": 4.900435073627845e-05, "loss": 0.1609, "step": 11745 }, { "epoch": 70.78313253012048, "grad_norm": 2.1528191566467285, "learning_rate": 4.900156180276663e-05, "loss": 0.1683, "step": 11750 }, { "epoch": 70.8132530120482, "grad_norm": 2.372396469116211, "learning_rate": 4.89987728692548e-05, "loss": 0.1601, "step": 11755 }, { "epoch": 70.8433734939759, "grad_norm": 2.0807836055755615, "learning_rate": 4.8995983935742975e-05, "loss": 0.1776, "step": 11760 }, { "epoch": 70.87349397590361, "grad_norm": 2.007038116455078, "learning_rate": 4.899319500223115e-05, "loss": 0.155, "step": 11765 }, { "epoch": 70.90361445783132, "grad_norm": 2.0115725994110107, "learning_rate": 4.899040606871932e-05, "loss": 0.165, "step": 11770 }, { "epoch": 70.93373493975903, "grad_norm": 2.330153226852417, "learning_rate": 4.89876171352075e-05, "loss": 0.1782, "step": 11775 }, { "epoch": 70.96385542168674, "grad_norm": 0.9279620051383972, "learning_rate": 4.898482820169568e-05, "loss": 0.1541, "step": 11780 }, { "epoch": 70.99397590361446, "grad_norm": 1.6002525091171265, "learning_rate": 4.898203926818385e-05, "loss": 0.1468, "step": 11785 }, { "epoch": 71.0, "eval_accuracy": 0.9402280770743217, "eval_auc": 0.9803796090842267, "eval_f1": 0.9079903147699758, "eval_loss": 0.22381989657878876, "eval_precision": 0.967741935483871, "eval_recall": 0.855188141391106, "eval_runtime": 16.9098, "eval_samples_per_second": 150.386, "eval_steps_per_second": 0.769, "step": 11786 }, { "epoch": 71.02409638554217, "grad_norm": 1.6419475078582764, "learning_rate": 4.8979250334672025e-05, "loss": 0.1328, "step": 11790 }, { "epoch": 71.05421686746988, "grad_norm": 2.771260976791382, "learning_rate": 4.89764614011602e-05, "loss": 0.1447, "step": 11795 }, { "epoch": 71.08433734939759, "grad_norm": 2.3673524856567383, "learning_rate": 4.897367246764837e-05, "loss": 0.1637, "step": 11800 }, { "epoch": 71.1144578313253, "grad_norm": 2.1549925804138184, "learning_rate": 4.8970883534136546e-05, "loss": 0.1472, "step": 11805 }, { "epoch": 71.144578313253, "grad_norm": 1.625942349433899, "learning_rate": 4.896809460062473e-05, "loss": 0.1634, "step": 11810 }, { "epoch": 71.17469879518072, "grad_norm": 1.6430217027664185, "learning_rate": 4.89653056671129e-05, "loss": 0.1704, "step": 11815 }, { "epoch": 71.20481927710843, "grad_norm": 2.000575542449951, "learning_rate": 4.8962516733601075e-05, "loss": 0.1896, "step": 11820 }, { "epoch": 71.23493975903614, "grad_norm": 1.9632614850997925, "learning_rate": 4.895972780008925e-05, "loss": 0.1703, "step": 11825 }, { "epoch": 71.26506024096386, "grad_norm": 1.683100700378418, "learning_rate": 4.895693886657742e-05, "loss": 0.1493, "step": 11830 }, { "epoch": 71.29518072289157, "grad_norm": 2.165752410888672, "learning_rate": 4.8954149933065596e-05, "loss": 0.1407, "step": 11835 }, { "epoch": 71.32530120481928, "grad_norm": 2.3268678188323975, "learning_rate": 4.8951360999553777e-05, "loss": 0.1606, "step": 11840 }, { "epoch": 71.355421686747, "grad_norm": 2.8651413917541504, "learning_rate": 4.894857206604195e-05, "loss": 0.1942, "step": 11845 }, { "epoch": 71.3855421686747, "grad_norm": 1.9885239601135254, "learning_rate": 4.8945783132530124e-05, "loss": 0.1584, "step": 11850 }, { "epoch": 71.41566265060241, "grad_norm": 1.9506202936172485, "learning_rate": 4.89429941990183e-05, "loss": 0.1572, "step": 11855 }, { "epoch": 71.44578313253012, "grad_norm": 1.9616752862930298, "learning_rate": 4.894020526550647e-05, "loss": 0.1415, "step": 11860 }, { "epoch": 71.47590361445783, "grad_norm": 1.929392695426941, "learning_rate": 4.8937416331994646e-05, "loss": 0.1685, "step": 11865 }, { "epoch": 71.50602409638554, "grad_norm": 2.233412981033325, "learning_rate": 4.8934627398482826e-05, "loss": 0.1894, "step": 11870 }, { "epoch": 71.53614457831326, "grad_norm": 1.8386956453323364, "learning_rate": 4.8931838464971e-05, "loss": 0.1743, "step": 11875 }, { "epoch": 71.56626506024097, "grad_norm": 1.6761291027069092, "learning_rate": 4.892904953145917e-05, "loss": 0.1455, "step": 11880 }, { "epoch": 71.59638554216868, "grad_norm": 2.172708749771118, "learning_rate": 4.892626059794735e-05, "loss": 0.1654, "step": 11885 }, { "epoch": 71.62650602409639, "grad_norm": 1.2129136323928833, "learning_rate": 4.892347166443552e-05, "loss": 0.1644, "step": 11890 }, { "epoch": 71.6566265060241, "grad_norm": 1.0940375328063965, "learning_rate": 4.8920682730923695e-05, "loss": 0.147, "step": 11895 }, { "epoch": 71.6867469879518, "grad_norm": 0.9110568761825562, "learning_rate": 4.8917893797411876e-05, "loss": 0.1252, "step": 11900 }, { "epoch": 71.71686746987952, "grad_norm": 2.237536668777466, "learning_rate": 4.891510486390005e-05, "loss": 0.1567, "step": 11905 }, { "epoch": 71.74698795180723, "grad_norm": 0.9506092667579651, "learning_rate": 4.8912315930388216e-05, "loss": 0.1312, "step": 11910 }, { "epoch": 71.77710843373494, "grad_norm": 1.5513007640838623, "learning_rate": 4.89095269968764e-05, "loss": 0.1403, "step": 11915 }, { "epoch": 71.80722891566265, "grad_norm": 1.3525789976119995, "learning_rate": 4.890673806336457e-05, "loss": 0.1746, "step": 11920 }, { "epoch": 71.83734939759036, "grad_norm": 2.4830636978149414, "learning_rate": 4.8903949129852745e-05, "loss": 0.158, "step": 11925 }, { "epoch": 71.86746987951807, "grad_norm": 1.4283735752105713, "learning_rate": 4.8901160196340925e-05, "loss": 0.1459, "step": 11930 }, { "epoch": 71.89759036144578, "grad_norm": 0.5627816915512085, "learning_rate": 4.88983712628291e-05, "loss": 0.1592, "step": 11935 }, { "epoch": 71.92771084337349, "grad_norm": 1.2602050304412842, "learning_rate": 4.8895582329317266e-05, "loss": 0.1672, "step": 11940 }, { "epoch": 71.9578313253012, "grad_norm": 1.8462952375411987, "learning_rate": 4.8892793395805447e-05, "loss": 0.1608, "step": 11945 }, { "epoch": 71.98795180722891, "grad_norm": 1.4569382667541504, "learning_rate": 4.889000446229362e-05, "loss": 0.1659, "step": 11950 }, { "epoch": 72.0, "eval_accuracy": 0.9327565867086118, "eval_auc": 0.9771220917101162, "eval_f1": 0.8969258589511754, "eval_loss": 0.234303280711174, "eval_precision": 0.9514066496163683, "eval_recall": 0.8483466362599772, "eval_runtime": 17.1758, "eval_samples_per_second": 148.057, "eval_steps_per_second": 0.757, "step": 11952 }, { "epoch": 72.01807228915662, "grad_norm": 2.3381729125976562, "learning_rate": 4.8887215528781794e-05, "loss": 0.1808, "step": 11955 }, { "epoch": 72.04819277108433, "grad_norm": 1.6231787204742432, "learning_rate": 4.8884426595269975e-05, "loss": 0.1804, "step": 11960 }, { "epoch": 72.07831325301204, "grad_norm": 1.2411439418792725, "learning_rate": 4.888163766175814e-05, "loss": 0.1695, "step": 11965 }, { "epoch": 72.10843373493977, "grad_norm": 2.3571534156799316, "learning_rate": 4.887884872824632e-05, "loss": 0.1411, "step": 11970 }, { "epoch": 72.13855421686748, "grad_norm": 0.9407815933227539, "learning_rate": 4.8876059794734496e-05, "loss": 0.1445, "step": 11975 }, { "epoch": 72.16867469879519, "grad_norm": 1.3343504667282104, "learning_rate": 4.887327086122267e-05, "loss": 0.1516, "step": 11980 }, { "epoch": 72.1987951807229, "grad_norm": 1.809865117073059, "learning_rate": 4.887048192771085e-05, "loss": 0.1318, "step": 11985 }, { "epoch": 72.2289156626506, "grad_norm": 1.183609962463379, "learning_rate": 4.8867692994199024e-05, "loss": 0.1349, "step": 11990 }, { "epoch": 72.25903614457832, "grad_norm": 1.6165786981582642, "learning_rate": 4.886490406068719e-05, "loss": 0.1535, "step": 11995 }, { "epoch": 72.28915662650603, "grad_norm": 1.968641996383667, "learning_rate": 4.886211512717537e-05, "loss": 0.1976, "step": 12000 }, { "epoch": 72.31927710843374, "grad_norm": 1.1011695861816406, "learning_rate": 4.8859326193663546e-05, "loss": 0.2182, "step": 12005 }, { "epoch": 72.34939759036145, "grad_norm": 1.7650960683822632, "learning_rate": 4.885653726015172e-05, "loss": 0.1635, "step": 12010 }, { "epoch": 72.37951807228916, "grad_norm": 0.9837899804115295, "learning_rate": 4.88537483266399e-05, "loss": 0.14, "step": 12015 }, { "epoch": 72.40963855421687, "grad_norm": 0.8613243103027344, "learning_rate": 4.8850959393128074e-05, "loss": 0.151, "step": 12020 }, { "epoch": 72.43975903614458, "grad_norm": 1.387433648109436, "learning_rate": 4.884817045961624e-05, "loss": 0.1758, "step": 12025 }, { "epoch": 72.46987951807229, "grad_norm": 0.8315243721008301, "learning_rate": 4.884538152610442e-05, "loss": 0.136, "step": 12030 }, { "epoch": 72.5, "grad_norm": 2.1914048194885254, "learning_rate": 4.8842592592592595e-05, "loss": 0.1536, "step": 12035 }, { "epoch": 72.53012048192771, "grad_norm": 1.5686542987823486, "learning_rate": 4.883980365908077e-05, "loss": 0.1569, "step": 12040 }, { "epoch": 72.56024096385542, "grad_norm": 1.354062795639038, "learning_rate": 4.883701472556895e-05, "loss": 0.141, "step": 12045 }, { "epoch": 72.59036144578313, "grad_norm": 2.271022319793701, "learning_rate": 4.883422579205712e-05, "loss": 0.1573, "step": 12050 }, { "epoch": 72.62048192771084, "grad_norm": 1.9208792448043823, "learning_rate": 4.883143685854529e-05, "loss": 0.0976, "step": 12055 }, { "epoch": 72.65060240963855, "grad_norm": 1.3888119459152222, "learning_rate": 4.882864792503347e-05, "loss": 0.1353, "step": 12060 }, { "epoch": 72.68072289156626, "grad_norm": 1.9032936096191406, "learning_rate": 4.8825858991521645e-05, "loss": 0.1592, "step": 12065 }, { "epoch": 72.71084337349397, "grad_norm": 2.03615403175354, "learning_rate": 4.882307005800982e-05, "loss": 0.1529, "step": 12070 }, { "epoch": 72.74096385542168, "grad_norm": 0.8284222483634949, "learning_rate": 4.8820281124498e-05, "loss": 0.1552, "step": 12075 }, { "epoch": 72.7710843373494, "grad_norm": 2.2320663928985596, "learning_rate": 4.8817492190986166e-05, "loss": 0.1372, "step": 12080 }, { "epoch": 72.8012048192771, "grad_norm": 1.9461119174957275, "learning_rate": 4.881470325747434e-05, "loss": 0.1579, "step": 12085 }, { "epoch": 72.83132530120481, "grad_norm": 2.10284161567688, "learning_rate": 4.881191432396252e-05, "loss": 0.1613, "step": 12090 }, { "epoch": 72.86144578313252, "grad_norm": 2.1088132858276367, "learning_rate": 4.8809125390450694e-05, "loss": 0.1413, "step": 12095 }, { "epoch": 72.89156626506023, "grad_norm": 1.2813507318496704, "learning_rate": 4.880633645693887e-05, "loss": 0.1754, "step": 12100 }, { "epoch": 72.92168674698796, "grad_norm": 2.11826491355896, "learning_rate": 4.880354752342705e-05, "loss": 0.1781, "step": 12105 }, { "epoch": 72.95180722891567, "grad_norm": 1.9352747201919556, "learning_rate": 4.8800758589915216e-05, "loss": 0.1632, "step": 12110 }, { "epoch": 72.98192771084338, "grad_norm": 2.155324697494507, "learning_rate": 4.879796965640339e-05, "loss": 0.1499, "step": 12115 }, { "epoch": 73.0, "eval_accuracy": 0.9445536767597326, "eval_auc": 0.9816194436725658, "eval_f1": 0.9168141592920354, "eval_loss": 0.19978806376457214, "eval_precision": 0.9498777506112469, "eval_recall": 0.8859749144811858, "eval_runtime": 16.8874, "eval_samples_per_second": 150.586, "eval_steps_per_second": 0.77, "step": 12118 }, { "epoch": 73.01204819277109, "grad_norm": 2.200657844543457, "learning_rate": 4.879518072289157e-05, "loss": 0.1402, "step": 12120 }, { "epoch": 73.0421686746988, "grad_norm": 0.6882126927375793, "learning_rate": 4.8792391789379744e-05, "loss": 0.1093, "step": 12125 }, { "epoch": 73.07228915662651, "grad_norm": 2.372523307800293, "learning_rate": 4.878960285586792e-05, "loss": 0.1789, "step": 12130 }, { "epoch": 73.10240963855422, "grad_norm": 1.2518162727355957, "learning_rate": 4.87868139223561e-05, "loss": 0.1616, "step": 12135 }, { "epoch": 73.13253012048193, "grad_norm": 2.601158618927002, "learning_rate": 4.8784024988844265e-05, "loss": 0.2215, "step": 12140 }, { "epoch": 73.16265060240964, "grad_norm": 1.3373281955718994, "learning_rate": 4.878123605533244e-05, "loss": 0.182, "step": 12145 }, { "epoch": 73.19277108433735, "grad_norm": 1.938125491142273, "learning_rate": 4.877844712182062e-05, "loss": 0.188, "step": 12150 }, { "epoch": 73.22289156626506, "grad_norm": 1.121777057647705, "learning_rate": 4.8775658188308793e-05, "loss": 0.1442, "step": 12155 }, { "epoch": 73.25301204819277, "grad_norm": 1.318291187286377, "learning_rate": 4.877286925479697e-05, "loss": 0.1739, "step": 12160 }, { "epoch": 73.28313253012048, "grad_norm": 1.4455350637435913, "learning_rate": 4.877008032128514e-05, "loss": 0.1961, "step": 12165 }, { "epoch": 73.3132530120482, "grad_norm": 1.7983412742614746, "learning_rate": 4.8767291387773315e-05, "loss": 0.1264, "step": 12170 }, { "epoch": 73.3433734939759, "grad_norm": 1.8090513944625854, "learning_rate": 4.876450245426149e-05, "loss": 0.1842, "step": 12175 }, { "epoch": 73.37349397590361, "grad_norm": 1.7406883239746094, "learning_rate": 4.876171352074967e-05, "loss": 0.1476, "step": 12180 }, { "epoch": 73.40361445783132, "grad_norm": 1.712064504623413, "learning_rate": 4.875892458723784e-05, "loss": 0.2022, "step": 12185 }, { "epoch": 73.43373493975903, "grad_norm": 1.943896770477295, "learning_rate": 4.875613565372602e-05, "loss": 0.1334, "step": 12190 }, { "epoch": 73.46385542168674, "grad_norm": 1.7055920362472534, "learning_rate": 4.875334672021419e-05, "loss": 0.1452, "step": 12195 }, { "epoch": 73.49397590361446, "grad_norm": 1.2974059581756592, "learning_rate": 4.8750557786702364e-05, "loss": 0.1817, "step": 12200 }, { "epoch": 73.52409638554217, "grad_norm": 1.7666335105895996, "learning_rate": 4.874776885319054e-05, "loss": 0.1363, "step": 12205 }, { "epoch": 73.55421686746988, "grad_norm": 1.5259292125701904, "learning_rate": 4.874497991967872e-05, "loss": 0.1401, "step": 12210 }, { "epoch": 73.58433734939759, "grad_norm": 0.9198861122131348, "learning_rate": 4.874219098616689e-05, "loss": 0.1451, "step": 12215 }, { "epoch": 73.6144578313253, "grad_norm": 1.8410776853561401, "learning_rate": 4.8739402052655066e-05, "loss": 0.1501, "step": 12220 }, { "epoch": 73.644578313253, "grad_norm": 1.808163046836853, "learning_rate": 4.873661311914324e-05, "loss": 0.1307, "step": 12225 }, { "epoch": 73.67469879518072, "grad_norm": 1.8669711351394653, "learning_rate": 4.8733824185631414e-05, "loss": 0.1443, "step": 12230 }, { "epoch": 73.70481927710843, "grad_norm": 1.5613455772399902, "learning_rate": 4.8731035252119594e-05, "loss": 0.1701, "step": 12235 }, { "epoch": 73.73493975903614, "grad_norm": 1.8711267709732056, "learning_rate": 4.872824631860777e-05, "loss": 0.2047, "step": 12240 }, { "epoch": 73.76506024096386, "grad_norm": 2.122509479522705, "learning_rate": 4.872545738509594e-05, "loss": 0.1515, "step": 12245 }, { "epoch": 73.79518072289157, "grad_norm": 2.5126874446868896, "learning_rate": 4.8722668451584116e-05, "loss": 0.1601, "step": 12250 }, { "epoch": 73.82530120481928, "grad_norm": 1.0848028659820557, "learning_rate": 4.871987951807229e-05, "loss": 0.1219, "step": 12255 }, { "epoch": 73.855421686747, "grad_norm": 1.7317748069763184, "learning_rate": 4.8717090584560463e-05, "loss": 0.1528, "step": 12260 }, { "epoch": 73.8855421686747, "grad_norm": 1.4443916082382202, "learning_rate": 4.8714301651048644e-05, "loss": 0.1639, "step": 12265 }, { "epoch": 73.91566265060241, "grad_norm": 1.4768887758255005, "learning_rate": 4.871151271753682e-05, "loss": 0.1726, "step": 12270 }, { "epoch": 73.94578313253012, "grad_norm": 1.8033219575881958, "learning_rate": 4.870872378402499e-05, "loss": 0.1651, "step": 12275 }, { "epoch": 73.97590361445783, "grad_norm": 1.9352055788040161, "learning_rate": 4.8705934850513165e-05, "loss": 0.1884, "step": 12280 }, { "epoch": 74.0, "eval_accuracy": 0.9496657491152183, "eval_auc": 0.9825112485130884, "eval_f1": 0.9245283018867925, "eval_loss": 0.18663667142391205, "eval_precision": 0.9572649572649573, "eval_recall": 0.8939566704675028, "eval_runtime": 17.2644, "eval_samples_per_second": 147.297, "eval_steps_per_second": 0.753, "step": 12284 }, { "epoch": 74.00602409638554, "grad_norm": 0.5611174702644348, "learning_rate": 4.870314591700134e-05, "loss": 0.1332, "step": 12285 }, { "epoch": 74.03614457831326, "grad_norm": 1.4713517427444458, "learning_rate": 4.870035698348951e-05, "loss": 0.1472, "step": 12290 }, { "epoch": 74.06626506024097, "grad_norm": 1.0096324682235718, "learning_rate": 4.8697568049977694e-05, "loss": 0.1281, "step": 12295 }, { "epoch": 74.09638554216868, "grad_norm": 2.5639259815216064, "learning_rate": 4.869477911646587e-05, "loss": 0.1904, "step": 12300 }, { "epoch": 74.12650602409639, "grad_norm": 1.5320582389831543, "learning_rate": 4.869199018295404e-05, "loss": 0.1703, "step": 12305 }, { "epoch": 74.1566265060241, "grad_norm": 0.815729022026062, "learning_rate": 4.8689201249442215e-05, "loss": 0.1134, "step": 12310 }, { "epoch": 74.1867469879518, "grad_norm": 1.2700828313827515, "learning_rate": 4.868641231593039e-05, "loss": 0.1279, "step": 12315 }, { "epoch": 74.21686746987952, "grad_norm": 2.0344655513763428, "learning_rate": 4.868362338241856e-05, "loss": 0.1497, "step": 12320 }, { "epoch": 74.24698795180723, "grad_norm": 1.7374845743179321, "learning_rate": 4.868083444890674e-05, "loss": 0.1701, "step": 12325 }, { "epoch": 74.27710843373494, "grad_norm": 1.5035818815231323, "learning_rate": 4.867804551539492e-05, "loss": 0.1256, "step": 12330 }, { "epoch": 74.30722891566265, "grad_norm": 2.07659912109375, "learning_rate": 4.867525658188309e-05, "loss": 0.1431, "step": 12335 }, { "epoch": 74.33734939759036, "grad_norm": 1.4296315908432007, "learning_rate": 4.8672467648371265e-05, "loss": 0.1318, "step": 12340 }, { "epoch": 74.36746987951807, "grad_norm": 2.0391762256622314, "learning_rate": 4.866967871485944e-05, "loss": 0.1663, "step": 12345 }, { "epoch": 74.39759036144578, "grad_norm": 1.483923077583313, "learning_rate": 4.866688978134761e-05, "loss": 0.1721, "step": 12350 }, { "epoch": 74.42771084337349, "grad_norm": 1.750510811805725, "learning_rate": 4.866410084783579e-05, "loss": 0.1574, "step": 12355 }, { "epoch": 74.4578313253012, "grad_norm": 1.6884591579437256, "learning_rate": 4.8661311914323966e-05, "loss": 0.1368, "step": 12360 }, { "epoch": 74.48795180722891, "grad_norm": 1.468232274055481, "learning_rate": 4.865852298081214e-05, "loss": 0.1595, "step": 12365 }, { "epoch": 74.51807228915662, "grad_norm": 2.2312231063842773, "learning_rate": 4.8655734047300314e-05, "loss": 0.1624, "step": 12370 }, { "epoch": 74.54819277108433, "grad_norm": 1.472649335861206, "learning_rate": 4.865294511378849e-05, "loss": 0.181, "step": 12375 }, { "epoch": 74.57831325301204, "grad_norm": 1.16572105884552, "learning_rate": 4.865015618027666e-05, "loss": 0.1374, "step": 12380 }, { "epoch": 74.60843373493977, "grad_norm": 0.9645233750343323, "learning_rate": 4.864736724676484e-05, "loss": 0.1596, "step": 12385 }, { "epoch": 74.63855421686748, "grad_norm": 1.2052770853042603, "learning_rate": 4.8644578313253016e-05, "loss": 0.1343, "step": 12390 }, { "epoch": 74.66867469879519, "grad_norm": 1.0349658727645874, "learning_rate": 4.864178937974119e-05, "loss": 0.1606, "step": 12395 }, { "epoch": 74.6987951807229, "grad_norm": 2.264651298522949, "learning_rate": 4.8639000446229364e-05, "loss": 0.1383, "step": 12400 }, { "epoch": 74.7289156626506, "grad_norm": 1.8398776054382324, "learning_rate": 4.863621151271754e-05, "loss": 0.1364, "step": 12405 }, { "epoch": 74.75903614457832, "grad_norm": 2.0272128582000732, "learning_rate": 4.863342257920571e-05, "loss": 0.1637, "step": 12410 }, { "epoch": 74.78915662650603, "grad_norm": 1.3465744256973267, "learning_rate": 4.863063364569389e-05, "loss": 0.1429, "step": 12415 }, { "epoch": 74.81927710843374, "grad_norm": 1.5959062576293945, "learning_rate": 4.8627844712182066e-05, "loss": 0.1452, "step": 12420 }, { "epoch": 74.84939759036145, "grad_norm": 1.0067527294158936, "learning_rate": 4.862505577867024e-05, "loss": 0.1177, "step": 12425 }, { "epoch": 74.87951807228916, "grad_norm": 1.834115982055664, "learning_rate": 4.862226684515841e-05, "loss": 0.1295, "step": 12430 }, { "epoch": 74.90963855421687, "grad_norm": 0.5453358888626099, "learning_rate": 4.861947791164659e-05, "loss": 0.1491, "step": 12435 }, { "epoch": 74.93975903614458, "grad_norm": 1.7512986660003662, "learning_rate": 4.861668897813476e-05, "loss": 0.1516, "step": 12440 }, { "epoch": 74.96987951807229, "grad_norm": 2.071878671646118, "learning_rate": 4.861390004462294e-05, "loss": 0.1694, "step": 12445 }, { "epoch": 75.0, "grad_norm": 1.0349500179290771, "learning_rate": 4.8611111111111115e-05, "loss": 0.1718, "step": 12450 }, { "epoch": 75.0, "eval_accuracy": 0.9406213134093591, "eval_auc": 0.9784043606039907, "eval_f1": 0.908761329305136, "eval_loss": 0.23483532667160034, "eval_precision": 0.9665809768637532, "eval_recall": 0.8574686431014823, "eval_runtime": 19.7514, "eval_samples_per_second": 128.75, "eval_steps_per_second": 0.658, "step": 12450 }, { "epoch": 75.03012048192771, "grad_norm": 1.2066843509674072, "learning_rate": 4.860832217759929e-05, "loss": 0.1313, "step": 12455 }, { "epoch": 75.06024096385542, "grad_norm": 1.7551440000534058, "learning_rate": 4.860553324408746e-05, "loss": 0.136, "step": 12460 }, { "epoch": 75.09036144578313, "grad_norm": 0.9596983194351196, "learning_rate": 4.8602744310575637e-05, "loss": 0.1539, "step": 12465 }, { "epoch": 75.12048192771084, "grad_norm": 0.9710562825202942, "learning_rate": 4.859995537706381e-05, "loss": 0.1277, "step": 12470 }, { "epoch": 75.15060240963855, "grad_norm": 1.754207730293274, "learning_rate": 4.859716644355199e-05, "loss": 0.1658, "step": 12475 }, { "epoch": 75.18072289156626, "grad_norm": 1.8475215435028076, "learning_rate": 4.8594377510040165e-05, "loss": 0.1616, "step": 12480 }, { "epoch": 75.21084337349397, "grad_norm": 0.8701611757278442, "learning_rate": 4.859158857652834e-05, "loss": 0.1076, "step": 12485 }, { "epoch": 75.24096385542168, "grad_norm": 1.3654662370681763, "learning_rate": 4.858879964301651e-05, "loss": 0.1217, "step": 12490 }, { "epoch": 75.2710843373494, "grad_norm": 1.7945674657821655, "learning_rate": 4.8586010709504686e-05, "loss": 0.1342, "step": 12495 }, { "epoch": 75.3012048192771, "grad_norm": 0.8711778521537781, "learning_rate": 4.858322177599287e-05, "loss": 0.1648, "step": 12500 }, { "epoch": 75.33132530120481, "grad_norm": 1.753820776939392, "learning_rate": 4.858043284248104e-05, "loss": 0.1598, "step": 12505 }, { "epoch": 75.36144578313252, "grad_norm": 1.9302884340286255, "learning_rate": 4.8577643908969214e-05, "loss": 0.13, "step": 12510 }, { "epoch": 75.39156626506023, "grad_norm": 1.5695573091506958, "learning_rate": 4.857485497545739e-05, "loss": 0.1689, "step": 12515 }, { "epoch": 75.42168674698796, "grad_norm": 1.8120839595794678, "learning_rate": 4.857206604194556e-05, "loss": 0.1575, "step": 12520 }, { "epoch": 75.45180722891567, "grad_norm": 0.9831445217132568, "learning_rate": 4.8569277108433736e-05, "loss": 0.1591, "step": 12525 }, { "epoch": 75.48192771084338, "grad_norm": 1.8309271335601807, "learning_rate": 4.8566488174921916e-05, "loss": 0.1395, "step": 12530 }, { "epoch": 75.51204819277109, "grad_norm": 2.22189998626709, "learning_rate": 4.856369924141009e-05, "loss": 0.1659, "step": 12535 }, { "epoch": 75.5421686746988, "grad_norm": 2.0341784954071045, "learning_rate": 4.8560910307898264e-05, "loss": 0.1298, "step": 12540 }, { "epoch": 75.57228915662651, "grad_norm": 2.1318440437316895, "learning_rate": 4.855812137438644e-05, "loss": 0.2028, "step": 12545 }, { "epoch": 75.60240963855422, "grad_norm": 1.3413338661193848, "learning_rate": 4.855533244087461e-05, "loss": 0.2001, "step": 12550 }, { "epoch": 75.63253012048193, "grad_norm": 2.999337673187256, "learning_rate": 4.8552543507362785e-05, "loss": 0.1812, "step": 12555 }, { "epoch": 75.66265060240964, "grad_norm": 1.7685872316360474, "learning_rate": 4.8549754573850966e-05, "loss": 0.1607, "step": 12560 }, { "epoch": 75.69277108433735, "grad_norm": 0.9249743819236755, "learning_rate": 4.854696564033914e-05, "loss": 0.1203, "step": 12565 }, { "epoch": 75.72289156626506, "grad_norm": 0.767066240310669, "learning_rate": 4.854417670682731e-05, "loss": 0.1404, "step": 12570 }, { "epoch": 75.75301204819277, "grad_norm": 1.860562801361084, "learning_rate": 4.854138777331549e-05, "loss": 0.173, "step": 12575 }, { "epoch": 75.78313253012048, "grad_norm": 1.5018879175186157, "learning_rate": 4.853859883980366e-05, "loss": 0.149, "step": 12580 }, { "epoch": 75.8132530120482, "grad_norm": 1.9827969074249268, "learning_rate": 4.8535809906291835e-05, "loss": 0.1642, "step": 12585 }, { "epoch": 75.8433734939759, "grad_norm": 2.529618501663208, "learning_rate": 4.8533020972780015e-05, "loss": 0.1608, "step": 12590 }, { "epoch": 75.87349397590361, "grad_norm": 1.5653445720672607, "learning_rate": 4.853023203926819e-05, "loss": 0.1639, "step": 12595 }, { "epoch": 75.90361445783132, "grad_norm": 1.8559218645095825, "learning_rate": 4.8527443105756356e-05, "loss": 0.1563, "step": 12600 }, { "epoch": 75.93373493975903, "grad_norm": 1.2865639925003052, "learning_rate": 4.852465417224454e-05, "loss": 0.1613, "step": 12605 }, { "epoch": 75.96385542168674, "grad_norm": 1.7536803483963013, "learning_rate": 4.852186523873271e-05, "loss": 0.1621, "step": 12610 }, { "epoch": 75.99397590361446, "grad_norm": 1.8247182369232178, "learning_rate": 4.8519076305220884e-05, "loss": 0.1481, "step": 12615 }, { "epoch": 76.0, "eval_accuracy": 0.9429807314195832, "eval_auc": 0.9826009080941385, "eval_f1": 0.9141503848431024, "eval_loss": 0.18916143476963043, "eval_precision": 0.9507389162561576, "eval_recall": 0.8802736602052451, "eval_runtime": 19.7205, "eval_samples_per_second": 128.952, "eval_steps_per_second": 0.659, "step": 12616 }, { "epoch": 76.02409638554217, "grad_norm": 1.8324910402297974, "learning_rate": 4.8516287371709065e-05, "loss": 0.155, "step": 12620 }, { "epoch": 76.05421686746988, "grad_norm": 1.6081472635269165, "learning_rate": 4.851349843819724e-05, "loss": 0.1395, "step": 12625 }, { "epoch": 76.08433734939759, "grad_norm": 2.3763484954833984, "learning_rate": 4.8510709504685406e-05, "loss": 0.1555, "step": 12630 }, { "epoch": 76.1144578313253, "grad_norm": 1.6817314624786377, "learning_rate": 4.8507920571173586e-05, "loss": 0.1345, "step": 12635 }, { "epoch": 76.144578313253, "grad_norm": 1.8232126235961914, "learning_rate": 4.850513163766176e-05, "loss": 0.131, "step": 12640 }, { "epoch": 76.17469879518072, "grad_norm": 1.329211950302124, "learning_rate": 4.8502342704149934e-05, "loss": 0.1191, "step": 12645 }, { "epoch": 76.20481927710843, "grad_norm": 1.6847299337387085, "learning_rate": 4.8499553770638114e-05, "loss": 0.1599, "step": 12650 }, { "epoch": 76.23493975903614, "grad_norm": 1.6902977228164673, "learning_rate": 4.849676483712629e-05, "loss": 0.1614, "step": 12655 }, { "epoch": 76.26506024096386, "grad_norm": 1.05877685546875, "learning_rate": 4.8493975903614455e-05, "loss": 0.1795, "step": 12660 }, { "epoch": 76.29518072289157, "grad_norm": 1.3436675071716309, "learning_rate": 4.8491186970102636e-05, "loss": 0.133, "step": 12665 }, { "epoch": 76.32530120481928, "grad_norm": 1.3640121221542358, "learning_rate": 4.848839803659081e-05, "loss": 0.1552, "step": 12670 }, { "epoch": 76.355421686747, "grad_norm": 2.0116324424743652, "learning_rate": 4.848560910307898e-05, "loss": 0.12, "step": 12675 }, { "epoch": 76.3855421686747, "grad_norm": 1.7177692651748657, "learning_rate": 4.8482820169567164e-05, "loss": 0.1633, "step": 12680 }, { "epoch": 76.41566265060241, "grad_norm": 2.392770528793335, "learning_rate": 4.848003123605534e-05, "loss": 0.1482, "step": 12685 }, { "epoch": 76.44578313253012, "grad_norm": 2.1464264392852783, "learning_rate": 4.8477242302543505e-05, "loss": 0.1402, "step": 12690 }, { "epoch": 76.47590361445783, "grad_norm": 1.3617451190948486, "learning_rate": 4.8474453369031685e-05, "loss": 0.1193, "step": 12695 }, { "epoch": 76.50602409638554, "grad_norm": 2.1297426223754883, "learning_rate": 4.847166443551986e-05, "loss": 0.1829, "step": 12700 }, { "epoch": 76.53614457831326, "grad_norm": 1.5493841171264648, "learning_rate": 4.846887550200803e-05, "loss": 0.1352, "step": 12705 }, { "epoch": 76.56626506024097, "grad_norm": 1.562913179397583, "learning_rate": 4.8466086568496214e-05, "loss": 0.1285, "step": 12710 }, { "epoch": 76.59638554216868, "grad_norm": 0.7667553424835205, "learning_rate": 4.846329763498438e-05, "loss": 0.1241, "step": 12715 }, { "epoch": 76.62650602409639, "grad_norm": 1.4597588777542114, "learning_rate": 4.8460508701472554e-05, "loss": 0.1429, "step": 12720 }, { "epoch": 76.6566265060241, "grad_norm": 2.1104636192321777, "learning_rate": 4.8457719767960735e-05, "loss": 0.1218, "step": 12725 }, { "epoch": 76.6867469879518, "grad_norm": 0.8995615839958191, "learning_rate": 4.845493083444891e-05, "loss": 0.1343, "step": 12730 }, { "epoch": 76.71686746987952, "grad_norm": 1.0554083585739136, "learning_rate": 4.845214190093708e-05, "loss": 0.1517, "step": 12735 }, { "epoch": 76.74698795180723, "grad_norm": 2.7860047817230225, "learning_rate": 4.844935296742526e-05, "loss": 0.1242, "step": 12740 }, { "epoch": 76.77710843373494, "grad_norm": 2.118823528289795, "learning_rate": 4.844656403391343e-05, "loss": 0.1753, "step": 12745 }, { "epoch": 76.80722891566265, "grad_norm": 1.9358220100402832, "learning_rate": 4.8443775100401604e-05, "loss": 0.1583, "step": 12750 }, { "epoch": 76.83734939759036, "grad_norm": 1.3970794677734375, "learning_rate": 4.8440986166889784e-05, "loss": 0.119, "step": 12755 }, { "epoch": 76.86746987951807, "grad_norm": 1.7617546319961548, "learning_rate": 4.843819723337796e-05, "loss": 0.1805, "step": 12760 }, { "epoch": 76.89759036144578, "grad_norm": 1.2866977453231812, "learning_rate": 4.843540829986614e-05, "loss": 0.1619, "step": 12765 }, { "epoch": 76.92771084337349, "grad_norm": 1.5546061992645264, "learning_rate": 4.843261936635431e-05, "loss": 0.147, "step": 12770 }, { "epoch": 76.9578313253012, "grad_norm": 1.4257575273513794, "learning_rate": 4.842983043284248e-05, "loss": 0.1575, "step": 12775 }, { "epoch": 76.98795180722891, "grad_norm": 1.601998209953308, "learning_rate": 4.842704149933066e-05, "loss": 0.159, "step": 12780 }, { "epoch": 77.0, "eval_accuracy": 0.9335430593786866, "eval_auc": 0.9828859708079355, "eval_f1": 0.8963825873697119, "eval_loss": 0.23264428973197937, "eval_precision": 0.9694960212201591, "eval_recall": 0.8335233751425314, "eval_runtime": 19.5293, "eval_samples_per_second": 130.214, "eval_steps_per_second": 0.666, "step": 12782 }, { "epoch": 77.01807228915662, "grad_norm": 0.8001152276992798, "learning_rate": 4.8424252565818834e-05, "loss": 0.1266, "step": 12785 }, { "epoch": 77.04819277108433, "grad_norm": 1.8577905893325806, "learning_rate": 4.842146363230701e-05, "loss": 0.1358, "step": 12790 }, { "epoch": 77.07831325301204, "grad_norm": 1.4647650718688965, "learning_rate": 4.841867469879519e-05, "loss": 0.1319, "step": 12795 }, { "epoch": 77.10843373493977, "grad_norm": 1.3896921873092651, "learning_rate": 4.8415885765283355e-05, "loss": 0.1553, "step": 12800 }, { "epoch": 77.13855421686748, "grad_norm": 1.3923394680023193, "learning_rate": 4.841309683177153e-05, "loss": 0.1246, "step": 12805 }, { "epoch": 77.16867469879519, "grad_norm": 1.335044503211975, "learning_rate": 4.841030789825971e-05, "loss": 0.1555, "step": 12810 }, { "epoch": 77.1987951807229, "grad_norm": 1.0598628520965576, "learning_rate": 4.8407518964747884e-05, "loss": 0.1296, "step": 12815 }, { "epoch": 77.2289156626506, "grad_norm": 1.370320200920105, "learning_rate": 4.840473003123606e-05, "loss": 0.1289, "step": 12820 }, { "epoch": 77.25903614457832, "grad_norm": 2.2430672645568848, "learning_rate": 4.840194109772424e-05, "loss": 0.2017, "step": 12825 }, { "epoch": 77.28915662650603, "grad_norm": 0.730043351650238, "learning_rate": 4.8399152164212405e-05, "loss": 0.1927, "step": 12830 }, { "epoch": 77.31927710843374, "grad_norm": 1.2062714099884033, "learning_rate": 4.839636323070058e-05, "loss": 0.1194, "step": 12835 }, { "epoch": 77.34939759036145, "grad_norm": 1.6250019073486328, "learning_rate": 4.839357429718876e-05, "loss": 0.1492, "step": 12840 }, { "epoch": 77.37951807228916, "grad_norm": 1.2313861846923828, "learning_rate": 4.839078536367693e-05, "loss": 0.1086, "step": 12845 }, { "epoch": 77.40963855421687, "grad_norm": 2.189713478088379, "learning_rate": 4.838799643016511e-05, "loss": 0.1701, "step": 12850 }, { "epoch": 77.43975903614458, "grad_norm": 1.831549882888794, "learning_rate": 4.838520749665329e-05, "loss": 0.1678, "step": 12855 }, { "epoch": 77.46987951807229, "grad_norm": 1.2970221042633057, "learning_rate": 4.8382418563141454e-05, "loss": 0.1435, "step": 12860 }, { "epoch": 77.5, "grad_norm": 1.7378772497177124, "learning_rate": 4.837962962962963e-05, "loss": 0.1353, "step": 12865 }, { "epoch": 77.53012048192771, "grad_norm": 1.8176010847091675, "learning_rate": 4.837684069611781e-05, "loss": 0.1281, "step": 12870 }, { "epoch": 77.56024096385542, "grad_norm": 1.1678860187530518, "learning_rate": 4.837405176260598e-05, "loss": 0.1532, "step": 12875 }, { "epoch": 77.59036144578313, "grad_norm": 1.607122778892517, "learning_rate": 4.8371262829094156e-05, "loss": 0.1976, "step": 12880 }, { "epoch": 77.62048192771084, "grad_norm": 1.2736643552780151, "learning_rate": 4.836847389558233e-05, "loss": 0.1448, "step": 12885 }, { "epoch": 77.65060240963855, "grad_norm": 1.4824341535568237, "learning_rate": 4.8365684962070504e-05, "loss": 0.1351, "step": 12890 }, { "epoch": 77.68072289156626, "grad_norm": 1.9544227123260498, "learning_rate": 4.836289602855868e-05, "loss": 0.1599, "step": 12895 }, { "epoch": 77.71084337349397, "grad_norm": 1.311608910560608, "learning_rate": 4.836010709504686e-05, "loss": 0.1564, "step": 12900 }, { "epoch": 77.74096385542168, "grad_norm": 1.8438730239868164, "learning_rate": 4.835731816153503e-05, "loss": 0.1607, "step": 12905 }, { "epoch": 77.7710843373494, "grad_norm": 1.3831279277801514, "learning_rate": 4.8354529228023206e-05, "loss": 0.1686, "step": 12910 }, { "epoch": 77.8012048192771, "grad_norm": 2.1410272121429443, "learning_rate": 4.835174029451138e-05, "loss": 0.1589, "step": 12915 }, { "epoch": 77.83132530120481, "grad_norm": 1.1024550199508667, "learning_rate": 4.8348951360999554e-05, "loss": 0.1355, "step": 12920 }, { "epoch": 77.86144578313252, "grad_norm": 1.274852991104126, "learning_rate": 4.834616242748773e-05, "loss": 0.1728, "step": 12925 }, { "epoch": 77.89156626506023, "grad_norm": 1.4424153566360474, "learning_rate": 4.834337349397591e-05, "loss": 0.143, "step": 12930 }, { "epoch": 77.92168674698796, "grad_norm": 0.7716652750968933, "learning_rate": 4.834058456046408e-05, "loss": 0.1549, "step": 12935 }, { "epoch": 77.95180722891567, "grad_norm": 0.8140466809272766, "learning_rate": 4.8337795626952256e-05, "loss": 0.1277, "step": 12940 }, { "epoch": 77.98192771084338, "grad_norm": 2.625098705291748, "learning_rate": 4.833500669344043e-05, "loss": 0.1516, "step": 12945 }, { "epoch": 78.0, "eval_accuracy": 0.9303971686983877, "eval_auc": 0.9816009642169297, "eval_f1": 0.8910769230769231, "eval_loss": 0.2438606321811676, "eval_precision": 0.9679144385026738, "eval_recall": 0.8255416191562144, "eval_runtime": 17.1969, "eval_samples_per_second": 147.875, "eval_steps_per_second": 0.756, "step": 12948 }, { "epoch": 78.01204819277109, "grad_norm": 1.4964710474014282, "learning_rate": 4.83322177599286e-05, "loss": 0.1541, "step": 12950 }, { "epoch": 78.0421686746988, "grad_norm": 1.4898884296417236, "learning_rate": 4.832942882641678e-05, "loss": 0.1372, "step": 12955 }, { "epoch": 78.07228915662651, "grad_norm": 1.0486506223678589, "learning_rate": 4.832663989290496e-05, "loss": 0.1392, "step": 12960 }, { "epoch": 78.10240963855422, "grad_norm": 1.1756079196929932, "learning_rate": 4.832385095939313e-05, "loss": 0.1627, "step": 12965 }, { "epoch": 78.13253012048193, "grad_norm": 1.2145124673843384, "learning_rate": 4.8321062025881305e-05, "loss": 0.1319, "step": 12970 }, { "epoch": 78.16265060240964, "grad_norm": 1.7100399732589722, "learning_rate": 4.831827309236948e-05, "loss": 0.1514, "step": 12975 }, { "epoch": 78.19277108433735, "grad_norm": 2.0911970138549805, "learning_rate": 4.831548415885765e-05, "loss": 0.1498, "step": 12980 }, { "epoch": 78.22289156626506, "grad_norm": 0.9005778431892395, "learning_rate": 4.8312695225345826e-05, "loss": 0.141, "step": 12985 }, { "epoch": 78.25301204819277, "grad_norm": 0.8048529028892517, "learning_rate": 4.830990629183401e-05, "loss": 0.1234, "step": 12990 }, { "epoch": 78.28313253012048, "grad_norm": 2.1382839679718018, "learning_rate": 4.830711735832218e-05, "loss": 0.1294, "step": 12995 }, { "epoch": 78.3132530120482, "grad_norm": 2.0718533992767334, "learning_rate": 4.8304328424810355e-05, "loss": 0.1749, "step": 13000 }, { "epoch": 78.3433734939759, "grad_norm": 1.7664657831192017, "learning_rate": 4.830153949129853e-05, "loss": 0.1551, "step": 13005 }, { "epoch": 78.37349397590361, "grad_norm": 2.1365315914154053, "learning_rate": 4.82987505577867e-05, "loss": 0.1415, "step": 13010 }, { "epoch": 78.40361445783132, "grad_norm": 2.132537603378296, "learning_rate": 4.8295961624274876e-05, "loss": 0.1177, "step": 13015 }, { "epoch": 78.43373493975903, "grad_norm": 1.7908108234405518, "learning_rate": 4.8293172690763057e-05, "loss": 0.1503, "step": 13020 }, { "epoch": 78.46385542168674, "grad_norm": 2.238882064819336, "learning_rate": 4.829038375725123e-05, "loss": 0.1397, "step": 13025 }, { "epoch": 78.49397590361446, "grad_norm": 1.7467546463012695, "learning_rate": 4.8287594823739404e-05, "loss": 0.1553, "step": 13030 }, { "epoch": 78.52409638554217, "grad_norm": 1.1847363710403442, "learning_rate": 4.828480589022758e-05, "loss": 0.1648, "step": 13035 }, { "epoch": 78.55421686746988, "grad_norm": 1.646297574043274, "learning_rate": 4.828201695671575e-05, "loss": 0.1653, "step": 13040 }, { "epoch": 78.58433734939759, "grad_norm": 1.0398398637771606, "learning_rate": 4.827922802320393e-05, "loss": 0.1528, "step": 13045 }, { "epoch": 78.6144578313253, "grad_norm": 1.7448629140853882, "learning_rate": 4.8276439089692106e-05, "loss": 0.1327, "step": 13050 }, { "epoch": 78.644578313253, "grad_norm": 1.9700462818145752, "learning_rate": 4.827365015618028e-05, "loss": 0.1495, "step": 13055 }, { "epoch": 78.67469879518072, "grad_norm": 1.2562404870986938, "learning_rate": 4.8270861222668454e-05, "loss": 0.151, "step": 13060 }, { "epoch": 78.70481927710843, "grad_norm": 1.384185552597046, "learning_rate": 4.826807228915663e-05, "loss": 0.1414, "step": 13065 }, { "epoch": 78.73493975903614, "grad_norm": 1.5802093744277954, "learning_rate": 4.82652833556448e-05, "loss": 0.1524, "step": 13070 }, { "epoch": 78.76506024096386, "grad_norm": 1.0003732442855835, "learning_rate": 4.826249442213298e-05, "loss": 0.1264, "step": 13075 }, { "epoch": 78.79518072289157, "grad_norm": 1.2597041130065918, "learning_rate": 4.8259705488621156e-05, "loss": 0.1209, "step": 13080 }, { "epoch": 78.82530120481928, "grad_norm": 1.2785955667495728, "learning_rate": 4.825691655510933e-05, "loss": 0.121, "step": 13085 }, { "epoch": 78.855421686747, "grad_norm": 1.2658116817474365, "learning_rate": 4.82541276215975e-05, "loss": 0.1855, "step": 13090 }, { "epoch": 78.8855421686747, "grad_norm": 2.0068914890289307, "learning_rate": 4.825133868808568e-05, "loss": 0.173, "step": 13095 }, { "epoch": 78.91566265060241, "grad_norm": 2.194626808166504, "learning_rate": 4.824854975457385e-05, "loss": 0.1793, "step": 13100 }, { "epoch": 78.94578313253012, "grad_norm": 0.5974365472793579, "learning_rate": 4.824576082106203e-05, "loss": 0.1157, "step": 13105 }, { "epoch": 78.97590361445783, "grad_norm": 1.793129563331604, "learning_rate": 4.8242971887550205e-05, "loss": 0.1381, "step": 13110 }, { "epoch": 79.0, "eval_accuracy": 0.9445536767597326, "eval_auc": 0.9816995213136567, "eval_f1": 0.9154169166166767, "eval_loss": 0.21309958398342133, "eval_precision": 0.9658227848101266, "eval_recall": 0.8700114025085519, "eval_runtime": 16.8826, "eval_samples_per_second": 150.629, "eval_steps_per_second": 0.77, "step": 13114 }, { "epoch": 79.00602409638554, "grad_norm": 1.3499771356582642, "learning_rate": 4.824018295403838e-05, "loss": 0.137, "step": 13115 }, { "epoch": 79.03614457831326, "grad_norm": 1.3147327899932861, "learning_rate": 4.823739402052655e-05, "loss": 0.1477, "step": 13120 }, { "epoch": 79.06626506024097, "grad_norm": 1.6996004581451416, "learning_rate": 4.823460508701473e-05, "loss": 0.1337, "step": 13125 }, { "epoch": 79.09638554216868, "grad_norm": 1.8124324083328247, "learning_rate": 4.82318161535029e-05, "loss": 0.154, "step": 13130 }, { "epoch": 79.12650602409639, "grad_norm": 0.9253437519073486, "learning_rate": 4.822902721999108e-05, "loss": 0.1171, "step": 13135 }, { "epoch": 79.1566265060241, "grad_norm": 1.4336225986480713, "learning_rate": 4.8226238286479255e-05, "loss": 0.1267, "step": 13140 }, { "epoch": 79.1867469879518, "grad_norm": 0.7874173521995544, "learning_rate": 4.822344935296743e-05, "loss": 0.1139, "step": 13145 }, { "epoch": 79.21686746987952, "grad_norm": 2.4000792503356934, "learning_rate": 4.82206604194556e-05, "loss": 0.1352, "step": 13150 }, { "epoch": 79.24698795180723, "grad_norm": 1.6120721101760864, "learning_rate": 4.8217871485943776e-05, "loss": 0.1332, "step": 13155 }, { "epoch": 79.27710843373494, "grad_norm": 2.109287738800049, "learning_rate": 4.821508255243195e-05, "loss": 0.1505, "step": 13160 }, { "epoch": 79.30722891566265, "grad_norm": 1.2057571411132812, "learning_rate": 4.821229361892013e-05, "loss": 0.13, "step": 13165 }, { "epoch": 79.33734939759036, "grad_norm": 1.8289693593978882, "learning_rate": 4.8209504685408304e-05, "loss": 0.1708, "step": 13170 }, { "epoch": 79.36746987951807, "grad_norm": 1.6963151693344116, "learning_rate": 4.820671575189648e-05, "loss": 0.1157, "step": 13175 }, { "epoch": 79.39759036144578, "grad_norm": 1.7568334341049194, "learning_rate": 4.820392681838465e-05, "loss": 0.1222, "step": 13180 }, { "epoch": 79.42771084337349, "grad_norm": 1.2924585342407227, "learning_rate": 4.8201137884872826e-05, "loss": 0.1325, "step": 13185 }, { "epoch": 79.4578313253012, "grad_norm": 1.5911897420883179, "learning_rate": 4.8198348951361e-05, "loss": 0.1534, "step": 13190 }, { "epoch": 79.48795180722891, "grad_norm": 2.340956926345825, "learning_rate": 4.819556001784918e-05, "loss": 0.1413, "step": 13195 }, { "epoch": 79.51807228915662, "grad_norm": 1.9254859685897827, "learning_rate": 4.8192771084337354e-05, "loss": 0.1404, "step": 13200 }, { "epoch": 79.54819277108433, "grad_norm": 2.4253478050231934, "learning_rate": 4.818998215082553e-05, "loss": 0.1981, "step": 13205 }, { "epoch": 79.57831325301204, "grad_norm": 1.8151286840438843, "learning_rate": 4.81871932173137e-05, "loss": 0.1392, "step": 13210 }, { "epoch": 79.60843373493977, "grad_norm": 2.336282968521118, "learning_rate": 4.8184404283801875e-05, "loss": 0.1375, "step": 13215 }, { "epoch": 79.63855421686748, "grad_norm": 1.485291838645935, "learning_rate": 4.818161535029005e-05, "loss": 0.1329, "step": 13220 }, { "epoch": 79.66867469879519, "grad_norm": 1.2211674451828003, "learning_rate": 4.817882641677823e-05, "loss": 0.1499, "step": 13225 }, { "epoch": 79.6987951807229, "grad_norm": 3.9037880897521973, "learning_rate": 4.8176037483266403e-05, "loss": 0.1366, "step": 13230 }, { "epoch": 79.7289156626506, "grad_norm": 2.195895195007324, "learning_rate": 4.817324854975457e-05, "loss": 0.124, "step": 13235 }, { "epoch": 79.75903614457832, "grad_norm": 2.0953190326690674, "learning_rate": 4.817045961624275e-05, "loss": 0.1333, "step": 13240 }, { "epoch": 79.78915662650603, "grad_norm": 0.6409681439399719, "learning_rate": 4.8167670682730925e-05, "loss": 0.1607, "step": 13245 }, { "epoch": 79.81927710843374, "grad_norm": 0.958230197429657, "learning_rate": 4.81648817492191e-05, "loss": 0.1246, "step": 13250 }, { "epoch": 79.84939759036145, "grad_norm": 1.3889961242675781, "learning_rate": 4.816209281570728e-05, "loss": 0.1231, "step": 13255 }, { "epoch": 79.87951807228916, "grad_norm": 1.667174220085144, "learning_rate": 4.815930388219545e-05, "loss": 0.1712, "step": 13260 }, { "epoch": 79.90963855421687, "grad_norm": 2.9418723583221436, "learning_rate": 4.815651494868362e-05, "loss": 0.1526, "step": 13265 }, { "epoch": 79.93975903614458, "grad_norm": 2.489426612854004, "learning_rate": 4.81537260151718e-05, "loss": 0.1575, "step": 13270 }, { "epoch": 79.96987951807229, "grad_norm": 1.1667286157608032, "learning_rate": 4.8150937081659974e-05, "loss": 0.0973, "step": 13275 }, { "epoch": 80.0, "grad_norm": 1.7366154193878174, "learning_rate": 4.814814814814815e-05, "loss": 0.1904, "step": 13280 }, { "epoch": 80.0, "eval_accuracy": 0.9359024773889107, "eval_auc": 0.9796424841316229, "eval_f1": 0.9013914095583787, "eval_loss": 0.2617673873901367, "eval_precision": 0.9600515463917526, "eval_recall": 0.8494868871151653, "eval_runtime": 17.1382, "eval_samples_per_second": 148.382, "eval_steps_per_second": 0.759, "step": 13280 }, { "epoch": 80.03012048192771, "grad_norm": 2.099924087524414, "learning_rate": 4.814535921463633e-05, "loss": 0.1552, "step": 13285 }, { "epoch": 80.06024096385542, "grad_norm": 2.3259527683258057, "learning_rate": 4.81425702811245e-05, "loss": 0.1649, "step": 13290 }, { "epoch": 80.09036144578313, "grad_norm": 2.0278472900390625, "learning_rate": 4.8139781347612676e-05, "loss": 0.1454, "step": 13295 }, { "epoch": 80.12048192771084, "grad_norm": 1.2483811378479004, "learning_rate": 4.813699241410085e-05, "loss": 0.1447, "step": 13300 }, { "epoch": 80.15060240963855, "grad_norm": 1.6578233242034912, "learning_rate": 4.8134203480589024e-05, "loss": 0.149, "step": 13305 }, { "epoch": 80.18072289156626, "grad_norm": 0.9661278128623962, "learning_rate": 4.8131414547077205e-05, "loss": 0.1159, "step": 13310 }, { "epoch": 80.21084337349397, "grad_norm": 0.6693234443664551, "learning_rate": 4.812862561356538e-05, "loss": 0.1184, "step": 13315 }, { "epoch": 80.24096385542168, "grad_norm": 2.000763416290283, "learning_rate": 4.812583668005355e-05, "loss": 0.1051, "step": 13320 }, { "epoch": 80.2710843373494, "grad_norm": 1.2876056432724, "learning_rate": 4.8123047746541726e-05, "loss": 0.1108, "step": 13325 }, { "epoch": 80.3012048192771, "grad_norm": 2.2616982460021973, "learning_rate": 4.81202588130299e-05, "loss": 0.1141, "step": 13330 }, { "epoch": 80.33132530120481, "grad_norm": 2.013023614883423, "learning_rate": 4.8117469879518074e-05, "loss": 0.1253, "step": 13335 }, { "epoch": 80.36144578313252, "grad_norm": 1.6886414289474487, "learning_rate": 4.8114680946006254e-05, "loss": 0.1459, "step": 13340 }, { "epoch": 80.39156626506023, "grad_norm": 0.890705406665802, "learning_rate": 4.811189201249443e-05, "loss": 0.1345, "step": 13345 }, { "epoch": 80.42168674698796, "grad_norm": 1.505310297012329, "learning_rate": 4.8109103078982595e-05, "loss": 0.1349, "step": 13350 }, { "epoch": 80.45180722891567, "grad_norm": 0.7418832778930664, "learning_rate": 4.8106314145470775e-05, "loss": 0.1217, "step": 13355 }, { "epoch": 80.48192771084338, "grad_norm": 2.120851993560791, "learning_rate": 4.810352521195895e-05, "loss": 0.1624, "step": 13360 }, { "epoch": 80.51204819277109, "grad_norm": 0.7259140014648438, "learning_rate": 4.810073627844712e-05, "loss": 0.1541, "step": 13365 }, { "epoch": 80.5421686746988, "grad_norm": 1.0406228303909302, "learning_rate": 4.8097947344935304e-05, "loss": 0.1148, "step": 13370 }, { "epoch": 80.57228915662651, "grad_norm": 0.7475118637084961, "learning_rate": 4.809515841142348e-05, "loss": 0.1043, "step": 13375 }, { "epoch": 80.60240963855422, "grad_norm": 1.8983980417251587, "learning_rate": 4.8092369477911644e-05, "loss": 0.1558, "step": 13380 }, { "epoch": 80.63253012048193, "grad_norm": 2.024404287338257, "learning_rate": 4.8089580544399825e-05, "loss": 0.1794, "step": 13385 }, { "epoch": 80.66265060240964, "grad_norm": 1.3346220254898071, "learning_rate": 4.8086791610888e-05, "loss": 0.1716, "step": 13390 }, { "epoch": 80.69277108433735, "grad_norm": 1.786770224571228, "learning_rate": 4.808400267737617e-05, "loss": 0.1154, "step": 13395 }, { "epoch": 80.72289156626506, "grad_norm": 1.6041686534881592, "learning_rate": 4.808121374386435e-05, "loss": 0.1529, "step": 13400 }, { "epoch": 80.75301204819277, "grad_norm": 1.4374834299087524, "learning_rate": 4.807842481035253e-05, "loss": 0.1418, "step": 13405 }, { "epoch": 80.78313253012048, "grad_norm": 2.041236639022827, "learning_rate": 4.8075635876840694e-05, "loss": 0.1339, "step": 13410 }, { "epoch": 80.8132530120482, "grad_norm": 2.5768935680389404, "learning_rate": 4.8072846943328875e-05, "loss": 0.1567, "step": 13415 }, { "epoch": 80.8433734939759, "grad_norm": 1.8797093629837036, "learning_rate": 4.807005800981705e-05, "loss": 0.1377, "step": 13420 }, { "epoch": 80.87349397590361, "grad_norm": 1.3676620721817017, "learning_rate": 4.806726907630522e-05, "loss": 0.1501, "step": 13425 }, { "epoch": 80.90361445783132, "grad_norm": 1.518554925918579, "learning_rate": 4.80644801427934e-05, "loss": 0.1604, "step": 13430 }, { "epoch": 80.93373493975903, "grad_norm": 1.760703444480896, "learning_rate": 4.806169120928157e-05, "loss": 0.1476, "step": 13435 }, { "epoch": 80.96385542168674, "grad_norm": 1.6967287063598633, "learning_rate": 4.8058902275769744e-05, "loss": 0.1615, "step": 13440 }, { "epoch": 80.99397590361446, "grad_norm": 0.8997840285301208, "learning_rate": 4.8056113342257924e-05, "loss": 0.1547, "step": 13445 }, { "epoch": 81.0, "eval_accuracy": 0.94494691309477, "eval_auc": 0.9844738351440918, "eval_f1": 0.9163679808841099, "eval_loss": 0.20724187791347504, "eval_precision": 0.9623588456712673, "eval_recall": 0.8745724059293044, "eval_runtime": 16.8526, "eval_samples_per_second": 150.896, "eval_steps_per_second": 0.771, "step": 13446 }, { "epoch": 81.02409638554217, "grad_norm": 1.3024643659591675, "learning_rate": 4.80533244087461e-05, "loss": 0.1334, "step": 13450 }, { "epoch": 81.05421686746988, "grad_norm": 1.3637980222702026, "learning_rate": 4.805053547523427e-05, "loss": 0.1369, "step": 13455 }, { "epoch": 81.08433734939759, "grad_norm": 1.1892391443252563, "learning_rate": 4.804774654172245e-05, "loss": 0.1497, "step": 13460 }, { "epoch": 81.1144578313253, "grad_norm": 1.665014624595642, "learning_rate": 4.804495760821062e-05, "loss": 0.1227, "step": 13465 }, { "epoch": 81.144578313253, "grad_norm": 1.7259368896484375, "learning_rate": 4.804216867469879e-05, "loss": 0.1468, "step": 13470 }, { "epoch": 81.17469879518072, "grad_norm": 1.2737250328063965, "learning_rate": 4.8039379741186974e-05, "loss": 0.1334, "step": 13475 }, { "epoch": 81.20481927710843, "grad_norm": 1.6451976299285889, "learning_rate": 4.803659080767515e-05, "loss": 0.1112, "step": 13480 }, { "epoch": 81.23493975903614, "grad_norm": 2.124725341796875, "learning_rate": 4.803380187416332e-05, "loss": 0.1246, "step": 13485 }, { "epoch": 81.26506024096386, "grad_norm": 0.8188139796257019, "learning_rate": 4.80310129406515e-05, "loss": 0.1303, "step": 13490 }, { "epoch": 81.29518072289157, "grad_norm": 1.6451257467269897, "learning_rate": 4.802822400713967e-05, "loss": 0.1185, "step": 13495 }, { "epoch": 81.32530120481928, "grad_norm": 1.5442471504211426, "learning_rate": 4.802543507362784e-05, "loss": 0.1691, "step": 13500 }, { "epoch": 81.355421686747, "grad_norm": 2.715916633605957, "learning_rate": 4.802264614011602e-05, "loss": 0.1385, "step": 13505 }, { "epoch": 81.3855421686747, "grad_norm": 2.193662643432617, "learning_rate": 4.80198572066042e-05, "loss": 0.1342, "step": 13510 }, { "epoch": 81.41566265060241, "grad_norm": 1.1958253383636475, "learning_rate": 4.801706827309237e-05, "loss": 0.1308, "step": 13515 }, { "epoch": 81.44578313253012, "grad_norm": 1.690035343170166, "learning_rate": 4.8014279339580545e-05, "loss": 0.1178, "step": 13520 }, { "epoch": 81.47590361445783, "grad_norm": 1.9274437427520752, "learning_rate": 4.801149040606872e-05, "loss": 0.1432, "step": 13525 }, { "epoch": 81.50602409638554, "grad_norm": 1.0109832286834717, "learning_rate": 4.800870147255689e-05, "loss": 0.1213, "step": 13530 }, { "epoch": 81.53614457831326, "grad_norm": 2.056666374206543, "learning_rate": 4.800591253904507e-05, "loss": 0.132, "step": 13535 }, { "epoch": 81.56626506024097, "grad_norm": 1.9702845811843872, "learning_rate": 4.8003123605533247e-05, "loss": 0.1443, "step": 13540 }, { "epoch": 81.59638554216868, "grad_norm": 1.5064159631729126, "learning_rate": 4.800033467202142e-05, "loss": 0.1233, "step": 13545 }, { "epoch": 81.62650602409639, "grad_norm": 2.276059627532959, "learning_rate": 4.7997545738509594e-05, "loss": 0.1261, "step": 13550 }, { "epoch": 81.6566265060241, "grad_norm": 1.5666084289550781, "learning_rate": 4.799475680499777e-05, "loss": 0.1236, "step": 13555 }, { "epoch": 81.6867469879518, "grad_norm": 2.2686519622802734, "learning_rate": 4.799196787148594e-05, "loss": 0.1748, "step": 13560 }, { "epoch": 81.71686746987952, "grad_norm": 2.449956178665161, "learning_rate": 4.798917893797412e-05, "loss": 0.1864, "step": 13565 }, { "epoch": 81.74698795180723, "grad_norm": 1.606286883354187, "learning_rate": 4.7986390004462296e-05, "loss": 0.1134, "step": 13570 }, { "epoch": 81.77710843373494, "grad_norm": 1.2915971279144287, "learning_rate": 4.798360107095048e-05, "loss": 0.1477, "step": 13575 }, { "epoch": 81.80722891566265, "grad_norm": 2.0134148597717285, "learning_rate": 4.7980812137438644e-05, "loss": 0.1765, "step": 13580 }, { "epoch": 81.83734939759036, "grad_norm": 1.4820908308029175, "learning_rate": 4.797802320392682e-05, "loss": 0.1427, "step": 13585 }, { "epoch": 81.86746987951807, "grad_norm": 3.5725741386413574, "learning_rate": 4.7975234270415e-05, "loss": 0.1195, "step": 13590 }, { "epoch": 81.89759036144578, "grad_norm": 1.6857229471206665, "learning_rate": 4.797244533690317e-05, "loss": 0.1409, "step": 13595 }, { "epoch": 81.92771084337349, "grad_norm": 0.9184637069702148, "learning_rate": 4.7969656403391346e-05, "loss": 0.1504, "step": 13600 }, { "epoch": 81.9578313253012, "grad_norm": 1.696109414100647, "learning_rate": 4.796686746987952e-05, "loss": 0.1665, "step": 13605 }, { "epoch": 81.98795180722891, "grad_norm": 2.1991405487060547, "learning_rate": 4.796407853636769e-05, "loss": 0.1613, "step": 13610 }, { "epoch": 82.0, "eval_accuracy": 0.9473063311049941, "eval_auc": 0.9821652720381198, "eval_f1": 0.9201430274135876, "eval_loss": 0.20730562508106232, "eval_precision": 0.9637952559300874, "eval_recall": 0.8802736602052451, "eval_runtime": 16.6574, "eval_samples_per_second": 152.665, "eval_steps_per_second": 0.78, "step": 13612 }, { "epoch": 82.01807228915662, "grad_norm": 1.758849024772644, "learning_rate": 4.796128960285587e-05, "loss": 0.1349, "step": 13615 }, { "epoch": 82.04819277108433, "grad_norm": 0.6276155710220337, "learning_rate": 4.795850066934405e-05, "loss": 0.0966, "step": 13620 }, { "epoch": 82.07831325301204, "grad_norm": 1.5245643854141235, "learning_rate": 4.795571173583222e-05, "loss": 0.1456, "step": 13625 }, { "epoch": 82.10843373493977, "grad_norm": 2.19927716255188, "learning_rate": 4.7952922802320395e-05, "loss": 0.1452, "step": 13630 }, { "epoch": 82.13855421686748, "grad_norm": 1.0611395835876465, "learning_rate": 4.795013386880857e-05, "loss": 0.1266, "step": 13635 }, { "epoch": 82.16867469879519, "grad_norm": 1.6245512962341309, "learning_rate": 4.794734493529674e-05, "loss": 0.1096, "step": 13640 }, { "epoch": 82.1987951807229, "grad_norm": 1.3010703325271606, "learning_rate": 4.7944556001784917e-05, "loss": 0.1368, "step": 13645 }, { "epoch": 82.2289156626506, "grad_norm": 1.6115851402282715, "learning_rate": 4.79417670682731e-05, "loss": 0.1584, "step": 13650 }, { "epoch": 82.25903614457832, "grad_norm": 0.7851889133453369, "learning_rate": 4.793897813476127e-05, "loss": 0.108, "step": 13655 }, { "epoch": 82.28915662650603, "grad_norm": 2.6004412174224854, "learning_rate": 4.7936189201249445e-05, "loss": 0.2029, "step": 13660 }, { "epoch": 82.31927710843374, "grad_norm": 0.9214454293251038, "learning_rate": 4.793340026773762e-05, "loss": 0.129, "step": 13665 }, { "epoch": 82.34939759036145, "grad_norm": 1.3081505298614502, "learning_rate": 4.793061133422579e-05, "loss": 0.1535, "step": 13670 }, { "epoch": 82.37951807228916, "grad_norm": 2.4201855659484863, "learning_rate": 4.7927822400713966e-05, "loss": 0.1492, "step": 13675 }, { "epoch": 82.40963855421687, "grad_norm": 1.2794040441513062, "learning_rate": 4.792503346720215e-05, "loss": 0.1998, "step": 13680 }, { "epoch": 82.43975903614458, "grad_norm": 1.3795418739318848, "learning_rate": 4.792224453369032e-05, "loss": 0.1272, "step": 13685 }, { "epoch": 82.46987951807229, "grad_norm": 0.9484094381332397, "learning_rate": 4.7919455600178494e-05, "loss": 0.1306, "step": 13690 }, { "epoch": 82.5, "grad_norm": 1.1522639989852905, "learning_rate": 4.791666666666667e-05, "loss": 0.1385, "step": 13695 }, { "epoch": 82.53012048192771, "grad_norm": 1.298977255821228, "learning_rate": 4.791387773315484e-05, "loss": 0.143, "step": 13700 }, { "epoch": 82.56024096385542, "grad_norm": 1.7475353479385376, "learning_rate": 4.7911088799643016e-05, "loss": 0.1288, "step": 13705 }, { "epoch": 82.59036144578313, "grad_norm": 2.226940393447876, "learning_rate": 4.7908299866131196e-05, "loss": 0.1297, "step": 13710 }, { "epoch": 82.62048192771084, "grad_norm": 2.041835308074951, "learning_rate": 4.790551093261937e-05, "loss": 0.1503, "step": 13715 }, { "epoch": 82.65060240963855, "grad_norm": 1.4314419031143188, "learning_rate": 4.7902721999107544e-05, "loss": 0.114, "step": 13720 }, { "epoch": 82.68072289156626, "grad_norm": 2.819429397583008, "learning_rate": 4.789993306559572e-05, "loss": 0.1246, "step": 13725 }, { "epoch": 82.71084337349397, "grad_norm": 0.9852308630943298, "learning_rate": 4.789714413208389e-05, "loss": 0.1114, "step": 13730 }, { "epoch": 82.74096385542168, "grad_norm": 1.1365885734558105, "learning_rate": 4.7894355198572065e-05, "loss": 0.1119, "step": 13735 }, { "epoch": 82.7710843373494, "grad_norm": 1.4428057670593262, "learning_rate": 4.7891566265060246e-05, "loss": 0.1132, "step": 13740 }, { "epoch": 82.8012048192771, "grad_norm": 2.48418927192688, "learning_rate": 4.788877733154842e-05, "loss": 0.0916, "step": 13745 }, { "epoch": 82.83132530120481, "grad_norm": 2.8917527198791504, "learning_rate": 4.7885988398036593e-05, "loss": 0.1774, "step": 13750 }, { "epoch": 82.86144578313252, "grad_norm": 2.5240049362182617, "learning_rate": 4.788319946452477e-05, "loss": 0.1591, "step": 13755 }, { "epoch": 82.89156626506023, "grad_norm": 2.186082124710083, "learning_rate": 4.788041053101294e-05, "loss": 0.1646, "step": 13760 }, { "epoch": 82.92168674698796, "grad_norm": 1.6441371440887451, "learning_rate": 4.7877621597501115e-05, "loss": 0.1163, "step": 13765 }, { "epoch": 82.95180722891567, "grad_norm": 1.3157641887664795, "learning_rate": 4.7874832663989295e-05, "loss": 0.1297, "step": 13770 }, { "epoch": 82.98192771084338, "grad_norm": 0.8910824656486511, "learning_rate": 4.787204373047747e-05, "loss": 0.1247, "step": 13775 }, { "epoch": 83.0, "eval_accuracy": 0.9402280770743217, "eval_auc": 0.9830995111841772, "eval_f1": 0.9090909090909091, "eval_loss": 0.2184453010559082, "eval_precision": 0.9559748427672956, "eval_recall": 0.8665906499429875, "eval_runtime": 16.7944, "eval_samples_per_second": 151.42, "eval_steps_per_second": 0.774, "step": 13778 }, { "epoch": 83.01204819277109, "grad_norm": 1.388502836227417, "learning_rate": 4.786925479696564e-05, "loss": 0.1414, "step": 13780 }, { "epoch": 83.0421686746988, "grad_norm": 2.4867851734161377, "learning_rate": 4.786646586345382e-05, "loss": 0.1678, "step": 13785 }, { "epoch": 83.07228915662651, "grad_norm": 1.294487476348877, "learning_rate": 4.786367692994199e-05, "loss": 0.1222, "step": 13790 }, { "epoch": 83.10240963855422, "grad_norm": 1.1315916776657104, "learning_rate": 4.7860887996430164e-05, "loss": 0.1111, "step": 13795 }, { "epoch": 83.13253012048193, "grad_norm": 1.9068232774734497, "learning_rate": 4.7858099062918345e-05, "loss": 0.1363, "step": 13800 }, { "epoch": 83.16265060240964, "grad_norm": 1.085170030593872, "learning_rate": 4.785531012940652e-05, "loss": 0.1469, "step": 13805 }, { "epoch": 83.19277108433735, "grad_norm": 1.0437813997268677, "learning_rate": 4.785252119589469e-05, "loss": 0.1131, "step": 13810 }, { "epoch": 83.22289156626506, "grad_norm": 1.1478298902511597, "learning_rate": 4.7849732262382866e-05, "loss": 0.1231, "step": 13815 }, { "epoch": 83.25301204819277, "grad_norm": 1.557233452796936, "learning_rate": 4.784694332887104e-05, "loss": 0.161, "step": 13820 }, { "epoch": 83.28313253012048, "grad_norm": 1.7268131971359253, "learning_rate": 4.7844154395359214e-05, "loss": 0.1343, "step": 13825 }, { "epoch": 83.3132530120482, "grad_norm": 1.5991772413253784, "learning_rate": 4.7841365461847394e-05, "loss": 0.1193, "step": 13830 }, { "epoch": 83.3433734939759, "grad_norm": 1.3358711004257202, "learning_rate": 4.783857652833557e-05, "loss": 0.0934, "step": 13835 }, { "epoch": 83.37349397590361, "grad_norm": 1.6789311170578003, "learning_rate": 4.783578759482374e-05, "loss": 0.1213, "step": 13840 }, { "epoch": 83.40361445783132, "grad_norm": 1.81136953830719, "learning_rate": 4.7832998661311916e-05, "loss": 0.1346, "step": 13845 }, { "epoch": 83.43373493975903, "grad_norm": 1.3308287858963013, "learning_rate": 4.783020972780009e-05, "loss": 0.096, "step": 13850 }, { "epoch": 83.46385542168674, "grad_norm": 2.7993826866149902, "learning_rate": 4.782742079428827e-05, "loss": 0.1815, "step": 13855 }, { "epoch": 83.49397590361446, "grad_norm": 2.379805564880371, "learning_rate": 4.7824631860776444e-05, "loss": 0.1174, "step": 13860 }, { "epoch": 83.52409638554217, "grad_norm": 1.6603949069976807, "learning_rate": 4.782184292726462e-05, "loss": 0.1559, "step": 13865 }, { "epoch": 83.55421686746988, "grad_norm": 2.775527000427246, "learning_rate": 4.781905399375279e-05, "loss": 0.1516, "step": 13870 }, { "epoch": 83.58433734939759, "grad_norm": 2.9960052967071533, "learning_rate": 4.7816265060240965e-05, "loss": 0.1372, "step": 13875 }, { "epoch": 83.6144578313253, "grad_norm": 2.177501678466797, "learning_rate": 4.781347612672914e-05, "loss": 0.1798, "step": 13880 }, { "epoch": 83.644578313253, "grad_norm": 1.3907676935195923, "learning_rate": 4.781068719321732e-05, "loss": 0.1448, "step": 13885 }, { "epoch": 83.67469879518072, "grad_norm": 1.755016803741455, "learning_rate": 4.7807898259705494e-05, "loss": 0.132, "step": 13890 }, { "epoch": 83.70481927710843, "grad_norm": 2.779858112335205, "learning_rate": 4.780510932619367e-05, "loss": 0.1507, "step": 13895 }, { "epoch": 83.73493975903614, "grad_norm": 1.2361454963684082, "learning_rate": 4.780232039268184e-05, "loss": 0.1408, "step": 13900 }, { "epoch": 83.76506024096386, "grad_norm": 1.7588298320770264, "learning_rate": 4.7799531459170015e-05, "loss": 0.142, "step": 13905 }, { "epoch": 83.79518072289157, "grad_norm": 1.982361078262329, "learning_rate": 4.779674252565819e-05, "loss": 0.1245, "step": 13910 }, { "epoch": 83.82530120481928, "grad_norm": 1.4219602346420288, "learning_rate": 4.779395359214637e-05, "loss": 0.1286, "step": 13915 }, { "epoch": 83.855421686747, "grad_norm": 0.875392735004425, "learning_rate": 4.779116465863454e-05, "loss": 0.1867, "step": 13920 }, { "epoch": 83.8855421686747, "grad_norm": 1.4474810361862183, "learning_rate": 4.778837572512272e-05, "loss": 0.1386, "step": 13925 }, { "epoch": 83.91566265060241, "grad_norm": 1.7907133102416992, "learning_rate": 4.778558679161089e-05, "loss": 0.1493, "step": 13930 }, { "epoch": 83.94578313253012, "grad_norm": 1.8963080644607544, "learning_rate": 4.7782797858099065e-05, "loss": 0.1287, "step": 13935 }, { "epoch": 83.97590361445783, "grad_norm": 1.8549200296401978, "learning_rate": 4.778000892458724e-05, "loss": 0.1263, "step": 13940 }, { "epoch": 84.0, "eval_accuracy": 0.9355092410538733, "eval_auc": 0.9806407169481248, "eval_f1": 0.8996328029375765, "eval_loss": 0.2553333342075348, "eval_precision": 0.9709379128137384, "eval_recall": 0.8380843785632839, "eval_runtime": 16.8708, "eval_samples_per_second": 150.734, "eval_steps_per_second": 0.771, "step": 13944 }, { "epoch": 84.00602409638554, "grad_norm": 1.2648922204971313, "learning_rate": 4.777721999107542e-05, "loss": 0.1059, "step": 13945 }, { "epoch": 84.03614457831326, "grad_norm": 0.9515756964683533, "learning_rate": 4.777443105756359e-05, "loss": 0.1389, "step": 13950 }, { "epoch": 84.06626506024097, "grad_norm": 1.7787405252456665, "learning_rate": 4.777164212405176e-05, "loss": 0.1563, "step": 13955 }, { "epoch": 84.09638554216868, "grad_norm": 0.850971519947052, "learning_rate": 4.776885319053994e-05, "loss": 0.1349, "step": 13960 }, { "epoch": 84.12650602409639, "grad_norm": 1.711816668510437, "learning_rate": 4.7766064257028114e-05, "loss": 0.1404, "step": 13965 }, { "epoch": 84.1566265060241, "grad_norm": 0.7210081815719604, "learning_rate": 4.776327532351629e-05, "loss": 0.1305, "step": 13970 }, { "epoch": 84.1867469879518, "grad_norm": 0.9942130446434021, "learning_rate": 4.776048639000447e-05, "loss": 0.1155, "step": 13975 }, { "epoch": 84.21686746987952, "grad_norm": 0.5861592292785645, "learning_rate": 4.775769745649264e-05, "loss": 0.1212, "step": 13980 }, { "epoch": 84.24698795180723, "grad_norm": 0.8859348893165588, "learning_rate": 4.775490852298081e-05, "loss": 0.1155, "step": 13985 }, { "epoch": 84.27710843373494, "grad_norm": 1.1719838380813599, "learning_rate": 4.775211958946899e-05, "loss": 0.101, "step": 13990 }, { "epoch": 84.30722891566265, "grad_norm": 1.1109471321105957, "learning_rate": 4.7749330655957164e-05, "loss": 0.1757, "step": 13995 }, { "epoch": 84.33734939759036, "grad_norm": 1.2019339799880981, "learning_rate": 4.774654172244534e-05, "loss": 0.133, "step": 14000 }, { "epoch": 84.36746987951807, "grad_norm": 2.0538597106933594, "learning_rate": 4.774375278893352e-05, "loss": 0.1767, "step": 14005 }, { "epoch": 84.39759036144578, "grad_norm": 1.268314242362976, "learning_rate": 4.774096385542169e-05, "loss": 0.1562, "step": 14010 }, { "epoch": 84.42771084337349, "grad_norm": 1.7017498016357422, "learning_rate": 4.773817492190986e-05, "loss": 0.1514, "step": 14015 }, { "epoch": 84.4578313253012, "grad_norm": 1.9080393314361572, "learning_rate": 4.773538598839804e-05, "loss": 0.1621, "step": 14020 }, { "epoch": 84.48795180722891, "grad_norm": 1.2499710321426392, "learning_rate": 4.773259705488621e-05, "loss": 0.1595, "step": 14025 }, { "epoch": 84.51807228915662, "grad_norm": 1.181410312652588, "learning_rate": 4.772980812137439e-05, "loss": 0.1465, "step": 14030 }, { "epoch": 84.54819277108433, "grad_norm": 1.1394851207733154, "learning_rate": 4.772701918786257e-05, "loss": 0.1263, "step": 14035 }, { "epoch": 84.57831325301204, "grad_norm": 1.396920919418335, "learning_rate": 4.772423025435074e-05, "loss": 0.1157, "step": 14040 }, { "epoch": 84.60843373493977, "grad_norm": 1.530899167060852, "learning_rate": 4.772144132083891e-05, "loss": 0.1305, "step": 14045 }, { "epoch": 84.63855421686748, "grad_norm": 2.6912682056427, "learning_rate": 4.771865238732709e-05, "loss": 0.139, "step": 14050 }, { "epoch": 84.66867469879519, "grad_norm": 2.976313829421997, "learning_rate": 4.771586345381526e-05, "loss": 0.1747, "step": 14055 }, { "epoch": 84.6987951807229, "grad_norm": 1.7152972221374512, "learning_rate": 4.7713074520303437e-05, "loss": 0.1337, "step": 14060 }, { "epoch": 84.7289156626506, "grad_norm": 1.9049164056777954, "learning_rate": 4.771028558679162e-05, "loss": 0.1506, "step": 14065 }, { "epoch": 84.75903614457832, "grad_norm": 1.6719861030578613, "learning_rate": 4.7707496653279784e-05, "loss": 0.1509, "step": 14070 }, { "epoch": 84.78915662650603, "grad_norm": 1.9368972778320312, "learning_rate": 4.770470771976796e-05, "loss": 0.1756, "step": 14075 }, { "epoch": 84.81927710843374, "grad_norm": 1.2720271348953247, "learning_rate": 4.770191878625614e-05, "loss": 0.1275, "step": 14080 }, { "epoch": 84.84939759036145, "grad_norm": 1.9887089729309082, "learning_rate": 4.769912985274431e-05, "loss": 0.1453, "step": 14085 }, { "epoch": 84.87951807228916, "grad_norm": 1.2748991250991821, "learning_rate": 4.7696340919232486e-05, "loss": 0.1356, "step": 14090 }, { "epoch": 84.90963855421687, "grad_norm": 0.7837698459625244, "learning_rate": 4.769355198572067e-05, "loss": 0.1034, "step": 14095 }, { "epoch": 84.93975903614458, "grad_norm": 1.91938054561615, "learning_rate": 4.7690763052208834e-05, "loss": 0.1837, "step": 14100 }, { "epoch": 84.96987951807229, "grad_norm": 1.103245496749878, "learning_rate": 4.7687974118697014e-05, "loss": 0.1537, "step": 14105 }, { "epoch": 85.0, "grad_norm": 1.532736897468567, "learning_rate": 4.768518518518519e-05, "loss": 0.1253, "step": 14110 }, { "epoch": 85.0, "eval_accuracy": 0.9433739677546206, "eval_auc": 0.9841418893669213, "eval_f1": 0.9143876337693222, "eval_loss": 0.18828138709068298, "eval_precision": 0.9552795031055901, "eval_recall": 0.8768529076396807, "eval_runtime": 17.0104, "eval_samples_per_second": 149.497, "eval_steps_per_second": 0.764, "step": 14110 }, { "epoch": 85.03012048192771, "grad_norm": 1.0667166709899902, "learning_rate": 4.768239625167336e-05, "loss": 0.1471, "step": 14115 }, { "epoch": 85.06024096385542, "grad_norm": 1.475081205368042, "learning_rate": 4.767960731816154e-05, "loss": 0.1133, "step": 14120 }, { "epoch": 85.09036144578313, "grad_norm": 0.9159479737281799, "learning_rate": 4.7676818384649716e-05, "loss": 0.131, "step": 14125 }, { "epoch": 85.12048192771084, "grad_norm": 1.6095043420791626, "learning_rate": 4.767402945113788e-05, "loss": 0.1483, "step": 14130 }, { "epoch": 85.15060240963855, "grad_norm": 1.4973933696746826, "learning_rate": 4.7671240517626064e-05, "loss": 0.1482, "step": 14135 }, { "epoch": 85.18072289156626, "grad_norm": 2.2780914306640625, "learning_rate": 4.766845158411424e-05, "loss": 0.1662, "step": 14140 }, { "epoch": 85.21084337349397, "grad_norm": 1.1623525619506836, "learning_rate": 4.766566265060241e-05, "loss": 0.1236, "step": 14145 }, { "epoch": 85.24096385542168, "grad_norm": 1.9882997274398804, "learning_rate": 4.766287371709059e-05, "loss": 0.1287, "step": 14150 }, { "epoch": 85.2710843373494, "grad_norm": 0.5409373044967651, "learning_rate": 4.766008478357876e-05, "loss": 0.1124, "step": 14155 }, { "epoch": 85.3012048192771, "grad_norm": 1.2196239233016968, "learning_rate": 4.765729585006693e-05, "loss": 0.1435, "step": 14160 }, { "epoch": 85.33132530120481, "grad_norm": 1.3611983060836792, "learning_rate": 4.765450691655511e-05, "loss": 0.1293, "step": 14165 }, { "epoch": 85.36144578313252, "grad_norm": 1.6768685579299927, "learning_rate": 4.765171798304329e-05, "loss": 0.125, "step": 14170 }, { "epoch": 85.39156626506023, "grad_norm": 1.4841111898422241, "learning_rate": 4.764892904953146e-05, "loss": 0.1151, "step": 14175 }, { "epoch": 85.42168674698796, "grad_norm": 0.5706485509872437, "learning_rate": 4.764614011601964e-05, "loss": 0.1005, "step": 14180 }, { "epoch": 85.45180722891567, "grad_norm": 0.9013178944587708, "learning_rate": 4.764335118250781e-05, "loss": 0.1316, "step": 14185 }, { "epoch": 85.48192771084338, "grad_norm": 1.557949423789978, "learning_rate": 4.764056224899598e-05, "loss": 0.1359, "step": 14190 }, { "epoch": 85.51204819277109, "grad_norm": 2.801118850708008, "learning_rate": 4.763777331548416e-05, "loss": 0.1569, "step": 14195 }, { "epoch": 85.5421686746988, "grad_norm": 1.2411048412322998, "learning_rate": 4.763498438197234e-05, "loss": 0.1311, "step": 14200 }, { "epoch": 85.57228915662651, "grad_norm": 2.2194406986236572, "learning_rate": 4.763219544846051e-05, "loss": 0.1688, "step": 14205 }, { "epoch": 85.60240963855422, "grad_norm": 1.5219783782958984, "learning_rate": 4.762940651494869e-05, "loss": 0.1452, "step": 14210 }, { "epoch": 85.63253012048193, "grad_norm": 1.2245383262634277, "learning_rate": 4.762661758143686e-05, "loss": 0.1317, "step": 14215 }, { "epoch": 85.66265060240964, "grad_norm": 1.17124342918396, "learning_rate": 4.762382864792503e-05, "loss": 0.108, "step": 14220 }, { "epoch": 85.69277108433735, "grad_norm": 1.87996244430542, "learning_rate": 4.762103971441321e-05, "loss": 0.1348, "step": 14225 }, { "epoch": 85.72289156626506, "grad_norm": 1.8117679357528687, "learning_rate": 4.7618250780901386e-05, "loss": 0.1515, "step": 14230 }, { "epoch": 85.75301204819277, "grad_norm": 1.6517044305801392, "learning_rate": 4.761546184738956e-05, "loss": 0.1497, "step": 14235 }, { "epoch": 85.78313253012048, "grad_norm": 2.1645545959472656, "learning_rate": 4.7612672913877734e-05, "loss": 0.1189, "step": 14240 }, { "epoch": 85.8132530120482, "grad_norm": 2.453587770462036, "learning_rate": 4.760988398036591e-05, "loss": 0.1548, "step": 14245 }, { "epoch": 85.8433734939759, "grad_norm": 2.449235677719116, "learning_rate": 4.760709504685408e-05, "loss": 0.1199, "step": 14250 }, { "epoch": 85.87349397590361, "grad_norm": 1.6433216333389282, "learning_rate": 4.760430611334226e-05, "loss": 0.1388, "step": 14255 }, { "epoch": 85.90361445783132, "grad_norm": 1.9673657417297363, "learning_rate": 4.7601517179830436e-05, "loss": 0.1739, "step": 14260 }, { "epoch": 85.93373493975903, "grad_norm": 0.7465510964393616, "learning_rate": 4.759872824631861e-05, "loss": 0.0861, "step": 14265 }, { "epoch": 85.96385542168674, "grad_norm": 1.1348292827606201, "learning_rate": 4.759593931280678e-05, "loss": 0.0973, "step": 14270 }, { "epoch": 85.99397590361446, "grad_norm": 0.902716875076294, "learning_rate": 4.759315037929496e-05, "loss": 0.1709, "step": 14275 }, { "epoch": 86.0, "eval_accuracy": 0.9480928037750688, "eval_auc": 0.9864627721099842, "eval_f1": 0.9217081850533808, "eval_loss": 0.1892118602991104, "eval_precision": 0.9604449938195303, "eval_recall": 0.8859749144811858, "eval_runtime": 16.9175, "eval_samples_per_second": 150.318, "eval_steps_per_second": 0.768, "step": 14276 }, { "epoch": 86.02409638554217, "grad_norm": 1.6581169366836548, "learning_rate": 4.759036144578313e-05, "loss": 0.1411, "step": 14280 }, { "epoch": 86.05421686746988, "grad_norm": 1.0091661214828491, "learning_rate": 4.758757251227131e-05, "loss": 0.1101, "step": 14285 }, { "epoch": 86.08433734939759, "grad_norm": 1.1630033254623413, "learning_rate": 4.7584783578759485e-05, "loss": 0.1147, "step": 14290 }, { "epoch": 86.1144578313253, "grad_norm": 0.7185408473014832, "learning_rate": 4.758199464524766e-05, "loss": 0.1016, "step": 14295 }, { "epoch": 86.144578313253, "grad_norm": 1.512789249420166, "learning_rate": 4.757920571173583e-05, "loss": 0.1485, "step": 14300 }, { "epoch": 86.17469879518072, "grad_norm": 0.4657615125179291, "learning_rate": 4.757641677822401e-05, "loss": 0.1186, "step": 14305 }, { "epoch": 86.20481927710843, "grad_norm": 2.3068294525146484, "learning_rate": 4.757362784471218e-05, "loss": 0.1433, "step": 14310 }, { "epoch": 86.23493975903614, "grad_norm": 1.127864956855774, "learning_rate": 4.757083891120036e-05, "loss": 0.1397, "step": 14315 }, { "epoch": 86.26506024096386, "grad_norm": 1.1965922117233276, "learning_rate": 4.7568049977688535e-05, "loss": 0.1298, "step": 14320 }, { "epoch": 86.29518072289157, "grad_norm": 1.5388059616088867, "learning_rate": 4.756526104417671e-05, "loss": 0.1224, "step": 14325 }, { "epoch": 86.32530120481928, "grad_norm": 2.3614656925201416, "learning_rate": 4.756247211066488e-05, "loss": 0.142, "step": 14330 }, { "epoch": 86.355421686747, "grad_norm": 1.8381450176239014, "learning_rate": 4.7559683177153056e-05, "loss": 0.1116, "step": 14335 }, { "epoch": 86.3855421686747, "grad_norm": 1.1881322860717773, "learning_rate": 4.755689424364123e-05, "loss": 0.1662, "step": 14340 }, { "epoch": 86.41566265060241, "grad_norm": 1.2886515855789185, "learning_rate": 4.755410531012941e-05, "loss": 0.1068, "step": 14345 }, { "epoch": 86.44578313253012, "grad_norm": 1.9574270248413086, "learning_rate": 4.7551316376617584e-05, "loss": 0.1485, "step": 14350 }, { "epoch": 86.47590361445783, "grad_norm": 0.9434966444969177, "learning_rate": 4.754852744310576e-05, "loss": 0.1241, "step": 14355 }, { "epoch": 86.50602409638554, "grad_norm": 0.7882797718048096, "learning_rate": 4.754573850959393e-05, "loss": 0.1348, "step": 14360 }, { "epoch": 86.53614457831326, "grad_norm": 1.2485636472702026, "learning_rate": 4.7542949576082106e-05, "loss": 0.113, "step": 14365 }, { "epoch": 86.56626506024097, "grad_norm": 1.1508848667144775, "learning_rate": 4.7540160642570286e-05, "loss": 0.127, "step": 14370 }, { "epoch": 86.59638554216868, "grad_norm": 1.799537181854248, "learning_rate": 4.753737170905846e-05, "loss": 0.1802, "step": 14375 }, { "epoch": 86.62650602409639, "grad_norm": 2.453477621078491, "learning_rate": 4.7534582775546634e-05, "loss": 0.1481, "step": 14380 }, { "epoch": 86.6566265060241, "grad_norm": 0.9357603192329407, "learning_rate": 4.753179384203481e-05, "loss": 0.143, "step": 14385 }, { "epoch": 86.6867469879518, "grad_norm": 1.1688281297683716, "learning_rate": 4.752900490852298e-05, "loss": 0.1439, "step": 14390 }, { "epoch": 86.71686746987952, "grad_norm": 0.8570228815078735, "learning_rate": 4.7526215975011155e-05, "loss": 0.1239, "step": 14395 }, { "epoch": 86.74698795180723, "grad_norm": 1.5286699533462524, "learning_rate": 4.7523427041499336e-05, "loss": 0.1491, "step": 14400 }, { "epoch": 86.77710843373494, "grad_norm": 1.9417452812194824, "learning_rate": 4.752063810798751e-05, "loss": 0.1122, "step": 14405 }, { "epoch": 86.80722891566265, "grad_norm": 1.2182284593582153, "learning_rate": 4.7517849174475684e-05, "loss": 0.0986, "step": 14410 }, { "epoch": 86.83734939759036, "grad_norm": 0.8801213502883911, "learning_rate": 4.751506024096386e-05, "loss": 0.0997, "step": 14415 }, { "epoch": 86.86746987951807, "grad_norm": 1.2546206712722778, "learning_rate": 4.751227130745203e-05, "loss": 0.1667, "step": 14420 }, { "epoch": 86.89759036144578, "grad_norm": 1.6996676921844482, "learning_rate": 4.7509482373940205e-05, "loss": 0.1432, "step": 14425 }, { "epoch": 86.92771084337349, "grad_norm": 0.9992546439170837, "learning_rate": 4.7506693440428386e-05, "loss": 0.1392, "step": 14430 }, { "epoch": 86.9578313253012, "grad_norm": 1.7589772939682007, "learning_rate": 4.750390450691656e-05, "loss": 0.1297, "step": 14435 }, { "epoch": 86.98795180722891, "grad_norm": 1.7378164529800415, "learning_rate": 4.750111557340473e-05, "loss": 0.1469, "step": 14440 }, { "epoch": 87.0, "eval_accuracy": 0.9508454581203303, "eval_auc": 0.985316703648392, "eval_f1": 0.9255509231685527, "eval_loss": 0.1991521120071411, "eval_precision": 0.9688279301745636, "eval_recall": 0.8859749144811858, "eval_runtime": 16.9096, "eval_samples_per_second": 150.388, "eval_steps_per_second": 0.769, "step": 14442 }, { "epoch": 87.01807228915662, "grad_norm": 1.0375066995620728, "learning_rate": 4.749832663989291e-05, "loss": 0.1279, "step": 14445 }, { "epoch": 87.04819277108433, "grad_norm": 1.4889252185821533, "learning_rate": 4.749553770638108e-05, "loss": 0.1232, "step": 14450 }, { "epoch": 87.07831325301204, "grad_norm": 1.7051292657852173, "learning_rate": 4.7492748772869254e-05, "loss": 0.091, "step": 14455 }, { "epoch": 87.10843373493977, "grad_norm": 1.5671590566635132, "learning_rate": 4.7489959839357435e-05, "loss": 0.0976, "step": 14460 }, { "epoch": 87.13855421686748, "grad_norm": 1.0408308506011963, "learning_rate": 4.748717090584561e-05, "loss": 0.1275, "step": 14465 }, { "epoch": 87.16867469879519, "grad_norm": 1.5001578330993652, "learning_rate": 4.748438197233378e-05, "loss": 0.1436, "step": 14470 }, { "epoch": 87.1987951807229, "grad_norm": 0.7032421827316284, "learning_rate": 4.7481593038821956e-05, "loss": 0.1021, "step": 14475 }, { "epoch": 87.2289156626506, "grad_norm": 0.7100562453269958, "learning_rate": 4.747880410531013e-05, "loss": 0.1344, "step": 14480 }, { "epoch": 87.25903614457832, "grad_norm": 1.7033170461654663, "learning_rate": 4.7476015171798304e-05, "loss": 0.1775, "step": 14485 }, { "epoch": 87.28915662650603, "grad_norm": 1.7784265279769897, "learning_rate": 4.7473226238286485e-05, "loss": 0.1229, "step": 14490 }, { "epoch": 87.31927710843374, "grad_norm": 1.1719051599502563, "learning_rate": 4.747043730477466e-05, "loss": 0.1322, "step": 14495 }, { "epoch": 87.34939759036145, "grad_norm": 1.3448373079299927, "learning_rate": 4.746764837126283e-05, "loss": 0.1047, "step": 14500 }, { "epoch": 87.37951807228916, "grad_norm": 1.515297293663025, "learning_rate": 4.7464859437751006e-05, "loss": 0.1331, "step": 14505 }, { "epoch": 87.40963855421687, "grad_norm": 1.7046703100204468, "learning_rate": 4.746207050423918e-05, "loss": 0.1517, "step": 14510 }, { "epoch": 87.43975903614458, "grad_norm": 2.1440868377685547, "learning_rate": 4.7459281570727354e-05, "loss": 0.147, "step": 14515 }, { "epoch": 87.46987951807229, "grad_norm": 1.4107609987258911, "learning_rate": 4.7456492637215534e-05, "loss": 0.1326, "step": 14520 }, { "epoch": 87.5, "grad_norm": 2.455655574798584, "learning_rate": 4.745370370370371e-05, "loss": 0.1378, "step": 14525 }, { "epoch": 87.53012048192771, "grad_norm": 1.4808694124221802, "learning_rate": 4.745091477019188e-05, "loss": 0.1255, "step": 14530 }, { "epoch": 87.56024096385542, "grad_norm": 0.9613294005393982, "learning_rate": 4.7448125836680056e-05, "loss": 0.1225, "step": 14535 }, { "epoch": 87.59036144578313, "grad_norm": 1.7466461658477783, "learning_rate": 4.744533690316823e-05, "loss": 0.1056, "step": 14540 }, { "epoch": 87.62048192771084, "grad_norm": 1.7773330211639404, "learning_rate": 4.74425479696564e-05, "loss": 0.1692, "step": 14545 }, { "epoch": 87.65060240963855, "grad_norm": 1.3193150758743286, "learning_rate": 4.7439759036144584e-05, "loss": 0.1485, "step": 14550 }, { "epoch": 87.68072289156626, "grad_norm": 1.9448775053024292, "learning_rate": 4.743697010263276e-05, "loss": 0.152, "step": 14555 }, { "epoch": 87.71084337349397, "grad_norm": 1.9711596965789795, "learning_rate": 4.743418116912093e-05, "loss": 0.1283, "step": 14560 }, { "epoch": 87.74096385542168, "grad_norm": 0.5820239186286926, "learning_rate": 4.7431392235609105e-05, "loss": 0.1317, "step": 14565 }, { "epoch": 87.7710843373494, "grad_norm": 1.3593775033950806, "learning_rate": 4.742860330209728e-05, "loss": 0.15, "step": 14570 }, { "epoch": 87.8012048192771, "grad_norm": 0.7001518607139587, "learning_rate": 4.742581436858545e-05, "loss": 0.1119, "step": 14575 }, { "epoch": 87.83132530120481, "grad_norm": 2.0903732776641846, "learning_rate": 4.742302543507363e-05, "loss": 0.1496, "step": 14580 }, { "epoch": 87.86144578313252, "grad_norm": 1.6374163627624512, "learning_rate": 4.742023650156181e-05, "loss": 0.1526, "step": 14585 }, { "epoch": 87.89156626506023, "grad_norm": 0.8886626958847046, "learning_rate": 4.7417447568049974e-05, "loss": 0.0918, "step": 14590 }, { "epoch": 87.92168674698796, "grad_norm": 1.5021910667419434, "learning_rate": 4.7414658634538155e-05, "loss": 0.1256, "step": 14595 }, { "epoch": 87.95180722891567, "grad_norm": 1.6364808082580566, "learning_rate": 4.741186970102633e-05, "loss": 0.1244, "step": 14600 }, { "epoch": 87.98192771084338, "grad_norm": 2.0130977630615234, "learning_rate": 4.74090807675145e-05, "loss": 0.1283, "step": 14605 }, { "epoch": 88.0, "eval_accuracy": 0.9445536767597326, "eval_auc": 0.983247689041409, "eval_f1": 0.9153153153153153, "eval_loss": 0.21100325882434845, "eval_precision": 0.9670050761421319, "eval_recall": 0.8688711516533637, "eval_runtime": 19.5416, "eval_samples_per_second": 130.133, "eval_steps_per_second": 0.665, "step": 14608 }, { "epoch": 88.01204819277109, "grad_norm": 1.7429838180541992, "learning_rate": 4.740629183400268e-05, "loss": 0.1304, "step": 14610 }, { "epoch": 88.0421686746988, "grad_norm": 2.927536725997925, "learning_rate": 4.7403502900490857e-05, "loss": 0.1399, "step": 14615 }, { "epoch": 88.07228915662651, "grad_norm": 0.9462056159973145, "learning_rate": 4.7400713966979024e-05, "loss": 0.1014, "step": 14620 }, { "epoch": 88.10240963855422, "grad_norm": 1.36577308177948, "learning_rate": 4.7397925033467204e-05, "loss": 0.1577, "step": 14625 }, { "epoch": 88.13253012048193, "grad_norm": 1.4795022010803223, "learning_rate": 4.739513609995538e-05, "loss": 0.1463, "step": 14630 }, { "epoch": 88.16265060240964, "grad_norm": 1.4152950048446655, "learning_rate": 4.739234716644356e-05, "loss": 0.1526, "step": 14635 }, { "epoch": 88.19277108433735, "grad_norm": 1.4010131359100342, "learning_rate": 4.738955823293173e-05, "loss": 0.1232, "step": 14640 }, { "epoch": 88.22289156626506, "grad_norm": 0.896776556968689, "learning_rate": 4.7386769299419906e-05, "loss": 0.1316, "step": 14645 }, { "epoch": 88.25301204819277, "grad_norm": 0.8451403379440308, "learning_rate": 4.738398036590808e-05, "loss": 0.1152, "step": 14650 }, { "epoch": 88.28313253012048, "grad_norm": 1.6329681873321533, "learning_rate": 4.7381191432396254e-05, "loss": 0.1036, "step": 14655 }, { "epoch": 88.3132530120482, "grad_norm": 0.7393683791160583, "learning_rate": 4.737840249888443e-05, "loss": 0.1191, "step": 14660 }, { "epoch": 88.3433734939759, "grad_norm": 1.9568076133728027, "learning_rate": 4.737561356537261e-05, "loss": 0.1321, "step": 14665 }, { "epoch": 88.37349397590361, "grad_norm": 0.9561821222305298, "learning_rate": 4.737282463186078e-05, "loss": 0.1272, "step": 14670 }, { "epoch": 88.40361445783132, "grad_norm": 2.8008792400360107, "learning_rate": 4.7370035698348956e-05, "loss": 0.1335, "step": 14675 }, { "epoch": 88.43373493975903, "grad_norm": 1.8802995681762695, "learning_rate": 4.736724676483713e-05, "loss": 0.1407, "step": 14680 }, { "epoch": 88.46385542168674, "grad_norm": 0.9617180228233337, "learning_rate": 4.73644578313253e-05, "loss": 0.1284, "step": 14685 }, { "epoch": 88.49397590361446, "grad_norm": 1.9008986949920654, "learning_rate": 4.736166889781348e-05, "loss": 0.1761, "step": 14690 }, { "epoch": 88.52409638554217, "grad_norm": 1.58428156375885, "learning_rate": 4.735887996430166e-05, "loss": 0.1205, "step": 14695 }, { "epoch": 88.55421686746988, "grad_norm": 1.0729864835739136, "learning_rate": 4.735609103078983e-05, "loss": 0.1183, "step": 14700 }, { "epoch": 88.58433734939759, "grad_norm": 1.0596061944961548, "learning_rate": 4.7353302097278e-05, "loss": 0.1105, "step": 14705 }, { "epoch": 88.6144578313253, "grad_norm": 1.2726976871490479, "learning_rate": 4.735051316376618e-05, "loss": 0.1047, "step": 14710 }, { "epoch": 88.644578313253, "grad_norm": 1.7473506927490234, "learning_rate": 4.734772423025435e-05, "loss": 0.1383, "step": 14715 }, { "epoch": 88.67469879518072, "grad_norm": 1.214264154434204, "learning_rate": 4.734493529674253e-05, "loss": 0.146, "step": 14720 }, { "epoch": 88.70481927710843, "grad_norm": 0.8195292353630066, "learning_rate": 4.734214636323071e-05, "loss": 0.1379, "step": 14725 }, { "epoch": 88.73493975903614, "grad_norm": 0.8225693106651306, "learning_rate": 4.733935742971888e-05, "loss": 0.1321, "step": 14730 }, { "epoch": 88.76506024096386, "grad_norm": 1.8106837272644043, "learning_rate": 4.733656849620705e-05, "loss": 0.1224, "step": 14735 }, { "epoch": 88.79518072289157, "grad_norm": 1.5850567817687988, "learning_rate": 4.733377956269523e-05, "loss": 0.1366, "step": 14740 }, { "epoch": 88.82530120481928, "grad_norm": 1.3589211702346802, "learning_rate": 4.73309906291834e-05, "loss": 0.1274, "step": 14745 }, { "epoch": 88.855421686747, "grad_norm": 1.5127644538879395, "learning_rate": 4.7328201695671576e-05, "loss": 0.158, "step": 14750 }, { "epoch": 88.8855421686747, "grad_norm": 1.4552568197250366, "learning_rate": 4.732541276215976e-05, "loss": 0.0927, "step": 14755 }, { "epoch": 88.91566265060241, "grad_norm": 1.2925678491592407, "learning_rate": 4.732262382864793e-05, "loss": 0.1184, "step": 14760 }, { "epoch": 88.94578313253012, "grad_norm": 0.8832511901855469, "learning_rate": 4.73198348951361e-05, "loss": 0.1404, "step": 14765 }, { "epoch": 88.97590361445783, "grad_norm": 1.4447160959243774, "learning_rate": 4.731704596162428e-05, "loss": 0.1684, "step": 14770 }, { "epoch": 89.0, "eval_accuracy": 0.9500589854502556, "eval_auc": 0.9829287473256122, "eval_f1": 0.9251620506776664, "eval_loss": 0.1974656581878662, "eval_precision": 0.9573170731707317, "eval_recall": 0.895096921322691, "eval_runtime": 19.6403, "eval_samples_per_second": 129.479, "eval_steps_per_second": 0.662, "step": 14774 }, { "epoch": 89.00602409638554, "grad_norm": 1.3041869401931763, "learning_rate": 4.731425702811245e-05, "loss": 0.1104, "step": 14775 }, { "epoch": 89.03614457831326, "grad_norm": 1.4686906337738037, "learning_rate": 4.7311468094600626e-05, "loss": 0.1235, "step": 14780 }, { "epoch": 89.06626506024097, "grad_norm": 2.0663414001464844, "learning_rate": 4.7308679161088806e-05, "loss": 0.1675, "step": 14785 }, { "epoch": 89.09638554216868, "grad_norm": 0.9813429713249207, "learning_rate": 4.730589022757697e-05, "loss": 0.1309, "step": 14790 }, { "epoch": 89.12650602409639, "grad_norm": 1.1441782712936401, "learning_rate": 4.730310129406515e-05, "loss": 0.1265, "step": 14795 }, { "epoch": 89.1566265060241, "grad_norm": 1.6443047523498535, "learning_rate": 4.730031236055333e-05, "loss": 0.1648, "step": 14800 }, { "epoch": 89.1867469879518, "grad_norm": 2.0546061992645264, "learning_rate": 4.72975234270415e-05, "loss": 0.1715, "step": 14805 }, { "epoch": 89.21686746987952, "grad_norm": 1.258359432220459, "learning_rate": 4.7294734493529675e-05, "loss": 0.1182, "step": 14810 }, { "epoch": 89.24698795180723, "grad_norm": 0.9215362668037415, "learning_rate": 4.7291945560017856e-05, "loss": 0.1619, "step": 14815 }, { "epoch": 89.27710843373494, "grad_norm": 0.7660539746284485, "learning_rate": 4.728915662650602e-05, "loss": 0.1134, "step": 14820 }, { "epoch": 89.30722891566265, "grad_norm": 1.2452195882797241, "learning_rate": 4.72863676929942e-05, "loss": 0.1275, "step": 14825 }, { "epoch": 89.33734939759036, "grad_norm": 0.6670877933502197, "learning_rate": 4.728357875948238e-05, "loss": 0.1495, "step": 14830 }, { "epoch": 89.36746987951807, "grad_norm": 2.953082323074341, "learning_rate": 4.728078982597055e-05, "loss": 0.1247, "step": 14835 }, { "epoch": 89.39759036144578, "grad_norm": 1.5403558015823364, "learning_rate": 4.7278000892458725e-05, "loss": 0.1465, "step": 14840 }, { "epoch": 89.42771084337349, "grad_norm": 1.5101865530014038, "learning_rate": 4.7275211958946905e-05, "loss": 0.0789, "step": 14845 }, { "epoch": 89.4578313253012, "grad_norm": 1.332971453666687, "learning_rate": 4.727242302543507e-05, "loss": 0.1089, "step": 14850 }, { "epoch": 89.48795180722891, "grad_norm": 2.0793509483337402, "learning_rate": 4.7269634091923246e-05, "loss": 0.1236, "step": 14855 }, { "epoch": 89.51807228915662, "grad_norm": 0.8516743779182434, "learning_rate": 4.726684515841143e-05, "loss": 0.0934, "step": 14860 }, { "epoch": 89.54819277108433, "grad_norm": 1.6131713390350342, "learning_rate": 4.72640562248996e-05, "loss": 0.1237, "step": 14865 }, { "epoch": 89.57831325301204, "grad_norm": 1.8243027925491333, "learning_rate": 4.7261267291387774e-05, "loss": 0.1706, "step": 14870 }, { "epoch": 89.60843373493977, "grad_norm": 2.104937791824341, "learning_rate": 4.725847835787595e-05, "loss": 0.1364, "step": 14875 }, { "epoch": 89.63855421686748, "grad_norm": 2.301976203918457, "learning_rate": 4.725568942436412e-05, "loss": 0.145, "step": 14880 }, { "epoch": 89.66867469879519, "grad_norm": 1.202064037322998, "learning_rate": 4.7252900490852296e-05, "loss": 0.1191, "step": 14885 }, { "epoch": 89.6987951807229, "grad_norm": 1.10089111328125, "learning_rate": 4.7250111557340476e-05, "loss": 0.1553, "step": 14890 }, { "epoch": 89.7289156626506, "grad_norm": 0.7968999743461609, "learning_rate": 4.724732262382865e-05, "loss": 0.1369, "step": 14895 }, { "epoch": 89.75903614457832, "grad_norm": 1.395979404449463, "learning_rate": 4.724453369031683e-05, "loss": 0.1556, "step": 14900 }, { "epoch": 89.78915662650603, "grad_norm": 1.076385736465454, "learning_rate": 4.7241744756805e-05, "loss": 0.159, "step": 14905 }, { "epoch": 89.81927710843374, "grad_norm": 1.197190523147583, "learning_rate": 4.723895582329317e-05, "loss": 0.119, "step": 14910 }, { "epoch": 89.84939759036145, "grad_norm": 1.8807010650634766, "learning_rate": 4.723616688978135e-05, "loss": 0.1558, "step": 14915 }, { "epoch": 89.87951807228916, "grad_norm": 0.935214102268219, "learning_rate": 4.7233377956269526e-05, "loss": 0.1514, "step": 14920 }, { "epoch": 89.90963855421687, "grad_norm": 1.4490586519241333, "learning_rate": 4.72305890227577e-05, "loss": 0.1365, "step": 14925 }, { "epoch": 89.93975903614458, "grad_norm": 1.6296980381011963, "learning_rate": 4.722780008924588e-05, "loss": 0.1283, "step": 14930 }, { "epoch": 89.96987951807229, "grad_norm": 2.673417568206787, "learning_rate": 4.722501115573405e-05, "loss": 0.134, "step": 14935 }, { "epoch": 90.0, "grad_norm": 1.232714295387268, "learning_rate": 4.722222222222222e-05, "loss": 0.1445, "step": 14940 }, { "epoch": 90.0, "eval_accuracy": 0.9445536767597326, "eval_auc": 0.9855658340873408, "eval_f1": 0.9149064574532287, "eval_loss": 0.2041027843952179, "eval_precision": 0.9717948717948718, "eval_recall": 0.8643101482326112, "eval_runtime": 19.7223, "eval_samples_per_second": 128.94, "eval_steps_per_second": 0.659, "step": 14940 }, { "epoch": 90.03012048192771, "grad_norm": 1.9144062995910645, "learning_rate": 4.72194332887104e-05, "loss": 0.1296, "step": 14945 }, { "epoch": 90.06024096385542, "grad_norm": 1.984622597694397, "learning_rate": 4.7216644355198575e-05, "loss": 0.1341, "step": 14950 }, { "epoch": 90.09036144578313, "grad_norm": 0.7810771465301514, "learning_rate": 4.721385542168675e-05, "loss": 0.1028, "step": 14955 }, { "epoch": 90.12048192771084, "grad_norm": 1.0553934574127197, "learning_rate": 4.721106648817493e-05, "loss": 0.15, "step": 14960 }, { "epoch": 90.15060240963855, "grad_norm": 2.661931037902832, "learning_rate": 4.72082775546631e-05, "loss": 0.1553, "step": 14965 }, { "epoch": 90.18072289156626, "grad_norm": 1.422800898551941, "learning_rate": 4.720548862115127e-05, "loss": 0.1241, "step": 14970 }, { "epoch": 90.21084337349397, "grad_norm": 1.7622931003570557, "learning_rate": 4.720269968763945e-05, "loss": 0.1031, "step": 14975 }, { "epoch": 90.24096385542168, "grad_norm": 1.3108450174331665, "learning_rate": 4.7199910754127625e-05, "loss": 0.0993, "step": 14980 }, { "epoch": 90.2710843373494, "grad_norm": 1.8057940006256104, "learning_rate": 4.71971218206158e-05, "loss": 0.1394, "step": 14985 }, { "epoch": 90.3012048192771, "grad_norm": 1.7234320640563965, "learning_rate": 4.719433288710397e-05, "loss": 0.1404, "step": 14990 }, { "epoch": 90.33132530120481, "grad_norm": 1.6687768697738647, "learning_rate": 4.7191543953592146e-05, "loss": 0.1298, "step": 14995 }, { "epoch": 90.36144578313252, "grad_norm": 1.7635596990585327, "learning_rate": 4.718875502008032e-05, "loss": 0.1032, "step": 15000 }, { "epoch": 90.39156626506023, "grad_norm": 1.9103935956954956, "learning_rate": 4.71859660865685e-05, "loss": 0.1, "step": 15005 }, { "epoch": 90.42168674698796, "grad_norm": 1.3442938327789307, "learning_rate": 4.7183177153056675e-05, "loss": 0.098, "step": 15010 }, { "epoch": 90.45180722891567, "grad_norm": 1.2276017665863037, "learning_rate": 4.718038821954485e-05, "loss": 0.105, "step": 15015 }, { "epoch": 90.48192771084338, "grad_norm": 2.2045164108276367, "learning_rate": 4.717759928603302e-05, "loss": 0.1812, "step": 15020 }, { "epoch": 90.51204819277109, "grad_norm": 3.1401782035827637, "learning_rate": 4.7174810352521196e-05, "loss": 0.1533, "step": 15025 }, { "epoch": 90.5421686746988, "grad_norm": 2.1406538486480713, "learning_rate": 4.717202141900937e-05, "loss": 0.1508, "step": 15030 }, { "epoch": 90.57228915662651, "grad_norm": 2.2567074298858643, "learning_rate": 4.716923248549755e-05, "loss": 0.1367, "step": 15035 }, { "epoch": 90.60240963855422, "grad_norm": 1.9470092058181763, "learning_rate": 4.7166443551985724e-05, "loss": 0.1665, "step": 15040 }, { "epoch": 90.63253012048193, "grad_norm": 0.9224712252616882, "learning_rate": 4.71636546184739e-05, "loss": 0.142, "step": 15045 }, { "epoch": 90.66265060240964, "grad_norm": 1.0731422901153564, "learning_rate": 4.716086568496207e-05, "loss": 0.1301, "step": 15050 }, { "epoch": 90.69277108433735, "grad_norm": 1.3973654508590698, "learning_rate": 4.7158076751450246e-05, "loss": 0.1579, "step": 15055 }, { "epoch": 90.72289156626506, "grad_norm": 1.42367684841156, "learning_rate": 4.715528781793842e-05, "loss": 0.1171, "step": 15060 }, { "epoch": 90.75301204819277, "grad_norm": 1.3728097677230835, "learning_rate": 4.71524988844266e-05, "loss": 0.1156, "step": 15065 }, { "epoch": 90.78313253012048, "grad_norm": 2.5987324714660645, "learning_rate": 4.7149709950914774e-05, "loss": 0.1293, "step": 15070 }, { "epoch": 90.8132530120482, "grad_norm": 1.7050949335098267, "learning_rate": 4.714692101740295e-05, "loss": 0.1283, "step": 15075 }, { "epoch": 90.8433734939759, "grad_norm": 2.5224061012268066, "learning_rate": 4.714413208389112e-05, "loss": 0.126, "step": 15080 }, { "epoch": 90.87349397590361, "grad_norm": 1.523181676864624, "learning_rate": 4.7141343150379295e-05, "loss": 0.1507, "step": 15085 }, { "epoch": 90.90361445783132, "grad_norm": 1.1003533601760864, "learning_rate": 4.713855421686747e-05, "loss": 0.1503, "step": 15090 }, { "epoch": 90.93373493975903, "grad_norm": 1.1355431079864502, "learning_rate": 4.713576528335565e-05, "loss": 0.1301, "step": 15095 }, { "epoch": 90.96385542168674, "grad_norm": 0.584497332572937, "learning_rate": 4.713297634984382e-05, "loss": 0.0904, "step": 15100 }, { "epoch": 90.99397590361446, "grad_norm": 0.8010081052780151, "learning_rate": 4.7130187416332e-05, "loss": 0.1301, "step": 15105 }, { "epoch": 91.0, "eval_accuracy": 0.9441604404246953, "eval_auc": 0.9860302159632383, "eval_f1": 0.9143546441495778, "eval_loss": 0.20790936052799225, "eval_precision": 0.970550576184379, "eval_recall": 0.8643101482326112, "eval_runtime": 16.9068, "eval_samples_per_second": 150.413, "eval_steps_per_second": 0.769, "step": 15106 }, { "epoch": 91.02409638554217, "grad_norm": 1.8080776929855347, "learning_rate": 4.712739848282017e-05, "loss": 0.1072, "step": 15110 }, { "epoch": 91.05421686746988, "grad_norm": 1.793641448020935, "learning_rate": 4.7124609549308345e-05, "loss": 0.1225, "step": 15115 }, { "epoch": 91.08433734939759, "grad_norm": 1.1437933444976807, "learning_rate": 4.712182061579652e-05, "loss": 0.1045, "step": 15120 }, { "epoch": 91.1144578313253, "grad_norm": 1.4480006694793701, "learning_rate": 4.71190316822847e-05, "loss": 0.1177, "step": 15125 }, { "epoch": 91.144578313253, "grad_norm": 0.5498224496841431, "learning_rate": 4.711624274877287e-05, "loss": 0.0925, "step": 15130 }, { "epoch": 91.17469879518072, "grad_norm": 1.589474081993103, "learning_rate": 4.7113453815261047e-05, "loss": 0.1477, "step": 15135 }, { "epoch": 91.20481927710843, "grad_norm": 1.3703738451004028, "learning_rate": 4.711066488174922e-05, "loss": 0.1123, "step": 15140 }, { "epoch": 91.23493975903614, "grad_norm": 2.3812754154205322, "learning_rate": 4.7107875948237394e-05, "loss": 0.1176, "step": 15145 }, { "epoch": 91.26506024096386, "grad_norm": 1.2320115566253662, "learning_rate": 4.710508701472557e-05, "loss": 0.123, "step": 15150 }, { "epoch": 91.29518072289157, "grad_norm": 1.0371230840682983, "learning_rate": 4.710229808121375e-05, "loss": 0.1162, "step": 15155 }, { "epoch": 91.32530120481928, "grad_norm": 2.1828746795654297, "learning_rate": 4.709950914770192e-05, "loss": 0.116, "step": 15160 }, { "epoch": 91.355421686747, "grad_norm": 1.36992609500885, "learning_rate": 4.7096720214190096e-05, "loss": 0.1102, "step": 15165 }, { "epoch": 91.3855421686747, "grad_norm": 0.7277379035949707, "learning_rate": 4.709393128067827e-05, "loss": 0.1223, "step": 15170 }, { "epoch": 91.41566265060241, "grad_norm": 1.4891860485076904, "learning_rate": 4.7091142347166444e-05, "loss": 0.1366, "step": 15175 }, { "epoch": 91.44578313253012, "grad_norm": 1.5657923221588135, "learning_rate": 4.7088353413654624e-05, "loss": 0.1714, "step": 15180 }, { "epoch": 91.47590361445783, "grad_norm": 0.8934221863746643, "learning_rate": 4.70855644801428e-05, "loss": 0.1274, "step": 15185 }, { "epoch": 91.50602409638554, "grad_norm": 2.132737636566162, "learning_rate": 4.708277554663097e-05, "loss": 0.1274, "step": 15190 }, { "epoch": 91.53614457831326, "grad_norm": 1.4237350225448608, "learning_rate": 4.7079986613119146e-05, "loss": 0.1337, "step": 15195 }, { "epoch": 91.56626506024097, "grad_norm": 0.7660580277442932, "learning_rate": 4.707719767960732e-05, "loss": 0.1011, "step": 15200 }, { "epoch": 91.59638554216868, "grad_norm": 1.0952409505844116, "learning_rate": 4.707440874609549e-05, "loss": 0.1232, "step": 15205 }, { "epoch": 91.62650602409639, "grad_norm": 2.080371856689453, "learning_rate": 4.7071619812583674e-05, "loss": 0.1521, "step": 15210 }, { "epoch": 91.6566265060241, "grad_norm": 1.226672649383545, "learning_rate": 4.706883087907185e-05, "loss": 0.112, "step": 15215 }, { "epoch": 91.6867469879518, "grad_norm": 1.8429591655731201, "learning_rate": 4.706604194556002e-05, "loss": 0.1257, "step": 15220 }, { "epoch": 91.71686746987952, "grad_norm": 2.1857190132141113, "learning_rate": 4.7063253012048195e-05, "loss": 0.1498, "step": 15225 }, { "epoch": 91.74698795180723, "grad_norm": 1.732683777809143, "learning_rate": 4.706046407853637e-05, "loss": 0.1414, "step": 15230 }, { "epoch": 91.77710843373494, "grad_norm": 2.2705845832824707, "learning_rate": 4.705767514502454e-05, "loss": 0.1272, "step": 15235 }, { "epoch": 91.80722891566265, "grad_norm": 1.2718278169631958, "learning_rate": 4.705488621151272e-05, "loss": 0.1495, "step": 15240 }, { "epoch": 91.83734939759036, "grad_norm": 1.7926607131958008, "learning_rate": 4.70520972780009e-05, "loss": 0.1427, "step": 15245 }, { "epoch": 91.86746987951807, "grad_norm": 0.5878552794456482, "learning_rate": 4.704930834448907e-05, "loss": 0.0921, "step": 15250 }, { "epoch": 91.89759036144578, "grad_norm": 1.603685975074768, "learning_rate": 4.7046519410977245e-05, "loss": 0.1339, "step": 15255 }, { "epoch": 91.92771084337349, "grad_norm": 1.0628222227096558, "learning_rate": 4.704373047746542e-05, "loss": 0.1122, "step": 15260 }, { "epoch": 91.9578313253012, "grad_norm": 1.1744061708450317, "learning_rate": 4.704094154395359e-05, "loss": 0.1312, "step": 15265 }, { "epoch": 91.98795180722891, "grad_norm": 1.7355620861053467, "learning_rate": 4.703815261044177e-05, "loss": 0.1141, "step": 15270 }, { "epoch": 92.0, "eval_accuracy": 0.9343295320487613, "eval_auc": 0.9789443713631405, "eval_f1": 0.8986035215543412, "eval_loss": 0.2643064260482788, "eval_precision": 0.961038961038961, "eval_recall": 0.8437856328392246, "eval_runtime": 16.749, "eval_samples_per_second": 151.83, "eval_steps_per_second": 0.776, "step": 15272 }, { "epoch": 92.01807228915662, "grad_norm": 0.8560194373130798, "learning_rate": 4.703536367692995e-05, "loss": 0.1348, "step": 15275 }, { "epoch": 92.04819277108433, "grad_norm": 0.6043124198913574, "learning_rate": 4.703257474341812e-05, "loss": 0.1624, "step": 15280 }, { "epoch": 92.07831325301204, "grad_norm": 1.2813502550125122, "learning_rate": 4.7029785809906294e-05, "loss": 0.1363, "step": 15285 }, { "epoch": 92.10843373493977, "grad_norm": 1.1883949041366577, "learning_rate": 4.702699687639447e-05, "loss": 0.1241, "step": 15290 }, { "epoch": 92.13855421686748, "grad_norm": 2.056490421295166, "learning_rate": 4.702420794288264e-05, "loss": 0.1383, "step": 15295 }, { "epoch": 92.16867469879519, "grad_norm": 1.779117226600647, "learning_rate": 4.702141900937082e-05, "loss": 0.1315, "step": 15300 }, { "epoch": 92.1987951807229, "grad_norm": 2.0701324939727783, "learning_rate": 4.7018630075858996e-05, "loss": 0.1325, "step": 15305 }, { "epoch": 92.2289156626506, "grad_norm": 0.8832273483276367, "learning_rate": 4.701584114234717e-05, "loss": 0.1064, "step": 15310 }, { "epoch": 92.25903614457832, "grad_norm": 1.8025341033935547, "learning_rate": 4.7013052208835344e-05, "loss": 0.1213, "step": 15315 }, { "epoch": 92.28915662650603, "grad_norm": 2.2082834243774414, "learning_rate": 4.701026327532352e-05, "loss": 0.1331, "step": 15320 }, { "epoch": 92.31927710843374, "grad_norm": 1.1036571264266968, "learning_rate": 4.700747434181169e-05, "loss": 0.1427, "step": 15325 }, { "epoch": 92.34939759036145, "grad_norm": 1.1703567504882812, "learning_rate": 4.700468540829987e-05, "loss": 0.1182, "step": 15330 }, { "epoch": 92.37951807228916, "grad_norm": 2.180934429168701, "learning_rate": 4.7001896474788046e-05, "loss": 0.166, "step": 15335 }, { "epoch": 92.40963855421687, "grad_norm": 0.8977643847465515, "learning_rate": 4.699910754127621e-05, "loss": 0.1225, "step": 15340 }, { "epoch": 92.43975903614458, "grad_norm": 1.1891638040542603, "learning_rate": 4.6996318607764393e-05, "loss": 0.1297, "step": 15345 }, { "epoch": 92.46987951807229, "grad_norm": 1.1613346338272095, "learning_rate": 4.699352967425257e-05, "loss": 0.1483, "step": 15350 }, { "epoch": 92.5, "grad_norm": 1.102997899055481, "learning_rate": 4.699074074074074e-05, "loss": 0.1559, "step": 15355 }, { "epoch": 92.53012048192771, "grad_norm": 0.9282310605049133, "learning_rate": 4.698795180722892e-05, "loss": 0.1242, "step": 15360 }, { "epoch": 92.56024096385542, "grad_norm": 1.2180817127227783, "learning_rate": 4.6985162873717095e-05, "loss": 0.1544, "step": 15365 }, { "epoch": 92.59036144578313, "grad_norm": 2.0906457901000977, "learning_rate": 4.698237394020526e-05, "loss": 0.1391, "step": 15370 }, { "epoch": 92.62048192771084, "grad_norm": 1.5469666719436646, "learning_rate": 4.697958500669344e-05, "loss": 0.1291, "step": 15375 }, { "epoch": 92.65060240963855, "grad_norm": 1.2399280071258545, "learning_rate": 4.697679607318162e-05, "loss": 0.1557, "step": 15380 }, { "epoch": 92.68072289156626, "grad_norm": 1.096070408821106, "learning_rate": 4.697400713966979e-05, "loss": 0.1335, "step": 15385 }, { "epoch": 92.71084337349397, "grad_norm": 1.0564683675765991, "learning_rate": 4.697121820615797e-05, "loss": 0.129, "step": 15390 }, { "epoch": 92.74096385542168, "grad_norm": 0.9859278202056885, "learning_rate": 4.6968429272646145e-05, "loss": 0.1225, "step": 15395 }, { "epoch": 92.7710843373494, "grad_norm": 1.4117382764816284, "learning_rate": 4.696564033913431e-05, "loss": 0.1513, "step": 15400 }, { "epoch": 92.8012048192771, "grad_norm": 1.4786218404769897, "learning_rate": 4.696285140562249e-05, "loss": 0.1329, "step": 15405 }, { "epoch": 92.83132530120481, "grad_norm": 0.9054820537567139, "learning_rate": 4.6960062472110666e-05, "loss": 0.079, "step": 15410 }, { "epoch": 92.86144578313252, "grad_norm": 1.3725407123565674, "learning_rate": 4.695727353859884e-05, "loss": 0.13, "step": 15415 }, { "epoch": 92.89156626506023, "grad_norm": 1.6865205764770508, "learning_rate": 4.695448460508702e-05, "loss": 0.1606, "step": 15420 }, { "epoch": 92.92168674698796, "grad_norm": 1.0956621170043945, "learning_rate": 4.695169567157519e-05, "loss": 0.1176, "step": 15425 }, { "epoch": 92.95180722891567, "grad_norm": 1.4258737564086914, "learning_rate": 4.694890673806337e-05, "loss": 0.0975, "step": 15430 }, { "epoch": 92.98192771084338, "grad_norm": 1.2521849870681763, "learning_rate": 4.694611780455154e-05, "loss": 0.1715, "step": 15435 }, { "epoch": 93.0, "eval_accuracy": 0.9366889500589854, "eval_auc": 0.9824588900554522, "eval_f1": 0.9022465088038859, "eval_loss": 0.23510372638702393, "eval_precision": 0.964935064935065, "eval_recall": 0.8472063854047891, "eval_runtime": 19.5607, "eval_samples_per_second": 130.006, "eval_steps_per_second": 0.665, "step": 15438 }, { "epoch": 93.01204819277109, "grad_norm": 0.8004704117774963, "learning_rate": 4.6943328871039716e-05, "loss": 0.0874, "step": 15440 }, { "epoch": 93.0421686746988, "grad_norm": 0.9212279319763184, "learning_rate": 4.6940539937527896e-05, "loss": 0.1018, "step": 15445 }, { "epoch": 93.07228915662651, "grad_norm": 1.2655545473098755, "learning_rate": 4.693775100401607e-05, "loss": 0.1464, "step": 15450 }, { "epoch": 93.10240963855422, "grad_norm": 1.1404651403427124, "learning_rate": 4.693496207050424e-05, "loss": 0.1161, "step": 15455 }, { "epoch": 93.13253012048193, "grad_norm": 1.7204524278640747, "learning_rate": 4.693217313699242e-05, "loss": 0.1166, "step": 15460 }, { "epoch": 93.16265060240964, "grad_norm": 2.0438854694366455, "learning_rate": 4.692938420348059e-05, "loss": 0.1214, "step": 15465 }, { "epoch": 93.19277108433735, "grad_norm": 2.632890224456787, "learning_rate": 4.6926595269968765e-05, "loss": 0.1182, "step": 15470 }, { "epoch": 93.22289156626506, "grad_norm": 1.2251503467559814, "learning_rate": 4.6923806336456946e-05, "loss": 0.1437, "step": 15475 }, { "epoch": 93.25301204819277, "grad_norm": 1.5710704326629639, "learning_rate": 4.692101740294512e-05, "loss": 0.0881, "step": 15480 }, { "epoch": 93.28313253012048, "grad_norm": 0.9319076538085938, "learning_rate": 4.691822846943329e-05, "loss": 0.1176, "step": 15485 }, { "epoch": 93.3132530120482, "grad_norm": 1.3066734075546265, "learning_rate": 4.691543953592147e-05, "loss": 0.1278, "step": 15490 }, { "epoch": 93.3433734939759, "grad_norm": 1.5395375490188599, "learning_rate": 4.691265060240964e-05, "loss": 0.1177, "step": 15495 }, { "epoch": 93.37349397590361, "grad_norm": 0.9871209263801575, "learning_rate": 4.6909861668897815e-05, "loss": 0.1598, "step": 15500 }, { "epoch": 93.40361445783132, "grad_norm": 0.8233869671821594, "learning_rate": 4.6907072735385996e-05, "loss": 0.1343, "step": 15505 }, { "epoch": 93.43373493975903, "grad_norm": 1.3258026838302612, "learning_rate": 4.690428380187416e-05, "loss": 0.126, "step": 15510 }, { "epoch": 93.46385542168674, "grad_norm": 1.9142283201217651, "learning_rate": 4.6901494868362336e-05, "loss": 0.1588, "step": 15515 }, { "epoch": 93.49397590361446, "grad_norm": 0.9098950624465942, "learning_rate": 4.689870593485052e-05, "loss": 0.1162, "step": 15520 }, { "epoch": 93.52409638554217, "grad_norm": 1.0858807563781738, "learning_rate": 4.689591700133869e-05, "loss": 0.1241, "step": 15525 }, { "epoch": 93.55421686746988, "grad_norm": 1.2349001169204712, "learning_rate": 4.6893128067826865e-05, "loss": 0.1169, "step": 15530 }, { "epoch": 93.58433734939759, "grad_norm": 0.9307500123977661, "learning_rate": 4.6890339134315045e-05, "loss": 0.0899, "step": 15535 }, { "epoch": 93.6144578313253, "grad_norm": 1.7344714403152466, "learning_rate": 4.688755020080321e-05, "loss": 0.1213, "step": 15540 }, { "epoch": 93.644578313253, "grad_norm": 0.9669047594070435, "learning_rate": 4.6884761267291386e-05, "loss": 0.0895, "step": 15545 }, { "epoch": 93.67469879518072, "grad_norm": 2.3350412845611572, "learning_rate": 4.6881972333779566e-05, "loss": 0.1303, "step": 15550 }, { "epoch": 93.70481927710843, "grad_norm": 1.516924500465393, "learning_rate": 4.687918340026774e-05, "loss": 0.1043, "step": 15555 }, { "epoch": 93.73493975903614, "grad_norm": 3.6865155696868896, "learning_rate": 4.6876394466755914e-05, "loss": 0.1422, "step": 15560 }, { "epoch": 93.76506024096386, "grad_norm": 1.2372610569000244, "learning_rate": 4.6873605533244095e-05, "loss": 0.1153, "step": 15565 }, { "epoch": 93.79518072289157, "grad_norm": 2.35564923286438, "learning_rate": 4.687081659973226e-05, "loss": 0.1343, "step": 15570 }, { "epoch": 93.82530120481928, "grad_norm": 0.6407546997070312, "learning_rate": 4.6868027666220435e-05, "loss": 0.0994, "step": 15575 }, { "epoch": 93.855421686747, "grad_norm": 0.9919431805610657, "learning_rate": 4.6865238732708616e-05, "loss": 0.1158, "step": 15580 }, { "epoch": 93.8855421686747, "grad_norm": 1.9676462411880493, "learning_rate": 4.686244979919679e-05, "loss": 0.1579, "step": 15585 }, { "epoch": 93.91566265060241, "grad_norm": 0.8513187170028687, "learning_rate": 4.6859660865684964e-05, "loss": 0.1263, "step": 15590 }, { "epoch": 93.94578313253012, "grad_norm": 0.6019960045814514, "learning_rate": 4.6856871932173144e-05, "loss": 0.14, "step": 15595 }, { "epoch": 93.97590361445783, "grad_norm": 0.6069483160972595, "learning_rate": 4.685408299866131e-05, "loss": 0.1197, "step": 15600 }, { "epoch": 94.0, "eval_accuracy": 0.936295713723948, "eval_auc": 0.9818925289614135, "eval_f1": 0.9010989010989011, "eval_loss": 0.252782940864563, "eval_precision": 0.9697766097240473, "eval_recall": 0.8415051311288484, "eval_runtime": 17.0499, "eval_samples_per_second": 149.15, "eval_steps_per_second": 0.762, "step": 15604 }, { "epoch": 94.00602409638554, "grad_norm": 0.9746034145355225, "learning_rate": 4.6851294065149485e-05, "loss": 0.1174, "step": 15605 }, { "epoch": 94.03614457831326, "grad_norm": 2.57694411277771, "learning_rate": 4.6848505131637666e-05, "loss": 0.1268, "step": 15610 }, { "epoch": 94.06626506024097, "grad_norm": 0.77326899766922, "learning_rate": 4.684571619812584e-05, "loss": 0.1191, "step": 15615 }, { "epoch": 94.09638554216868, "grad_norm": 1.114821195602417, "learning_rate": 4.684292726461401e-05, "loss": 0.093, "step": 15620 }, { "epoch": 94.12650602409639, "grad_norm": 2.533151149749756, "learning_rate": 4.684013833110219e-05, "loss": 0.1474, "step": 15625 }, { "epoch": 94.1566265060241, "grad_norm": 1.6700258255004883, "learning_rate": 4.683734939759036e-05, "loss": 0.1325, "step": 15630 }, { "epoch": 94.1867469879518, "grad_norm": 1.7010748386383057, "learning_rate": 4.6834560464078535e-05, "loss": 0.0969, "step": 15635 }, { "epoch": 94.21686746987952, "grad_norm": 1.4638324975967407, "learning_rate": 4.6831771530566715e-05, "loss": 0.1433, "step": 15640 }, { "epoch": 94.24698795180723, "grad_norm": 1.4791585206985474, "learning_rate": 4.682898259705489e-05, "loss": 0.1462, "step": 15645 }, { "epoch": 94.27710843373494, "grad_norm": 0.7184770703315735, "learning_rate": 4.682619366354306e-05, "loss": 0.1424, "step": 15650 }, { "epoch": 94.30722891566265, "grad_norm": 1.324702262878418, "learning_rate": 4.6823404730031237e-05, "loss": 0.146, "step": 15655 }, { "epoch": 94.33734939759036, "grad_norm": 1.2018970251083374, "learning_rate": 4.682061579651941e-05, "loss": 0.1174, "step": 15660 }, { "epoch": 94.36746987951807, "grad_norm": 1.1097744703292847, "learning_rate": 4.6817826863007584e-05, "loss": 0.121, "step": 15665 }, { "epoch": 94.39759036144578, "grad_norm": 1.4790349006652832, "learning_rate": 4.6815037929495765e-05, "loss": 0.1149, "step": 15670 }, { "epoch": 94.42771084337349, "grad_norm": 1.5985567569732666, "learning_rate": 4.681224899598394e-05, "loss": 0.1182, "step": 15675 }, { "epoch": 94.4578313253012, "grad_norm": 2.173953056335449, "learning_rate": 4.680946006247211e-05, "loss": 0.1467, "step": 15680 }, { "epoch": 94.48795180722891, "grad_norm": 1.268538475036621, "learning_rate": 4.6806671128960286e-05, "loss": 0.1264, "step": 15685 }, { "epoch": 94.51807228915662, "grad_norm": 1.8958779573440552, "learning_rate": 4.680388219544846e-05, "loss": 0.1349, "step": 15690 }, { "epoch": 94.54819277108433, "grad_norm": 0.5450842976570129, "learning_rate": 4.6801093261936634e-05, "loss": 0.119, "step": 15695 }, { "epoch": 94.57831325301204, "grad_norm": 1.3927003145217896, "learning_rate": 4.6798304328424814e-05, "loss": 0.089, "step": 15700 }, { "epoch": 94.60843373493977, "grad_norm": 1.4803962707519531, "learning_rate": 4.679551539491299e-05, "loss": 0.159, "step": 15705 }, { "epoch": 94.63855421686748, "grad_norm": 1.481036901473999, "learning_rate": 4.679272646140116e-05, "loss": 0.1033, "step": 15710 }, { "epoch": 94.66867469879519, "grad_norm": 2.072739601135254, "learning_rate": 4.6789937527889336e-05, "loss": 0.1313, "step": 15715 }, { "epoch": 94.6987951807229, "grad_norm": 0.9376345872879028, "learning_rate": 4.678714859437751e-05, "loss": 0.116, "step": 15720 }, { "epoch": 94.7289156626506, "grad_norm": 0.8326073288917542, "learning_rate": 4.678435966086569e-05, "loss": 0.1274, "step": 15725 }, { "epoch": 94.75903614457832, "grad_norm": 1.4961594343185425, "learning_rate": 4.6781570727353864e-05, "loss": 0.1398, "step": 15730 }, { "epoch": 94.78915662650603, "grad_norm": 2.2496695518493652, "learning_rate": 4.677878179384204e-05, "loss": 0.1562, "step": 15735 }, { "epoch": 94.81927710843374, "grad_norm": 0.6008087396621704, "learning_rate": 4.677599286033021e-05, "loss": 0.1167, "step": 15740 }, { "epoch": 94.84939759036145, "grad_norm": 1.6028498411178589, "learning_rate": 4.6773203926818385e-05, "loss": 0.1245, "step": 15745 }, { "epoch": 94.87951807228916, "grad_norm": 1.37605881690979, "learning_rate": 4.677041499330656e-05, "loss": 0.1065, "step": 15750 }, { "epoch": 94.90963855421687, "grad_norm": 0.9388094544410706, "learning_rate": 4.676762605979474e-05, "loss": 0.1322, "step": 15755 }, { "epoch": 94.93975903614458, "grad_norm": 0.8253313899040222, "learning_rate": 4.676483712628291e-05, "loss": 0.1097, "step": 15760 }, { "epoch": 94.96987951807229, "grad_norm": 0.8523429036140442, "learning_rate": 4.676204819277109e-05, "loss": 0.1099, "step": 15765 }, { "epoch": 95.0, "grad_norm": 1.5498061180114746, "learning_rate": 4.675925925925926e-05, "loss": 0.1522, "step": 15770 }, { "epoch": 95.0, "eval_accuracy": 0.9500589854502556, "eval_auc": 0.9869014880752757, "eval_f1": 0.9252501471453797, "eval_loss": 0.19789645075798035, "eval_precision": 0.9562043795620438, "eval_recall": 0.8962371721778791, "eval_runtime": 16.8002, "eval_samples_per_second": 151.367, "eval_steps_per_second": 0.774, "step": 15770 }, { "epoch": 95.03012048192771, "grad_norm": 1.4736396074295044, "learning_rate": 4.6756470325747435e-05, "loss": 0.1415, "step": 15775 }, { "epoch": 95.06024096385542, "grad_norm": 2.076514959335327, "learning_rate": 4.675368139223561e-05, "loss": 0.1535, "step": 15780 }, { "epoch": 95.09036144578313, "grad_norm": 1.2584677934646606, "learning_rate": 4.675089245872379e-05, "loss": 0.1, "step": 15785 }, { "epoch": 95.12048192771084, "grad_norm": 1.3247615098953247, "learning_rate": 4.674810352521196e-05, "loss": 0.1273, "step": 15790 }, { "epoch": 95.15060240963855, "grad_norm": 1.2890088558197021, "learning_rate": 4.674531459170014e-05, "loss": 0.1322, "step": 15795 }, { "epoch": 95.18072289156626, "grad_norm": 1.6674821376800537, "learning_rate": 4.674252565818831e-05, "loss": 0.1026, "step": 15800 }, { "epoch": 95.21084337349397, "grad_norm": 0.9363815784454346, "learning_rate": 4.6739736724676484e-05, "loss": 0.1242, "step": 15805 }, { "epoch": 95.24096385542168, "grad_norm": 1.679032564163208, "learning_rate": 4.673694779116466e-05, "loss": 0.1314, "step": 15810 }, { "epoch": 95.2710843373494, "grad_norm": 1.0566807985305786, "learning_rate": 4.673415885765284e-05, "loss": 0.159, "step": 15815 }, { "epoch": 95.3012048192771, "grad_norm": 1.3573179244995117, "learning_rate": 4.673136992414101e-05, "loss": 0.1406, "step": 15820 }, { "epoch": 95.33132530120481, "grad_norm": 0.8744345307350159, "learning_rate": 4.6728580990629186e-05, "loss": 0.1365, "step": 15825 }, { "epoch": 95.36144578313252, "grad_norm": 1.0821994543075562, "learning_rate": 4.672579205711736e-05, "loss": 0.1234, "step": 15830 }, { "epoch": 95.39156626506023, "grad_norm": 1.922343134880066, "learning_rate": 4.6723003123605534e-05, "loss": 0.1118, "step": 15835 }, { "epoch": 95.42168674698796, "grad_norm": 0.9799426794052124, "learning_rate": 4.672021419009371e-05, "loss": 0.0908, "step": 15840 }, { "epoch": 95.45180722891567, "grad_norm": 2.637895345687866, "learning_rate": 4.671742525658189e-05, "loss": 0.105, "step": 15845 }, { "epoch": 95.48192771084338, "grad_norm": 1.9730476140975952, "learning_rate": 4.671463632307006e-05, "loss": 0.1127, "step": 15850 }, { "epoch": 95.51204819277109, "grad_norm": 3.6851091384887695, "learning_rate": 4.6711847389558236e-05, "loss": 0.1292, "step": 15855 }, { "epoch": 95.5421686746988, "grad_norm": 1.5435463190078735, "learning_rate": 4.670905845604641e-05, "loss": 0.162, "step": 15860 }, { "epoch": 95.57228915662651, "grad_norm": 1.4445061683654785, "learning_rate": 4.670626952253458e-05, "loss": 0.1507, "step": 15865 }, { "epoch": 95.60240963855422, "grad_norm": 1.2720803022384644, "learning_rate": 4.670348058902276e-05, "loss": 0.1351, "step": 15870 }, { "epoch": 95.63253012048193, "grad_norm": 1.1591479778289795, "learning_rate": 4.670069165551094e-05, "loss": 0.115, "step": 15875 }, { "epoch": 95.66265060240964, "grad_norm": 1.4791457653045654, "learning_rate": 4.669790272199911e-05, "loss": 0.1304, "step": 15880 }, { "epoch": 95.69277108433735, "grad_norm": 1.037960410118103, "learning_rate": 4.6695113788487285e-05, "loss": 0.1382, "step": 15885 }, { "epoch": 95.72289156626506, "grad_norm": 0.9557424187660217, "learning_rate": 4.669232485497546e-05, "loss": 0.1161, "step": 15890 }, { "epoch": 95.75301204819277, "grad_norm": 1.8750642538070679, "learning_rate": 4.668953592146363e-05, "loss": 0.1205, "step": 15895 }, { "epoch": 95.78313253012048, "grad_norm": 1.0831913948059082, "learning_rate": 4.668674698795181e-05, "loss": 0.0839, "step": 15900 }, { "epoch": 95.8132530120482, "grad_norm": 0.7221660017967224, "learning_rate": 4.668395805443999e-05, "loss": 0.1134, "step": 15905 }, { "epoch": 95.8433734939759, "grad_norm": 1.5886327028274536, "learning_rate": 4.668116912092816e-05, "loss": 0.1243, "step": 15910 }, { "epoch": 95.87349397590361, "grad_norm": 1.270094394683838, "learning_rate": 4.6678380187416335e-05, "loss": 0.1272, "step": 15915 }, { "epoch": 95.90361445783132, "grad_norm": 2.4260833263397217, "learning_rate": 4.667559125390451e-05, "loss": 0.1571, "step": 15920 }, { "epoch": 95.93373493975903, "grad_norm": 0.9783493876457214, "learning_rate": 4.667280232039268e-05, "loss": 0.1062, "step": 15925 }, { "epoch": 95.96385542168674, "grad_norm": 1.5558791160583496, "learning_rate": 4.6670013386880856e-05, "loss": 0.1235, "step": 15930 }, { "epoch": 95.99397590361446, "grad_norm": 1.8295536041259766, "learning_rate": 4.666722445336904e-05, "loss": 0.1616, "step": 15935 }, { "epoch": 96.0, "eval_accuracy": 0.9418010224144711, "eval_auc": 0.9832754082248636, "eval_f1": 0.9121140142517815, "eval_loss": 0.2412232756614685, "eval_precision": 0.9516728624535316, "eval_recall": 0.8757126567844926, "eval_runtime": 17.0487, "eval_samples_per_second": 149.161, "eval_steps_per_second": 0.763, "step": 15936 }, { "epoch": 96.02409638554217, "grad_norm": 1.7150579690933228, "learning_rate": 4.666443551985721e-05, "loss": 0.1433, "step": 15940 }, { "epoch": 96.05421686746988, "grad_norm": 1.854522705078125, "learning_rate": 4.6661646586345384e-05, "loss": 0.1344, "step": 15945 }, { "epoch": 96.08433734939759, "grad_norm": 1.563353180885315, "learning_rate": 4.665885765283356e-05, "loss": 0.1068, "step": 15950 }, { "epoch": 96.1144578313253, "grad_norm": 0.6915176510810852, "learning_rate": 4.665606871932173e-05, "loss": 0.1171, "step": 15955 }, { "epoch": 96.144578313253, "grad_norm": 0.9819210171699524, "learning_rate": 4.6653279785809906e-05, "loss": 0.1068, "step": 15960 }, { "epoch": 96.17469879518072, "grad_norm": 1.0088191032409668, "learning_rate": 4.6650490852298086e-05, "loss": 0.0913, "step": 15965 }, { "epoch": 96.20481927710843, "grad_norm": 1.2183645963668823, "learning_rate": 4.664770191878626e-05, "loss": 0.129, "step": 15970 }, { "epoch": 96.23493975903614, "grad_norm": 0.746493399143219, "learning_rate": 4.6644912985274434e-05, "loss": 0.1636, "step": 15975 }, { "epoch": 96.26506024096386, "grad_norm": 2.4620680809020996, "learning_rate": 4.664212405176261e-05, "loss": 0.1255, "step": 15980 }, { "epoch": 96.29518072289157, "grad_norm": 1.318218469619751, "learning_rate": 4.663933511825078e-05, "loss": 0.1316, "step": 15985 }, { "epoch": 96.32530120481928, "grad_norm": 1.3522661924362183, "learning_rate": 4.663654618473896e-05, "loss": 0.1186, "step": 15990 }, { "epoch": 96.355421686747, "grad_norm": 1.5552898645401, "learning_rate": 4.6633757251227136e-05, "loss": 0.1408, "step": 15995 }, { "epoch": 96.3855421686747, "grad_norm": 0.9430577754974365, "learning_rate": 4.663096831771531e-05, "loss": 0.122, "step": 16000 }, { "epoch": 96.41566265060241, "grad_norm": 0.7161679267883301, "learning_rate": 4.6628179384203484e-05, "loss": 0.1138, "step": 16005 }, { "epoch": 96.44578313253012, "grad_norm": 2.2095699310302734, "learning_rate": 4.662539045069166e-05, "loss": 0.1104, "step": 16010 }, { "epoch": 96.47590361445783, "grad_norm": 1.1146612167358398, "learning_rate": 4.662260151717983e-05, "loss": 0.0945, "step": 16015 }, { "epoch": 96.50602409638554, "grad_norm": 0.7639297246932983, "learning_rate": 4.661981258366801e-05, "loss": 0.0998, "step": 16020 }, { "epoch": 96.53614457831326, "grad_norm": 1.307127594947815, "learning_rate": 4.6617023650156186e-05, "loss": 0.1117, "step": 16025 }, { "epoch": 96.56626506024097, "grad_norm": 1.696721076965332, "learning_rate": 4.661423471664436e-05, "loss": 0.1107, "step": 16030 }, { "epoch": 96.59638554216868, "grad_norm": 2.453157663345337, "learning_rate": 4.661144578313253e-05, "loss": 0.1544, "step": 16035 }, { "epoch": 96.62650602409639, "grad_norm": 1.5119658708572388, "learning_rate": 4.660865684962071e-05, "loss": 0.1055, "step": 16040 }, { "epoch": 96.6566265060241, "grad_norm": 1.1876705884933472, "learning_rate": 4.660586791610888e-05, "loss": 0.1325, "step": 16045 }, { "epoch": 96.6867469879518, "grad_norm": 0.7079341411590576, "learning_rate": 4.660307898259706e-05, "loss": 0.1109, "step": 16050 }, { "epoch": 96.71686746987952, "grad_norm": 2.3505184650421143, "learning_rate": 4.6600290049085235e-05, "loss": 0.0938, "step": 16055 }, { "epoch": 96.74698795180723, "grad_norm": 1.5596070289611816, "learning_rate": 4.65975011155734e-05, "loss": 0.1855, "step": 16060 }, { "epoch": 96.77710843373494, "grad_norm": 0.8612086772918701, "learning_rate": 4.659471218206158e-05, "loss": 0.1204, "step": 16065 }, { "epoch": 96.80722891566265, "grad_norm": 1.3117247819900513, "learning_rate": 4.6591923248549756e-05, "loss": 0.1037, "step": 16070 }, { "epoch": 96.83734939759036, "grad_norm": 1.7731101512908936, "learning_rate": 4.658913431503793e-05, "loss": 0.1445, "step": 16075 }, { "epoch": 96.86746987951807, "grad_norm": 1.4806108474731445, "learning_rate": 4.658634538152611e-05, "loss": 0.1438, "step": 16080 }, { "epoch": 96.89759036144578, "grad_norm": 1.2414298057556152, "learning_rate": 4.6583556448014285e-05, "loss": 0.1147, "step": 16085 }, { "epoch": 96.92771084337349, "grad_norm": 1.7466983795166016, "learning_rate": 4.658076751450245e-05, "loss": 0.1361, "step": 16090 }, { "epoch": 96.9578313253012, "grad_norm": 1.1356420516967773, "learning_rate": 4.657797858099063e-05, "loss": 0.1157, "step": 16095 }, { "epoch": 96.98795180722891, "grad_norm": 2.459758758544922, "learning_rate": 4.6575189647478806e-05, "loss": 0.1374, "step": 16100 }, { "epoch": 97.0, "eval_accuracy": 0.9457333857648447, "eval_auc": 0.9851661303061704, "eval_f1": 0.9173652694610779, "eval_loss": 0.22718866169452667, "eval_precision": 0.9659520807061791, "eval_recall": 0.8734321550741163, "eval_runtime": 16.5732, "eval_samples_per_second": 153.441, "eval_steps_per_second": 0.784, "step": 16102 }, { "epoch": 97.01807228915662, "grad_norm": 1.7173633575439453, "learning_rate": 4.657240071396698e-05, "loss": 0.1034, "step": 16105 }, { "epoch": 97.04819277108433, "grad_norm": 0.6392005681991577, "learning_rate": 4.656961178045516e-05, "loss": 0.1016, "step": 16110 }, { "epoch": 97.07831325301204, "grad_norm": 1.4957361221313477, "learning_rate": 4.6566822846943334e-05, "loss": 0.1364, "step": 16115 }, { "epoch": 97.10843373493977, "grad_norm": 2.0598764419555664, "learning_rate": 4.65640339134315e-05, "loss": 0.1122, "step": 16120 }, { "epoch": 97.13855421686748, "grad_norm": 1.5152614116668701, "learning_rate": 4.656124497991968e-05, "loss": 0.1032, "step": 16125 }, { "epoch": 97.16867469879519, "grad_norm": 2.9211199283599854, "learning_rate": 4.6558456046407856e-05, "loss": 0.1917, "step": 16130 }, { "epoch": 97.1987951807229, "grad_norm": 2.7019996643066406, "learning_rate": 4.655566711289603e-05, "loss": 0.1223, "step": 16135 }, { "epoch": 97.2289156626506, "grad_norm": 1.0914349555969238, "learning_rate": 4.655287817938421e-05, "loss": 0.1117, "step": 16140 }, { "epoch": 97.25903614457832, "grad_norm": 0.7972518801689148, "learning_rate": 4.655008924587238e-05, "loss": 0.1037, "step": 16145 }, { "epoch": 97.28915662650603, "grad_norm": 0.6472540497779846, "learning_rate": 4.654730031236055e-05, "loss": 0.1293, "step": 16150 }, { "epoch": 97.31927710843374, "grad_norm": 0.8009729981422424, "learning_rate": 4.654451137884873e-05, "loss": 0.1242, "step": 16155 }, { "epoch": 97.34939759036145, "grad_norm": 1.0288755893707275, "learning_rate": 4.6541722445336905e-05, "loss": 0.1358, "step": 16160 }, { "epoch": 97.37951807228916, "grad_norm": 0.8072810173034668, "learning_rate": 4.653893351182508e-05, "loss": 0.1229, "step": 16165 }, { "epoch": 97.40963855421687, "grad_norm": 2.337909460067749, "learning_rate": 4.653614457831326e-05, "loss": 0.0932, "step": 16170 }, { "epoch": 97.43975903614458, "grad_norm": 0.561859130859375, "learning_rate": 4.6533355644801426e-05, "loss": 0.0918, "step": 16175 }, { "epoch": 97.46987951807229, "grad_norm": 1.3851932287216187, "learning_rate": 4.65305667112896e-05, "loss": 0.1177, "step": 16180 }, { "epoch": 97.5, "grad_norm": 1.1070224046707153, "learning_rate": 4.652777777777778e-05, "loss": 0.1441, "step": 16185 }, { "epoch": 97.53012048192771, "grad_norm": 1.4459174871444702, "learning_rate": 4.6524988844265955e-05, "loss": 0.1391, "step": 16190 }, { "epoch": 97.56024096385542, "grad_norm": 0.9987706542015076, "learning_rate": 4.652219991075413e-05, "loss": 0.1393, "step": 16195 }, { "epoch": 97.59036144578313, "grad_norm": 1.2517822980880737, "learning_rate": 4.651941097724231e-05, "loss": 0.1147, "step": 16200 }, { "epoch": 97.62048192771084, "grad_norm": 0.6012493968009949, "learning_rate": 4.6516622043730476e-05, "loss": 0.1125, "step": 16205 }, { "epoch": 97.65060240963855, "grad_norm": 0.6572882533073425, "learning_rate": 4.651383311021865e-05, "loss": 0.1406, "step": 16210 }, { "epoch": 97.68072289156626, "grad_norm": 1.877267837524414, "learning_rate": 4.651104417670683e-05, "loss": 0.1172, "step": 16215 }, { "epoch": 97.71084337349397, "grad_norm": 1.3656926155090332, "learning_rate": 4.6508255243195004e-05, "loss": 0.1493, "step": 16220 }, { "epoch": 97.74096385542168, "grad_norm": 2.071478843688965, "learning_rate": 4.650546630968318e-05, "loss": 0.1464, "step": 16225 }, { "epoch": 97.7710843373494, "grad_norm": 1.0450981855392456, "learning_rate": 4.650267737617136e-05, "loss": 0.123, "step": 16230 }, { "epoch": 97.8012048192771, "grad_norm": 1.2422423362731934, "learning_rate": 4.6499888442659526e-05, "loss": 0.1201, "step": 16235 }, { "epoch": 97.83132530120481, "grad_norm": 1.80039381980896, "learning_rate": 4.6497099509147706e-05, "loss": 0.1066, "step": 16240 }, { "epoch": 97.86144578313252, "grad_norm": 2.6740550994873047, "learning_rate": 4.649431057563588e-05, "loss": 0.145, "step": 16245 }, { "epoch": 97.89156626506023, "grad_norm": 1.6974881887435913, "learning_rate": 4.6491521642124054e-05, "loss": 0.1251, "step": 16250 }, { "epoch": 97.92168674698796, "grad_norm": 0.8452017307281494, "learning_rate": 4.6488732708612234e-05, "loss": 0.1148, "step": 16255 }, { "epoch": 97.95180722891567, "grad_norm": 0.8741090893745422, "learning_rate": 4.64859437751004e-05, "loss": 0.1366, "step": 16260 }, { "epoch": 97.98192771084338, "grad_norm": 1.6355156898498535, "learning_rate": 4.6483154841588575e-05, "loss": 0.1239, "step": 16265 }, { "epoch": 98.0, "eval_accuracy": 0.9547778214707039, "eval_auc": 0.9865613292067112, "eval_f1": 0.9329446064139941, "eval_loss": 0.1774035394191742, "eval_precision": 0.954653937947494, "eval_recall": 0.9122006841505131, "eval_runtime": 19.7506, "eval_samples_per_second": 128.756, "eval_steps_per_second": 0.658, "step": 16268 } ], "logging_steps": 5, "max_steps": 99600, "num_input_tokens_seen": 0, "num_train_epochs": 600, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 100, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 44 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.319025282240568e+21, "train_batch_size": 100, "trial_name": null, "trial_params": null }