diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,6 +1,6 @@ { - "best_metric": 0.7183188796043396, - "best_model_checkpoint": "./test_ast\\checkpoint-1260", + "best_metric": 0.6530519723892212, + "best_model_checkpoint": "./test_ast\\checkpoint-1320", "epoch": 15.0, "eval_steps": 5, "global_step": 3915, @@ -11,8731 +11,8731 @@ { "epoch": 0.02, "learning_rate": 0.00019984674329501915, - "loss": 3.2249, + "loss": 3.0547, "step": 5 }, { "epoch": 0.02, - "eval_accuracy": 0.09770114942528736, - "eval_loss": 3.1166629791259766, - "eval_runtime": 430.1734, - "eval_samples_per_second": 0.404, - "eval_steps_per_second": 0.051, + "eval_accuracy": 0.12643678160919541, + "eval_loss": 3.064624309539795, + "eval_runtime": 132.8013, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 5 }, { "epoch": 0.04, "learning_rate": 0.00019964240102171137, - "loss": 3.3654, + "loss": 2.8437, "step": 10 }, { "epoch": 0.04, - "eval_accuracy": 0.16666666666666666, - "eval_loss": 3.1070964336395264, - "eval_runtime": 220.5651, - "eval_samples_per_second": 0.789, - "eval_steps_per_second": 0.1, + "eval_accuracy": 0.15517241379310345, + "eval_loss": 2.6111764907836914, + "eval_runtime": 135.5688, + "eval_samples_per_second": 1.283, + "eval_steps_per_second": 0.162, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.00019938697318007664, - "loss": 2.952, + "loss": 2.7144, "step": 15 }, { "epoch": 0.06, - "eval_accuracy": 0.28160919540229884, - "eval_loss": 2.3668224811553955, - "eval_runtime": 235.0668, - "eval_samples_per_second": 0.74, - "eval_steps_per_second": 0.094, + "eval_accuracy": 0.22988505747126436, + "eval_loss": 2.3521151542663574, + "eval_runtime": 131.8277, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 15 }, { "epoch": 0.08, "learning_rate": 0.0001991315453384419, - "loss": 2.6551, + "loss": 2.3026, "step": 20 }, { "epoch": 0.08, - "eval_accuracy": 0.22988505747126436, - "eval_loss": 3.0940752029418945, - "eval_runtime": 263.2242, - "eval_samples_per_second": 0.661, - "eval_steps_per_second": 0.084, + "eval_accuracy": 0.25862068965517243, + "eval_loss": 2.28005313873291, + "eval_runtime": 131.5426, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.00019887611749680716, - "loss": 3.2285, + "loss": 2.4449, "step": 25 }, { "epoch": 0.1, - "eval_accuracy": 0.13793103448275862, - "eval_loss": 2.4251976013183594, - "eval_runtime": 240.2214, - "eval_samples_per_second": 0.724, - "eval_steps_per_second": 0.092, + "eval_accuracy": 0.20689655172413793, + "eval_loss": 2.2755722999572754, + "eval_runtime": 133.8549, + "eval_samples_per_second": 1.3, + "eval_steps_per_second": 0.164, "step": 25 }, { "epoch": 0.11, "learning_rate": 0.00019862068965517243, - "loss": 2.4251, + "loss": 2.2115, "step": 30 }, { "epoch": 0.11, - "eval_accuracy": 0.15517241379310345, - "eval_loss": 2.3162126541137695, - "eval_runtime": 271.4124, - "eval_samples_per_second": 0.641, - "eval_steps_per_second": 0.081, + "eval_accuracy": 0.3218390804597701, + "eval_loss": 1.9326324462890625, + "eval_runtime": 133.4669, + "eval_samples_per_second": 1.304, + "eval_steps_per_second": 0.165, "step": 30 }, { "epoch": 0.13, "learning_rate": 0.0001983652618135377, - "loss": 2.2216, + "loss": 1.9883, "step": 35 }, { "epoch": 0.13, - "eval_accuracy": 0.2413793103448276, - "eval_loss": 2.2742679119110107, - "eval_runtime": 292.5369, - "eval_samples_per_second": 0.595, - "eval_steps_per_second": 0.075, + "eval_accuracy": 0.13793103448275862, + "eval_loss": 2.199158191680908, + "eval_runtime": 133.3902, + "eval_samples_per_second": 1.304, + "eval_steps_per_second": 0.165, "step": 35 }, { "epoch": 0.15, "learning_rate": 0.00019810983397190295, - "loss": 1.982, + "loss": 1.829, "step": 40 }, { "epoch": 0.15, - "eval_accuracy": 0.25862068965517243, - "eval_loss": 2.558276653289795, - "eval_runtime": 282.7639, - "eval_samples_per_second": 0.615, - "eval_steps_per_second": 0.078, + "eval_accuracy": 0.28735632183908044, + "eval_loss": 2.290984630584717, + "eval_runtime": 132.8831, + "eval_samples_per_second": 1.309, + "eval_steps_per_second": 0.166, "step": 40 }, { "epoch": 0.17, "learning_rate": 0.0001978544061302682, - "loss": 2.2904, + "loss": 1.9662, "step": 45 }, { "epoch": 0.17, - "eval_accuracy": 0.1896551724137931, - "eval_loss": 2.2774674892425537, - "eval_runtime": 272.0899, - "eval_samples_per_second": 0.639, - "eval_steps_per_second": 0.081, + "eval_accuracy": 0.3103448275862069, + "eval_loss": 1.9885331392288208, + "eval_runtime": 132.7666, + "eval_samples_per_second": 1.311, + "eval_steps_per_second": 0.166, "step": 45 }, { "epoch": 0.19, "learning_rate": 0.00019759897828863348, - "loss": 2.2136, + "loss": 2.2471, "step": 50 }, { "epoch": 0.19, - "eval_accuracy": 0.1724137931034483, - "eval_loss": 2.264404535293579, - "eval_runtime": 274.6774, - "eval_samples_per_second": 0.633, - "eval_steps_per_second": 0.08, + "eval_accuracy": 0.19540229885057472, + "eval_loss": 2.141151189804077, + "eval_runtime": 132.4565, + "eval_samples_per_second": 1.314, + "eval_steps_per_second": 0.166, "step": 50 }, { "epoch": 0.21, "learning_rate": 0.00019734355044699872, - "loss": 1.9795, + "loss": 1.8606, "step": 55 }, { "epoch": 0.21, - "eval_accuracy": 0.27586206896551724, - "eval_loss": 2.131664752960205, - "eval_runtime": 289.0641, - "eval_samples_per_second": 0.602, - "eval_steps_per_second": 0.076, + "eval_accuracy": 0.3735632183908046, + "eval_loss": 1.9202948808670044, + "eval_runtime": 133.0296, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 55 }, { "epoch": 0.23, - "learning_rate": 0.000197088122605364, - "loss": 2.1172, + "learning_rate": 0.00019713920817369094, + "loss": 1.8904, "step": 60 }, { "epoch": 0.23, - "eval_accuracy": 0.2988505747126437, - "eval_loss": 2.00719952583313, - "eval_runtime": 237.5063, - "eval_samples_per_second": 0.733, - "eval_steps_per_second": 0.093, + "eval_accuracy": 0.3735632183908046, + "eval_loss": 1.8267676830291748, + "eval_runtime": 140.614, + "eval_samples_per_second": 1.237, + "eval_steps_per_second": 0.156, "step": 60 }, { "epoch": 0.25, - "learning_rate": 0.00019683269476372924, - "loss": 2.1899, + "learning_rate": 0.0001968837803320562, + "loss": 1.9574, "step": 65 }, { "epoch": 0.25, - "eval_accuracy": 0.19540229885057472, - "eval_loss": 1.9535428285598755, - "eval_runtime": 127.0996, - "eval_samples_per_second": 1.369, - "eval_steps_per_second": 0.173, + "eval_accuracy": 0.3563218390804598, + "eval_loss": 1.690549612045288, + "eval_runtime": 140.4999, + "eval_samples_per_second": 1.238, + "eval_steps_per_second": 0.157, "step": 65 }, { "epoch": 0.27, - "learning_rate": 0.00019657726692209453, - "loss": 2.1116, + "learning_rate": 0.00019662835249042147, + "loss": 1.7038, "step": 70 }, { "epoch": 0.27, - "eval_accuracy": 0.22988505747126436, - "eval_loss": 1.9114965200424194, - "eval_runtime": 123.0583, - "eval_samples_per_second": 1.414, - "eval_steps_per_second": 0.179, + "eval_accuracy": 0.3505747126436782, + "eval_loss": 1.8225674629211426, + "eval_runtime": 141.0186, + "eval_samples_per_second": 1.234, + "eval_steps_per_second": 0.156, "step": 70 }, { "epoch": 0.29, - "learning_rate": 0.00019632183908045977, - "loss": 2.0904, + "learning_rate": 0.00019637292464878673, + "loss": 1.9195, "step": 75 }, { "epoch": 0.29, - "eval_accuracy": 0.3160919540229885, - "eval_loss": 1.9307175874710083, - "eval_runtime": 110.3002, - "eval_samples_per_second": 1.578, - "eval_steps_per_second": 0.199, + "eval_accuracy": 0.3793103448275862, + "eval_loss": 1.6538840532302856, + "eval_runtime": 141.7043, + "eval_samples_per_second": 1.228, + "eval_steps_per_second": 0.155, "step": 75 }, { "epoch": 0.31, - "learning_rate": 0.00019606641123882503, - "loss": 1.9124, + "learning_rate": 0.000196117496807152, + "loss": 1.8161, "step": 80 }, { "epoch": 0.31, - "eval_accuracy": 0.3390804597701149, - "eval_loss": 1.8102548122406006, - "eval_runtime": 111.5475, - "eval_samples_per_second": 1.56, - "eval_steps_per_second": 0.197, + "eval_accuracy": 0.3275862068965517, + "eval_loss": 1.7468079328536987, + "eval_runtime": 141.1574, + "eval_samples_per_second": 1.233, + "eval_steps_per_second": 0.156, "step": 80 }, { "epoch": 0.33, - "learning_rate": 0.0001958109833971903, - "loss": 1.6039, + "learning_rate": 0.00019586206896551723, + "loss": 1.5665, "step": 85 }, { "epoch": 0.33, - "eval_accuracy": 0.21839080459770116, - "eval_loss": 1.9069994688034058, - "eval_runtime": 109.692, - "eval_samples_per_second": 1.586, - "eval_steps_per_second": 0.201, + "eval_accuracy": 0.3218390804597701, + "eval_loss": 1.797639012336731, + "eval_runtime": 141.5066, + "eval_samples_per_second": 1.23, + "eval_steps_per_second": 0.155, "step": 85 }, { "epoch": 0.34, - "learning_rate": 0.00019555555555555556, - "loss": 1.5797, + "learning_rate": 0.00019560664112388252, + "loss": 1.5756, "step": 90 }, { "epoch": 0.34, - "eval_accuracy": 0.3620689655172414, - "eval_loss": 1.955863118171692, - "eval_runtime": 110.8668, - "eval_samples_per_second": 1.569, - "eval_steps_per_second": 0.198, + "eval_accuracy": 0.4367816091954023, + "eval_loss": 1.6475659608840942, + "eval_runtime": 141.7711, + "eval_samples_per_second": 1.227, + "eval_steps_per_second": 0.155, "step": 90 }, { "epoch": 0.36, - "learning_rate": 0.00019530012771392082, - "loss": 1.9217, + "learning_rate": 0.00019535121328224776, + "loss": 1.5938, "step": 95 }, { "epoch": 0.36, - "eval_accuracy": 0.28735632183908044, - "eval_loss": 1.7302300930023193, - "eval_runtime": 109.9193, - "eval_samples_per_second": 1.583, - "eval_steps_per_second": 0.2, + "eval_accuracy": 0.3333333333333333, + "eval_loss": 1.6348626613616943, + "eval_runtime": 141.8628, + "eval_samples_per_second": 1.227, + "eval_steps_per_second": 0.155, "step": 95 }, { "epoch": 0.38, - "learning_rate": 0.00019504469987228609, - "loss": 2.1192, + "learning_rate": 0.00019509578544061305, + "loss": 1.8887, "step": 100 }, { "epoch": 0.38, - "eval_accuracy": 0.28735632183908044, - "eval_loss": 2.121290445327759, - "eval_runtime": 114.0902, - "eval_samples_per_second": 1.525, - "eval_steps_per_second": 0.193, + "eval_accuracy": 0.3103448275862069, + "eval_loss": 1.6541311740875244, + "eval_runtime": 141.8903, + "eval_samples_per_second": 1.226, + "eval_steps_per_second": 0.155, "step": 100 }, { "epoch": 0.4, - "learning_rate": 0.00019478927203065135, - "loss": 1.991, + "learning_rate": 0.00019484035759897829, + "loss": 1.7326, "step": 105 }, { "epoch": 0.4, - "eval_accuracy": 0.25287356321839083, - "eval_loss": 1.8870444297790527, - "eval_runtime": 111.5449, - "eval_samples_per_second": 1.56, - "eval_steps_per_second": 0.197, + "eval_accuracy": 0.3275862068965517, + "eval_loss": 1.8677045106887817, + "eval_runtime": 141.7931, + "eval_samples_per_second": 1.227, + "eval_steps_per_second": 0.155, "step": 105 }, { "epoch": 0.42, - "learning_rate": 0.0001945338441890166, - "loss": 1.9855, + "learning_rate": 0.00019458492975734358, + "loss": 2.0043, "step": 110 }, { "epoch": 0.42, - "eval_accuracy": 0.3448275862068966, - "eval_loss": 1.853091835975647, - "eval_runtime": 108.3618, - "eval_samples_per_second": 1.606, - "eval_steps_per_second": 0.203, + "eval_accuracy": 0.2988505747126437, + "eval_loss": 2.1178438663482666, + "eval_runtime": 141.8724, + "eval_samples_per_second": 1.226, + "eval_steps_per_second": 0.155, "step": 110 }, { "epoch": 0.44, - "learning_rate": 0.00019427841634738188, - "loss": 1.6668, + "learning_rate": 0.0001943295019157088, + "loss": 1.4501, "step": 115 }, { "epoch": 0.44, - "eval_accuracy": 0.3218390804597701, - "eval_loss": 2.0078558921813965, - "eval_runtime": 90.0246, - "eval_samples_per_second": 1.933, - "eval_steps_per_second": 0.244, + "eval_accuracy": 0.43103448275862066, + "eval_loss": 1.6648517847061157, + "eval_runtime": 144.1195, + "eval_samples_per_second": 1.207, + "eval_steps_per_second": 0.153, "step": 115 }, { "epoch": 0.46, - "learning_rate": 0.00019402298850574714, - "loss": 1.4628, + "learning_rate": 0.00019407407407407408, + "loss": 1.2894, "step": 120 }, { "epoch": 0.46, - "eval_accuracy": 0.3793103448275862, - "eval_loss": 1.891345500946045, - "eval_runtime": 90.081, - "eval_samples_per_second": 1.932, - "eval_steps_per_second": 0.244, + "eval_accuracy": 0.4425287356321839, + "eval_loss": 1.594812273979187, + "eval_runtime": 137.6698, + "eval_samples_per_second": 1.264, + "eval_steps_per_second": 0.16, "step": 120 }, { "epoch": 0.48, - "learning_rate": 0.0001937675606641124, - "loss": 1.8827, + "learning_rate": 0.00019381864623243934, + "loss": 1.4052, "step": 125 }, { "epoch": 0.48, - "eval_accuracy": 0.2988505747126437, - "eval_loss": 1.7698206901550293, - "eval_runtime": 86.9617, - "eval_samples_per_second": 2.001, - "eval_steps_per_second": 0.253, + "eval_accuracy": 0.39655172413793105, + "eval_loss": 1.7239457368850708, + "eval_runtime": 143.1925, + "eval_samples_per_second": 1.215, + "eval_steps_per_second": 0.154, "step": 125 }, { "epoch": 0.5, - "learning_rate": 0.00019351213282247767, - "loss": 1.9941, + "learning_rate": 0.0001935632183908046, + "loss": 1.8974, "step": 130 }, { "epoch": 0.5, - "eval_accuracy": 0.29310344827586204, - "eval_loss": 1.7076359987258911, - "eval_runtime": 91.6164, - "eval_samples_per_second": 1.899, - "eval_steps_per_second": 0.24, + "eval_accuracy": 0.3218390804597701, + "eval_loss": 1.5980931520462036, + "eval_runtime": 140.1647, + "eval_samples_per_second": 1.241, + "eval_steps_per_second": 0.157, "step": 130 }, { "epoch": 0.52, - "learning_rate": 0.00019325670498084293, - "loss": 1.844, + "learning_rate": 0.00019330779054916987, + "loss": 1.7137, "step": 135 }, { "epoch": 0.52, - "eval_accuracy": 0.29310344827586204, - "eval_loss": 1.722959280014038, - "eval_runtime": 87.2052, - "eval_samples_per_second": 1.995, - "eval_steps_per_second": 0.252, + "eval_accuracy": 0.3793103448275862, + "eval_loss": 1.4834296703338623, + "eval_runtime": 140.4065, + "eval_samples_per_second": 1.239, + "eval_steps_per_second": 0.157, "step": 135 }, { "epoch": 0.54, - "learning_rate": 0.0001930012771392082, - "loss": 1.5423, + "learning_rate": 0.00019305236270753513, + "loss": 1.3159, "step": 140 }, { "epoch": 0.54, - "eval_accuracy": 0.3793103448275862, - "eval_loss": 1.6389808654785156, - "eval_runtime": 87.1239, - "eval_samples_per_second": 1.997, - "eval_steps_per_second": 0.253, + "eval_accuracy": 0.5114942528735632, + "eval_loss": 1.342922329902649, + "eval_runtime": 140.6082, + "eval_samples_per_second": 1.237, + "eval_steps_per_second": 0.156, "step": 140 }, { "epoch": 0.56, - "learning_rate": 0.00019274584929757346, - "loss": 1.9086, + "learning_rate": 0.0001927969348659004, + "loss": 1.4547, "step": 145 }, { "epoch": 0.56, - "eval_accuracy": 0.3620689655172414, - "eval_loss": 1.720744013786316, - "eval_runtime": 85.5891, - "eval_samples_per_second": 2.033, - "eval_steps_per_second": 0.257, + "eval_accuracy": 0.43103448275862066, + "eval_loss": 1.4868438243865967, + "eval_runtime": 142.2009, + "eval_samples_per_second": 1.224, + "eval_steps_per_second": 0.155, "step": 145 }, { "epoch": 0.57, - "learning_rate": 0.0001924904214559387, - "loss": 1.572, + "learning_rate": 0.00019254150702426566, + "loss": 1.103, "step": 150 }, { "epoch": 0.57, - "eval_accuracy": 0.3160919540229885, - "eval_loss": 1.8218252658843994, - "eval_runtime": 88.7779, - "eval_samples_per_second": 1.96, - "eval_steps_per_second": 0.248, + "eval_accuracy": 0.40229885057471265, + "eval_loss": 1.5722764730453491, + "eval_runtime": 140.4303, + "eval_samples_per_second": 1.239, + "eval_steps_per_second": 0.157, "step": 150 }, { "epoch": 0.59, - "learning_rate": 0.00019223499361430398, - "loss": 1.8335, + "learning_rate": 0.00019228607918263092, + "loss": 1.8516, "step": 155 }, { "epoch": 0.59, - "eval_accuracy": 0.39080459770114945, - "eval_loss": 1.6242988109588623, - "eval_runtime": 87.9658, - "eval_samples_per_second": 1.978, - "eval_steps_per_second": 0.25, + "eval_accuracy": 0.47126436781609193, + "eval_loss": 1.3277612924575806, + "eval_runtime": 140.4968, + "eval_samples_per_second": 1.238, + "eval_steps_per_second": 0.157, "step": 155 }, { "epoch": 0.61, - "learning_rate": 0.00019197956577266922, - "loss": 1.5903, + "learning_rate": 0.00019203065134099618, + "loss": 1.3584, "step": 160 }, { "epoch": 0.61, - "eval_accuracy": 0.3850574712643678, - "eval_loss": 1.634774088859558, - "eval_runtime": 87.315, - "eval_samples_per_second": 1.993, - "eval_steps_per_second": 0.252, + "eval_accuracy": 0.4425287356321839, + "eval_loss": 1.4962925910949707, + "eval_runtime": 140.6072, + "eval_samples_per_second": 1.237, + "eval_steps_per_second": 0.156, "step": 160 }, { "epoch": 0.63, - "learning_rate": 0.0001917241379310345, - "loss": 1.7064, + "learning_rate": 0.00019177522349936145, + "loss": 1.7123, "step": 165 }, { "epoch": 0.63, "eval_accuracy": 0.42528735632183906, - "eval_loss": 1.5469759702682495, - "eval_runtime": 87.4736, - "eval_samples_per_second": 1.989, - "eval_steps_per_second": 0.252, + "eval_loss": 1.4932773113250732, + "eval_runtime": 137.6936, + "eval_samples_per_second": 1.264, + "eval_steps_per_second": 0.16, "step": 165 }, { "epoch": 0.65, - "learning_rate": 0.00019146871008939975, - "loss": 1.3325, + "learning_rate": 0.0001915197956577267, + "loss": 1.123, "step": 170 }, { "epoch": 0.65, - "eval_accuracy": 0.43103448275862066, - "eval_loss": 1.5237237215042114, - "eval_runtime": 88.9581, - "eval_samples_per_second": 1.956, - "eval_steps_per_second": 0.247, + "eval_accuracy": 0.45977011494252873, + "eval_loss": 1.396934986114502, + "eval_runtime": 137.8732, + "eval_samples_per_second": 1.262, + "eval_steps_per_second": 0.16, "step": 170 }, { "epoch": 0.67, - "learning_rate": 0.00019121328224776504, - "loss": 1.5154, + "learning_rate": 0.00019126436781609197, + "loss": 1.3548, "step": 175 }, { "epoch": 0.67, - "eval_accuracy": 0.42528735632183906, - "eval_loss": 1.571236491203308, - "eval_runtime": 84.6969, - "eval_samples_per_second": 2.054, - "eval_steps_per_second": 0.26, + "eval_accuracy": 0.46551724137931033, + "eval_loss": 1.5320322513580322, + "eval_runtime": 133.0083, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 175 }, { "epoch": 0.69, - "learning_rate": 0.00019095785440613027, - "loss": 1.7564, + "learning_rate": 0.00019100893997445724, + "loss": 1.7216, "step": 180 }, { "epoch": 0.69, - "eval_accuracy": 0.2988505747126437, - "eval_loss": 1.5861365795135498, - "eval_runtime": 89.2791, - "eval_samples_per_second": 1.949, - "eval_steps_per_second": 0.246, + "eval_accuracy": 0.39655172413793105, + "eval_loss": 1.4706974029541016, + "eval_runtime": 134.1308, + "eval_samples_per_second": 1.297, + "eval_steps_per_second": 0.164, "step": 180 }, { "epoch": 0.71, - "learning_rate": 0.00019070242656449554, - "loss": 1.3903, + "learning_rate": 0.0001907535121328225, + "loss": 1.3303, "step": 185 }, { "epoch": 0.71, - "eval_accuracy": 0.25862068965517243, - "eval_loss": 1.8145408630371094, - "eval_runtime": 85.8558, - "eval_samples_per_second": 2.027, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.39655172413793105, + "eval_loss": 1.4342669248580933, + "eval_runtime": 133.9911, + "eval_samples_per_second": 1.299, + "eval_steps_per_second": 0.164, "step": 185 }, { "epoch": 0.73, - "learning_rate": 0.0001904469987228608, - "loss": 2.0853, + "learning_rate": 0.00019049808429118774, + "loss": 1.4668, "step": 190 }, { "epoch": 0.73, - "eval_accuracy": 0.367816091954023, - "eval_loss": 1.5079203844070435, - "eval_runtime": 89.4633, - "eval_samples_per_second": 1.945, - "eval_steps_per_second": 0.246, + "eval_accuracy": 0.4425287356321839, + "eval_loss": 1.375001311302185, + "eval_runtime": 133.25, + "eval_samples_per_second": 1.306, + "eval_steps_per_second": 0.165, "step": 190 }, { "epoch": 0.75, "learning_rate": 0.00019024265644955303, - "loss": 1.5312, + "loss": 1.5037, "step": 195 }, { "epoch": 0.75, - "eval_accuracy": 0.4482758620689655, - "eval_loss": 1.4408893585205078, - "eval_runtime": 87.6476, - "eval_samples_per_second": 1.985, - "eval_steps_per_second": 0.251, + "eval_accuracy": 0.4367816091954023, + "eval_loss": 1.3593645095825195, + "eval_runtime": 133.4768, + "eval_samples_per_second": 1.304, + "eval_steps_per_second": 0.165, "step": 195 }, { "epoch": 0.77, "learning_rate": 0.00018998722860791826, - "loss": 1.2828, + "loss": 1.2836, "step": 200 }, { "epoch": 0.77, - "eval_accuracy": 0.39655172413793105, - "eval_loss": 1.600140929222107, - "eval_runtime": 86.6087, - "eval_samples_per_second": 2.009, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.4827586206896552, + "eval_loss": 1.3348760604858398, + "eval_runtime": 133.1452, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 200 }, { "epoch": 0.79, "learning_rate": 0.00018973180076628355, - "loss": 1.9389, + "loss": 1.5183, "step": 205 }, { "epoch": 0.79, - "eval_accuracy": 0.367816091954023, - "eval_loss": 1.7927314043045044, - "eval_runtime": 90.3234, - "eval_samples_per_second": 1.926, - "eval_steps_per_second": 0.244, + "eval_accuracy": 0.3275862068965517, + "eval_loss": 1.6030648946762085, + "eval_runtime": 133.0355, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 205 }, { "epoch": 0.8, "learning_rate": 0.0001894763729246488, - "loss": 1.5486, + "loss": 1.4127, "step": 210 }, { "epoch": 0.8, - "eval_accuracy": 0.39080459770114945, - "eval_loss": 1.5749437808990479, - "eval_runtime": 86.7654, - "eval_samples_per_second": 2.005, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.5172413793103449, + "eval_loss": 1.4263323545455933, + "eval_runtime": 133.148, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 210 }, { "epoch": 0.82, "learning_rate": 0.00018922094508301408, - "loss": 1.4306, + "loss": 1.1152, "step": 215 }, { "epoch": 0.82, - "eval_accuracy": 0.28160919540229884, - "eval_loss": 1.7231699228286743, - "eval_runtime": 87.5623, - "eval_samples_per_second": 1.987, - "eval_steps_per_second": 0.251, + "eval_accuracy": 0.4540229885057471, + "eval_loss": 1.3836369514465332, + "eval_runtime": 136.7056, + "eval_samples_per_second": 1.273, + "eval_steps_per_second": 0.161, "step": 215 }, { "epoch": 0.84, "learning_rate": 0.00018896551724137932, - "loss": 1.814, + "loss": 1.5974, "step": 220 }, { "epoch": 0.84, - "eval_accuracy": 0.25862068965517243, - "eval_loss": 1.7349094152450562, - "eval_runtime": 87.3298, - "eval_samples_per_second": 1.992, - "eval_steps_per_second": 0.252, + "eval_accuracy": 0.47701149425287354, + "eval_loss": 1.433830738067627, + "eval_runtime": 132.8306, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 220 }, { "epoch": 0.86, "learning_rate": 0.00018871008939974458, - "loss": 1.7483, + "loss": 1.6355, "step": 225 }, { "epoch": 0.86, - "eval_accuracy": 0.4482758620689655, - "eval_loss": 1.606767177581787, - "eval_runtime": 88.6573, - "eval_samples_per_second": 1.963, - "eval_steps_per_second": 0.248, + "eval_accuracy": 0.5632183908045977, + "eval_loss": 1.3150310516357422, + "eval_runtime": 133.4297, + "eval_samples_per_second": 1.304, + "eval_steps_per_second": 0.165, "step": 225 }, { "epoch": 0.88, "learning_rate": 0.00018845466155810984, - "loss": 1.836, + "loss": 1.3566, "step": 230 }, { "epoch": 0.88, - "eval_accuracy": 0.43103448275862066, - "eval_loss": 1.6200461387634277, - "eval_runtime": 86.663, - "eval_samples_per_second": 2.008, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.4482758620689655, + "eval_loss": 1.5567545890808105, + "eval_runtime": 132.1689, + "eval_samples_per_second": 1.316, + "eval_steps_per_second": 0.166, "step": 230 }, { "epoch": 0.9, "learning_rate": 0.0001881992337164751, - "loss": 1.8752, + "loss": 1.9474, "step": 235 }, { "epoch": 0.9, - "eval_accuracy": 0.41954022988505746, - "eval_loss": 1.4891613721847534, - "eval_runtime": 87.2585, - "eval_samples_per_second": 1.994, - "eval_steps_per_second": 0.252, + "eval_accuracy": 0.47701149425287354, + "eval_loss": 1.5523122549057007, + "eval_runtime": 132.1649, + "eval_samples_per_second": 1.317, + "eval_steps_per_second": 0.166, "step": 235 }, { "epoch": 0.92, "learning_rate": 0.00018794380587484037, - "loss": 1.3274, + "loss": 1.0851, "step": 240 }, { "epoch": 0.92, - "eval_accuracy": 0.4482758620689655, - "eval_loss": 1.5101828575134277, - "eval_runtime": 89.5236, - "eval_samples_per_second": 1.944, - "eval_steps_per_second": 0.246, + "eval_accuracy": 0.47701149425287354, + "eval_loss": 1.5121805667877197, + "eval_runtime": 131.6317, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 240 }, { "epoch": 0.94, "learning_rate": 0.00018768837803320563, - "loss": 1.1109, + "loss": 1.1129, "step": 245 }, { "epoch": 0.94, - "eval_accuracy": 0.45977011494252873, - "eval_loss": 1.6440746784210205, - "eval_runtime": 86.2143, - "eval_samples_per_second": 2.018, - "eval_steps_per_second": 0.255, + "eval_accuracy": 0.41954022988505746, + "eval_loss": 1.6791616678237915, + "eval_runtime": 131.7928, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 245 }, { "epoch": 0.96, "learning_rate": 0.0001874329501915709, - "loss": 1.137, + "loss": 0.9682, "step": 250 }, { "epoch": 0.96, - "eval_accuracy": 0.4885057471264368, - "eval_loss": 1.6819708347320557, - "eval_runtime": 86.701, - "eval_samples_per_second": 2.007, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.4827586206896552, + "eval_loss": 1.5817574262619019, + "eval_runtime": 131.5171, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 250 }, { "epoch": 0.98, "learning_rate": 0.00018717752234993616, - "loss": 2.029, + "loss": 1.9887, "step": 255 }, { "epoch": 0.98, - "eval_accuracy": 0.4425287356321839, - "eval_loss": 1.5568251609802246, - "eval_runtime": 89.066, - "eval_samples_per_second": 1.954, - "eval_steps_per_second": 0.247, + "eval_accuracy": 0.5402298850574713, + "eval_loss": 1.4138919115066528, + "eval_runtime": 131.7638, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 255 }, { "epoch": 1.0, "learning_rate": 0.0001869220945083014, - "loss": 1.3499, + "loss": 1.3416, "step": 260 }, { "epoch": 1.0, - "eval_accuracy": 0.47126436781609193, - "eval_loss": 1.5453182458877563, - "eval_runtime": 88.7892, - "eval_samples_per_second": 1.96, - "eval_steps_per_second": 0.248, + "eval_accuracy": 0.46551724137931033, + "eval_loss": 1.4067350625991821, + "eval_runtime": 131.7587, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 260 }, { "epoch": 1.02, "learning_rate": 0.0001866666666666667, - "loss": 1.6062, + "loss": 1.4848, "step": 265 }, { "epoch": 1.02, - "eval_accuracy": 0.3850574712643678, - "eval_loss": 1.7614227533340454, - "eval_runtime": 88.9814, - "eval_samples_per_second": 1.955, - "eval_steps_per_second": 0.247, + "eval_accuracy": 0.4425287356321839, + "eval_loss": 1.7022287845611572, + "eval_runtime": 131.6479, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 265 }, { "epoch": 1.03, "learning_rate": 0.00018641123882503192, - "loss": 1.2653, + "loss": 1.1562, "step": 270 }, { "epoch": 1.03, - "eval_accuracy": 0.40804597701149425, - "eval_loss": 1.680598497390747, - "eval_runtime": 89.0981, - "eval_samples_per_second": 1.953, - "eval_steps_per_second": 0.247, + "eval_accuracy": 0.4540229885057471, + "eval_loss": 1.6273655891418457, + "eval_runtime": 134.6063, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.163, "step": 270 }, { "epoch": 1.05, "learning_rate": 0.0001861558109833972, - "loss": 1.5162, + "loss": 1.3024, "step": 275 }, { "epoch": 1.05, - "eval_accuracy": 0.3160919540229885, - "eval_loss": 2.1192500591278076, - "eval_runtime": 87.4979, - "eval_samples_per_second": 1.989, - "eval_steps_per_second": 0.251, + "eval_accuracy": 0.41954022988505746, + "eval_loss": 1.60133695602417, + "eval_runtime": 131.5705, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 275 }, { "epoch": 1.07, "learning_rate": 0.00018590038314176245, - "loss": 1.8098, + "loss": 1.2188, "step": 280 }, { "epoch": 1.07, - "eval_accuracy": 0.4540229885057471, - "eval_loss": 1.5041106939315796, - "eval_runtime": 86.6499, - "eval_samples_per_second": 2.008, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.5172413793103449, + "eval_loss": 1.4648607969284058, + "eval_runtime": 131.6668, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 280 }, { "epoch": 1.09, "learning_rate": 0.00018564495530012774, - "loss": 1.5888, + "loss": 1.5389, "step": 285 }, { "epoch": 1.09, - "eval_accuracy": 0.3735632183908046, - "eval_loss": 1.6979694366455078, - "eval_runtime": 84.5366, - "eval_samples_per_second": 2.058, - "eval_steps_per_second": 0.26, + "eval_accuracy": 0.4540229885057471, + "eval_loss": 1.4482660293579102, + "eval_runtime": 131.5739, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 285 }, { "epoch": 1.11, "learning_rate": 0.00018538952745849298, - "loss": 1.57, + "loss": 1.4023, "step": 290 }, { "epoch": 1.11, - "eval_accuracy": 0.3850574712643678, - "eval_loss": 1.5114161968231201, - "eval_runtime": 84.8072, - "eval_samples_per_second": 2.052, - "eval_steps_per_second": 0.259, + "eval_accuracy": 0.5114942528735632, + "eval_loss": 1.3087146282196045, + "eval_runtime": 134.5049, + "eval_samples_per_second": 1.294, + "eval_steps_per_second": 0.164, "step": 290 }, { "epoch": 1.13, "learning_rate": 0.00018513409961685824, - "loss": 1.3931, + "loss": 1.3401, "step": 295 }, { "epoch": 1.13, - "eval_accuracy": 0.40804597701149425, - "eval_loss": 1.5041882991790771, - "eval_runtime": 84.3882, - "eval_samples_per_second": 2.062, - "eval_steps_per_second": 0.261, + "eval_accuracy": 0.4942528735632184, + "eval_loss": 1.3514604568481445, + "eval_runtime": 133.6657, + "eval_samples_per_second": 1.302, + "eval_steps_per_second": 0.165, "step": 295 }, { "epoch": 1.15, "learning_rate": 0.0001848786717752235, - "loss": 1.494, + "loss": 1.6219, "step": 300 }, { "epoch": 1.15, - "eval_accuracy": 0.4425287356321839, - "eval_loss": 1.4944647550582886, - "eval_runtime": 87.1429, - "eval_samples_per_second": 1.997, - "eval_steps_per_second": 0.252, + "eval_accuracy": 0.45977011494252873, + "eval_loss": 1.301418423652649, + "eval_runtime": 137.1049, + "eval_samples_per_second": 1.269, + "eval_steps_per_second": 0.16, "step": 300 }, { "epoch": 1.17, "learning_rate": 0.00018462324393358877, - "loss": 1.2355, + "loss": 1.2755, "step": 305 }, { "epoch": 1.17, - "eval_accuracy": 0.47701149425287354, - "eval_loss": 1.4152653217315674, - "eval_runtime": 84.3818, - "eval_samples_per_second": 2.062, - "eval_steps_per_second": 0.261, + "eval_accuracy": 0.4942528735632184, + "eval_loss": 1.3619904518127441, + "eval_runtime": 133.0134, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 305 }, { "epoch": 1.19, "learning_rate": 0.00018436781609195403, - "loss": 1.9234, + "loss": 1.2092, "step": 310 }, { "epoch": 1.19, - "eval_accuracy": 0.46551724137931033, - "eval_loss": 1.3996832370758057, - "eval_runtime": 87.1832, - "eval_samples_per_second": 1.996, - "eval_steps_per_second": 0.252, + "eval_accuracy": 0.5689655172413793, + "eval_loss": 1.1019761562347412, + "eval_runtime": 136.2481, + "eval_samples_per_second": 1.277, + "eval_steps_per_second": 0.161, "step": 310 }, { "epoch": 1.21, "learning_rate": 0.0001841123882503193, - "loss": 1.2396, + "loss": 1.0885, "step": 315 }, { "epoch": 1.21, - "eval_accuracy": 0.46551724137931033, - "eval_loss": 1.3299652338027954, - "eval_runtime": 84.5376, - "eval_samples_per_second": 2.058, - "eval_steps_per_second": 0.26, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.226359248161316, + "eval_runtime": 132.4179, + "eval_samples_per_second": 1.314, + "eval_steps_per_second": 0.166, "step": 315 }, { "epoch": 1.23, "learning_rate": 0.00018385696040868456, - "loss": 1.8784, + "loss": 1.7791, "step": 320 }, { "epoch": 1.23, - "eval_accuracy": 0.4425287356321839, - "eval_loss": 1.5061637163162231, - "eval_runtime": 84.8318, - "eval_samples_per_second": 2.051, - "eval_steps_per_second": 0.259, + "eval_accuracy": 0.4885057471264368, + "eval_loss": 1.3002170324325562, + "eval_runtime": 132.5532, + "eval_samples_per_second": 1.313, + "eval_steps_per_second": 0.166, "step": 320 }, { "epoch": 1.25, "learning_rate": 0.00018360153256704982, - "loss": 1.2335, + "loss": 1.1917, "step": 325 }, { "epoch": 1.25, "eval_accuracy": 0.46551724137931033, - "eval_loss": 1.3658419847488403, - "eval_runtime": 86.8661, - "eval_samples_per_second": 2.003, - "eval_steps_per_second": 0.253, + "eval_loss": 1.3063517808914185, + "eval_runtime": 136.0253, + "eval_samples_per_second": 1.279, + "eval_steps_per_second": 0.162, "step": 325 }, { "epoch": 1.26, "learning_rate": 0.00018334610472541506, - "loss": 1.2988, + "loss": 1.2198, "step": 330 }, { "epoch": 1.26, - "eval_accuracy": 0.5114942528735632, - "eval_loss": 1.4404170513153076, - "eval_runtime": 84.7349, - "eval_samples_per_second": 2.053, - "eval_steps_per_second": 0.26, + "eval_accuracy": 0.5, + "eval_loss": 1.2917166948318481, + "eval_runtime": 132.7461, + "eval_samples_per_second": 1.311, + "eval_steps_per_second": 0.166, "step": 330 }, { "epoch": 1.28, "learning_rate": 0.00018309067688378035, - "loss": 1.6458, + "loss": 1.1334, "step": 335 }, { "epoch": 1.28, - "eval_accuracy": 0.47701149425287354, - "eval_loss": 1.389655590057373, - "eval_runtime": 86.6616, - "eval_samples_per_second": 2.008, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.4827586206896552, + "eval_loss": 1.4044328927993774, + "eval_runtime": 134.1548, + "eval_samples_per_second": 1.297, + "eval_steps_per_second": 0.164, "step": 335 }, { "epoch": 1.3, "learning_rate": 0.00018283524904214558, - "loss": 1.4325, + "loss": 0.7824, "step": 340 }, { "epoch": 1.3, - "eval_accuracy": 0.3390804597701149, - "eval_loss": 1.943527102470398, - "eval_runtime": 84.9859, - "eval_samples_per_second": 2.047, - "eval_steps_per_second": 0.259, + "eval_accuracy": 0.5344827586206896, + "eval_loss": 1.2563246488571167, + "eval_runtime": 134.0894, + "eval_samples_per_second": 1.298, + "eval_steps_per_second": 0.164, "step": 340 }, { "epoch": 1.32, "learning_rate": 0.00018257982120051087, - "loss": 1.8258, + "loss": 1.5005, "step": 345 }, { "epoch": 1.32, - "eval_accuracy": 0.41379310344827586, - "eval_loss": 1.674710988998413, - "eval_runtime": 85.9104, - "eval_samples_per_second": 2.025, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.5402298850574713, + "eval_loss": 1.3932344913482666, + "eval_runtime": 132.8468, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 345 }, { "epoch": 1.34, "learning_rate": 0.0001823243933588761, - "loss": 1.6398, + "loss": 1.3661, "step": 350 }, { "epoch": 1.34, - "eval_accuracy": 0.43103448275862066, - "eval_loss": 1.537279486656189, - "eval_runtime": 87.0005, - "eval_samples_per_second": 2.0, - "eval_steps_per_second": 0.253, + "eval_accuracy": 0.5057471264367817, + "eval_loss": 1.5287795066833496, + "eval_runtime": 133.3154, + "eval_samples_per_second": 1.305, + "eval_steps_per_second": 0.165, "step": 350 }, { "epoch": 1.36, "learning_rate": 0.0001820689655172414, - "loss": 1.3836, + "loss": 1.0608, "step": 355 }, { "epoch": 1.36, - "eval_accuracy": 0.4540229885057471, - "eval_loss": 1.530836820602417, - "eval_runtime": 85.6209, - "eval_samples_per_second": 2.032, - "eval_steps_per_second": 0.257, + "eval_accuracy": 0.4827586206896552, + "eval_loss": 1.6409446001052856, + "eval_runtime": 136.3939, + "eval_samples_per_second": 1.276, + "eval_steps_per_second": 0.161, "step": 355 }, { "epoch": 1.38, "learning_rate": 0.00018181353767560664, - "loss": 1.1067, + "loss": 1.3781, "step": 360 }, { "epoch": 1.38, - "eval_accuracy": 0.45977011494252873, - "eval_loss": 1.5031547546386719, - "eval_runtime": 86.5596, - "eval_samples_per_second": 2.01, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.41379310344827586, + "eval_loss": 1.4301259517669678, + "eval_runtime": 132.5893, + "eval_samples_per_second": 1.312, + "eval_steps_per_second": 0.166, "step": 360 }, { "epoch": 1.4, "learning_rate": 0.0001815581098339719, - "loss": 1.4948, + "loss": 1.3579, "step": 365 }, { "epoch": 1.4, - "eval_accuracy": 0.47701149425287354, - "eval_loss": 1.4820023775100708, - "eval_runtime": 87.8688, - "eval_samples_per_second": 1.98, - "eval_steps_per_second": 0.25, + "eval_accuracy": 0.47126436781609193, + "eval_loss": 1.3473687171936035, + "eval_runtime": 135.8729, + "eval_samples_per_second": 1.281, + "eval_steps_per_second": 0.162, "step": 365 }, { "epoch": 1.42, "learning_rate": 0.00018130268199233716, - "loss": 1.3582, + "loss": 1.536, "step": 370 }, { "epoch": 1.42, - "eval_accuracy": 0.41954022988505746, - "eval_loss": 1.455491304397583, - "eval_runtime": 85.3532, - "eval_samples_per_second": 2.039, - "eval_steps_per_second": 0.258, + "eval_accuracy": 0.43103448275862066, + "eval_loss": 1.419403076171875, + "eval_runtime": 133.0684, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 370 }, { "epoch": 1.44, "learning_rate": 0.00018104725415070243, - "loss": 1.2616, + "loss": 1.3282, "step": 375 }, { "epoch": 1.44, - "eval_accuracy": 0.46551724137931033, - "eval_loss": 1.3622076511383057, - "eval_runtime": 85.6174, - "eval_samples_per_second": 2.032, - "eval_steps_per_second": 0.257, + "eval_accuracy": 0.5057471264367817, + "eval_loss": 1.365115761756897, + "eval_runtime": 132.7207, + "eval_samples_per_second": 1.311, + "eval_steps_per_second": 0.166, "step": 375 }, { "epoch": 1.46, "learning_rate": 0.0001807918263090677, - "loss": 1.4582, + "loss": 1.4472, "step": 380 }, { "epoch": 1.46, - "eval_accuracy": 0.4942528735632184, - "eval_loss": 1.2709373235702515, - "eval_runtime": 87.9037, - "eval_samples_per_second": 1.979, - "eval_steps_per_second": 0.25, + "eval_accuracy": 0.5402298850574713, + "eval_loss": 1.1580694913864136, + "eval_runtime": 133.2179, + "eval_samples_per_second": 1.306, + "eval_steps_per_second": 0.165, "step": 380 }, { "epoch": 1.48, "learning_rate": 0.00018053639846743295, - "loss": 1.7958, + "loss": 1.3804, "step": 385 }, { "epoch": 1.48, - "eval_accuracy": 0.3620689655172414, - "eval_loss": 1.5655514001846313, - "eval_runtime": 89.7184, - "eval_samples_per_second": 1.939, - "eval_steps_per_second": 0.245, + "eval_accuracy": 0.43103448275862066, + "eval_loss": 1.3844115734100342, + "eval_runtime": 131.9177, + "eval_samples_per_second": 1.319, + "eval_steps_per_second": 0.167, "step": 385 }, { "epoch": 1.49, "learning_rate": 0.00018028097062579822, - "loss": 1.4743, + "loss": 1.3419, "step": 390 }, { "epoch": 1.49, - "eval_accuracy": 0.4367816091954023, - "eval_loss": 1.3905311822891235, - "eval_runtime": 87.4724, - "eval_samples_per_second": 1.989, - "eval_steps_per_second": 0.252, + "eval_accuracy": 0.5114942528735632, + "eval_loss": 1.1873112916946411, + "eval_runtime": 131.4991, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 390 }, { "epoch": 1.51, "learning_rate": 0.00018002554278416348, - "loss": 1.3111, + "loss": 0.9519, "step": 395 }, { "epoch": 1.51, - "eval_accuracy": 0.5287356321839081, - "eval_loss": 1.3618022203445435, - "eval_runtime": 85.8256, - "eval_samples_per_second": 2.027, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.5402298850574713, + "eval_loss": 1.1380841732025146, + "eval_runtime": 131.7687, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 395 }, { "epoch": 1.53, "learning_rate": 0.00017977011494252874, - "loss": 1.1186, + "loss": 0.9128, "step": 400 }, { "epoch": 1.53, - "eval_accuracy": 0.5, - "eval_loss": 1.4678940773010254, - "eval_runtime": 87.4839, - "eval_samples_per_second": 1.989, - "eval_steps_per_second": 0.251, + "eval_accuracy": 0.47126436781609193, + "eval_loss": 1.3454612493515015, + "eval_runtime": 132.2394, + "eval_samples_per_second": 1.316, + "eval_steps_per_second": 0.166, "step": 400 }, { "epoch": 1.55, "learning_rate": 0.000179514687100894, - "loss": 1.3566, + "loss": 1.4765, "step": 405 }, { "epoch": 1.55, - "eval_accuracy": 0.47701149425287354, - "eval_loss": 1.6265980005264282, - "eval_runtime": 85.8882, - "eval_samples_per_second": 2.026, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.4885057471264368, + "eval_loss": 1.3226633071899414, + "eval_runtime": 131.5246, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 405 }, { "epoch": 1.57, "learning_rate": 0.00017925925925925927, - "loss": 1.4949, + "loss": 1.3164, "step": 410 }, { "epoch": 1.57, - "eval_accuracy": 0.5057471264367817, - "eval_loss": 1.4489529132843018, - "eval_runtime": 84.8544, - "eval_samples_per_second": 2.051, - "eval_steps_per_second": 0.259, + "eval_accuracy": 0.5344827586206896, + "eval_loss": 1.489040732383728, + "eval_runtime": 131.6528, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 410 }, { "epoch": 1.59, "learning_rate": 0.00017900383141762453, - "loss": 1.2182, + "loss": 1.1459, "step": 415 }, { "epoch": 1.59, - "eval_accuracy": 0.5459770114942529, - "eval_loss": 1.152848720550537, - "eval_runtime": 86.7262, - "eval_samples_per_second": 2.006, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.603448275862069, + "eval_loss": 0.9928140044212341, + "eval_runtime": 132.4729, + "eval_samples_per_second": 1.313, + "eval_steps_per_second": 0.166, "step": 415 }, { "epoch": 1.61, "learning_rate": 0.0001787484035759898, - "loss": 1.1455, + "loss": 0.8739, "step": 420 }, { "epoch": 1.61, - "eval_accuracy": 0.4540229885057471, - "eval_loss": 1.2889221906661987, - "eval_runtime": 89.054, - "eval_samples_per_second": 1.954, - "eval_steps_per_second": 0.247, + "eval_accuracy": 0.5459770114942529, + "eval_loss": 1.1961568593978882, + "eval_runtime": 131.6996, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 420 }, { "epoch": 1.63, "learning_rate": 0.00017849297573435506, - "loss": 1.0795, + "loss": 1.0313, "step": 425 }, { "epoch": 1.63, - "eval_accuracy": 0.4827586206896552, - "eval_loss": 1.4589430093765259, - "eval_runtime": 85.7656, - "eval_samples_per_second": 2.029, - "eval_steps_per_second": 0.257, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.2308257818222046, + "eval_runtime": 131.6254, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 425 }, { "epoch": 1.65, "learning_rate": 0.00017823754789272032, - "loss": 1.2771, + "loss": 1.1317, "step": 430 }, { "epoch": 1.65, - "eval_accuracy": 0.5114942528735632, - "eval_loss": 1.5259735584259033, - "eval_runtime": 85.8919, - "eval_samples_per_second": 2.026, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.5459770114942529, + "eval_loss": 1.2264448404312134, + "eval_runtime": 131.6761, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 430 }, { "epoch": 1.67, "learning_rate": 0.0001779821200510856, - "loss": 1.2858, + "loss": 1.022, "step": 435 }, { "epoch": 1.67, - "eval_accuracy": 0.4885057471264368, - "eval_loss": 1.4051034450531006, - "eval_runtime": 86.1444, - "eval_samples_per_second": 2.02, - "eval_steps_per_second": 0.255, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.1023842096328735, + "eval_runtime": 131.5135, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 435 }, { "epoch": 1.69, "learning_rate": 0.00017772669220945085, - "loss": 1.0829, + "loss": 0.9706, "step": 440 }, { "epoch": 1.69, - "eval_accuracy": 0.5344827586206896, - "eval_loss": 1.3036625385284424, - "eval_runtime": 85.2368, - "eval_samples_per_second": 2.041, - "eval_steps_per_second": 0.258, + "eval_accuracy": 0.5517241379310345, + "eval_loss": 1.372861623764038, + "eval_runtime": 131.6429, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 440 }, { "epoch": 1.7, "learning_rate": 0.00017747126436781609, - "loss": 1.5866, + "loss": 1.7438, "step": 445 }, { "epoch": 1.7, - "eval_accuracy": 0.5229885057471264, - "eval_loss": 1.328389286994934, - "eval_runtime": 85.533, - "eval_samples_per_second": 2.034, - "eval_steps_per_second": 0.257, + "eval_accuracy": 0.5, + "eval_loss": 1.346054196357727, + "eval_runtime": 131.6993, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 445 }, { "epoch": 1.72, "learning_rate": 0.00017721583652618138, - "loss": 1.2964, + "loss": 1.6391, "step": 450 }, { "epoch": 1.72, - "eval_accuracy": 0.5114942528735632, - "eval_loss": 1.2732243537902832, - "eval_runtime": 89.1901, - "eval_samples_per_second": 1.951, - "eval_steps_per_second": 0.247, + "eval_accuracy": 0.603448275862069, + "eval_loss": 1.139776587486267, + "eval_runtime": 131.5962, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 450 }, { "epoch": 1.74, "learning_rate": 0.0001769604086845466, - "loss": 1.1894, + "loss": 0.824, "step": 455 }, { "epoch": 1.74, - "eval_accuracy": 0.42528735632183906, - "eval_loss": 1.4987748861312866, - "eval_runtime": 85.0991, - "eval_samples_per_second": 2.045, - "eval_steps_per_second": 0.259, + "eval_accuracy": 0.45977011494252873, + "eval_loss": 1.4202344417572021, + "eval_runtime": 131.5281, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 455 }, { "epoch": 1.76, "learning_rate": 0.0001767049808429119, - "loss": 1.5003, + "loss": 1.6423, "step": 460 }, { "epoch": 1.76, - "eval_accuracy": 0.46551724137931033, - "eval_loss": 1.329574704170227, - "eval_runtime": 86.0734, - "eval_samples_per_second": 2.022, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.5574712643678161, + "eval_loss": 1.1726796627044678, + "eval_runtime": 131.5031, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 460 }, { "epoch": 1.78, "learning_rate": 0.00017644955300127714, - "loss": 1.1569, + "loss": 1.0754, "step": 465 }, { "epoch": 1.78, - "eval_accuracy": 0.5, - "eval_loss": 1.428106665611267, - "eval_runtime": 83.952, - "eval_samples_per_second": 2.073, - "eval_steps_per_second": 0.262, + "eval_accuracy": 0.5287356321839081, + "eval_loss": 1.1190135478973389, + "eval_runtime": 131.6364, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 465 }, { "epoch": 1.8, "learning_rate": 0.00017619412515964243, - "loss": 1.0751, + "loss": 1.0556, "step": 470 }, { "epoch": 1.8, - "eval_accuracy": 0.47126436781609193, - "eval_loss": 1.5787663459777832, - "eval_runtime": 85.6942, - "eval_samples_per_second": 2.03, - "eval_steps_per_second": 0.257, + "eval_accuracy": 0.45977011494252873, + "eval_loss": 1.3851169347763062, + "eval_runtime": 132.8284, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 470 }, { "epoch": 1.82, "learning_rate": 0.00017593869731800767, - "loss": 1.592, + "loss": 1.0506, "step": 475 }, { "epoch": 1.82, - "eval_accuracy": 0.5, - "eval_loss": 1.2109367847442627, - "eval_runtime": 84.8219, - "eval_samples_per_second": 2.051, - "eval_steps_per_second": 0.259, + "eval_accuracy": 0.4367816091954023, + "eval_loss": 1.5636210441589355, + "eval_runtime": 131.5546, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 475 }, { "epoch": 1.84, "learning_rate": 0.00017568326947637293, - "loss": 1.4279, + "loss": 1.6747, "step": 480 }, { "epoch": 1.84, - "eval_accuracy": 0.5287356321839081, - "eval_loss": 1.207238793373108, - "eval_runtime": 86.384, - "eval_samples_per_second": 2.014, - "eval_steps_per_second": 0.255, + "eval_accuracy": 0.47701149425287354, + "eval_loss": 1.355259895324707, + "eval_runtime": 131.7784, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 480 }, { "epoch": 1.86, "learning_rate": 0.0001754278416347382, - "loss": 1.19, + "loss": 1.3546, "step": 485 }, { "epoch": 1.86, - "eval_accuracy": 0.5402298850574713, - "eval_loss": 1.276541829109192, - "eval_runtime": 85.9845, - "eval_samples_per_second": 2.024, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.5172413793103449, + "eval_loss": 1.2008614540100098, + "eval_runtime": 131.685, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 485 }, { "epoch": 1.88, "learning_rate": 0.00017517241379310346, - "loss": 1.3807, + "loss": 1.1251, "step": 490 }, { "epoch": 1.88, - "eval_accuracy": 0.5229885057471264, - "eval_loss": 1.3951900005340576, - "eval_runtime": 86.4759, - "eval_samples_per_second": 2.012, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.0508471727371216, + "eval_runtime": 134.9935, + "eval_samples_per_second": 1.289, + "eval_steps_per_second": 0.163, "step": 490 }, { "epoch": 1.9, "learning_rate": 0.00017491698595146872, - "loss": 1.2857, + "loss": 1.1712, "step": 495 }, { "epoch": 1.9, - "eval_accuracy": 0.5057471264367817, - "eval_loss": 1.2476894855499268, - "eval_runtime": 85.1521, - "eval_samples_per_second": 2.043, - "eval_steps_per_second": 0.258, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.0730408430099487, + "eval_runtime": 131.6035, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 495 }, { "epoch": 1.92, "learning_rate": 0.00017466155810983398, - "loss": 1.229, + "loss": 1.1422, "step": 500 }, { "epoch": 1.92, - "eval_accuracy": 0.5229885057471264, - "eval_loss": 1.2090439796447754, - "eval_runtime": 86.6818, - "eval_samples_per_second": 2.007, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9529359936714172, + "eval_runtime": 131.4783, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 500 }, { "epoch": 1.93, "learning_rate": 0.00017440613026819925, - "loss": 0.9704, + "loss": 0.7611, "step": 505 }, { "epoch": 1.93, - "eval_accuracy": 0.5862068965517241, - "eval_loss": 1.0011608600616455, - "eval_runtime": 87.3092, - "eval_samples_per_second": 1.993, - "eval_steps_per_second": 0.252, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 0.8788040280342102, + "eval_runtime": 131.3992, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 505 }, { "epoch": 1.95, "learning_rate": 0.0001741507024265645, - "loss": 1.2077, + "loss": 0.8955, "step": 510 }, { "epoch": 1.95, - "eval_accuracy": 0.6091954022988506, - "eval_loss": 1.0754402875900269, - "eval_runtime": 85.1447, - "eval_samples_per_second": 2.044, - "eval_steps_per_second": 0.258, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 0.9696133732795715, + "eval_runtime": 131.6903, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 510 }, { "epoch": 1.97, "learning_rate": 0.00017389527458492975, - "loss": 1.31, + "loss": 1.1928, "step": 515 }, { "epoch": 1.97, - "eval_accuracy": 0.5747126436781609, - "eval_loss": 1.1699658632278442, - "eval_runtime": 85.8978, - "eval_samples_per_second": 2.026, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.5977011494252874, + "eval_loss": 0.977599561214447, + "eval_runtime": 131.6782, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 515 }, { "epoch": 1.99, "learning_rate": 0.00017363984674329504, - "loss": 0.9374, + "loss": 1.0149, "step": 520 }, { "epoch": 1.99, - "eval_accuracy": 0.5402298850574713, - "eval_loss": 1.1662834882736206, - "eval_runtime": 84.761, - "eval_samples_per_second": 2.053, - "eval_steps_per_second": 0.26, + "eval_accuracy": 0.5172413793103449, + "eval_loss": 1.3859111070632935, + "eval_runtime": 134.5921, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.163, "step": 520 }, { "epoch": 2.01, "learning_rate": 0.00017338441890166027, - "loss": 1.3027, + "loss": 1.3012, "step": 525 }, { "epoch": 2.01, - "eval_accuracy": 0.5057471264367817, - "eval_loss": 1.2674177885055542, - "eval_runtime": 87.1265, - "eval_samples_per_second": 1.997, - "eval_steps_per_second": 0.253, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.179227352142334, + "eval_runtime": 131.5454, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 525 }, { "epoch": 2.03, "learning_rate": 0.00017312899106002556, - "loss": 1.2744, + "loss": 1.1046, "step": 530 }, { "epoch": 2.03, - "eval_accuracy": 0.5229885057471264, - "eval_loss": 1.1397805213928223, - "eval_runtime": 87.5192, - "eval_samples_per_second": 1.988, - "eval_steps_per_second": 0.251, + "eval_accuracy": 0.5402298850574713, + "eval_loss": 1.3357328176498413, + "eval_runtime": 135.5522, + "eval_samples_per_second": 1.284, + "eval_steps_per_second": 0.162, "step": 530 }, { "epoch": 2.05, "learning_rate": 0.0001728735632183908, - "loss": 0.6539, + "loss": 0.818, "step": 535 }, { "epoch": 2.05, - "eval_accuracy": 0.5114942528735632, - "eval_loss": 1.3558400869369507, - "eval_runtime": 87.7373, - "eval_samples_per_second": 1.983, - "eval_steps_per_second": 0.251, + "eval_accuracy": 0.5402298850574713, + "eval_loss": 1.2086304426193237, + "eval_runtime": 131.7474, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 535 }, { "epoch": 2.07, "learning_rate": 0.0001726181353767561, - "loss": 1.3282, + "loss": 0.9141, "step": 540 }, { "epoch": 2.07, - "eval_accuracy": 0.4885057471264368, - "eval_loss": 1.671147346496582, - "eval_runtime": 85.9997, - "eval_samples_per_second": 2.023, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.3623217344284058, + "eval_runtime": 131.7011, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 540 }, { "epoch": 2.09, "learning_rate": 0.00017236270753512133, - "loss": 1.7389, + "loss": 1.3053, "step": 545 }, { "epoch": 2.09, - "eval_accuracy": 0.4885057471264368, - "eval_loss": 1.4171918630599976, - "eval_runtime": 86.1498, - "eval_samples_per_second": 2.02, - "eval_steps_per_second": 0.255, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.2260451316833496, + "eval_runtime": 131.6132, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 545 }, { "epoch": 2.11, "learning_rate": 0.0001721072796934866, - "loss": 0.8713, + "loss": 0.8546, "step": 550 }, { "epoch": 2.11, - "eval_accuracy": 0.45977011494252873, - "eval_loss": 1.4530651569366455, - "eval_runtime": 84.7588, - "eval_samples_per_second": 2.053, - "eval_steps_per_second": 0.26, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.1196646690368652, + "eval_runtime": 131.5362, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 550 }, { "epoch": 2.13, "learning_rate": 0.00017185185185185185, - "loss": 1.3864, + "loss": 1.0537, "step": 555 }, { "epoch": 2.13, - "eval_accuracy": 0.41954022988505746, - "eval_loss": 1.4029399156570435, - "eval_runtime": 86.0806, - "eval_samples_per_second": 2.021, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.2455346584320068, + "eval_runtime": 131.6252, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 555 }, { "epoch": 2.15, "learning_rate": 0.00017159642401021712, - "loss": 1.3272, + "loss": 1.1647, "step": 560 }, { "epoch": 2.15, - "eval_accuracy": 0.4942528735632184, - "eval_loss": 1.2591314315795898, - "eval_runtime": 85.0541, - "eval_samples_per_second": 2.046, - "eval_steps_per_second": 0.259, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.205663800239563, + "eval_runtime": 131.6224, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 560 }, { "epoch": 2.16, "learning_rate": 0.00017134099616858238, - "loss": 1.4224, + "loss": 1.3766, "step": 565 }, { "epoch": 2.16, - "eval_accuracy": 0.47701149425287354, - "eval_loss": 1.4082285165786743, - "eval_runtime": 86.6165, - "eval_samples_per_second": 2.009, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 1.0733550786972046, + "eval_runtime": 131.7488, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 565 }, { "epoch": 2.18, "learning_rate": 0.00017108556832694764, - "loss": 1.2348, + "loss": 1.0225, "step": 570 }, { "epoch": 2.18, - "eval_accuracy": 0.4942528735632184, - "eval_loss": 1.2709393501281738, - "eval_runtime": 86.2781, - "eval_samples_per_second": 2.017, - "eval_steps_per_second": 0.255, + "eval_accuracy": 0.5689655172413793, + "eval_loss": 1.123944878578186, + "eval_runtime": 132.6409, + "eval_samples_per_second": 1.312, + "eval_steps_per_second": 0.166, "step": 570 }, { "epoch": 2.2, "learning_rate": 0.0001708301404853129, - "loss": 1.4141, + "loss": 1.6453, "step": 575 }, { "epoch": 2.2, - "eval_accuracy": 0.5057471264367817, - "eval_loss": 1.338424801826477, - "eval_runtime": 83.8318, - "eval_samples_per_second": 2.076, - "eval_steps_per_second": 0.262, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.2486422061920166, + "eval_runtime": 131.72, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 575 }, { "epoch": 2.22, "learning_rate": 0.00017057471264367817, - "loss": 1.3264, + "loss": 1.0113, "step": 580 }, { "epoch": 2.22, - "eval_accuracy": 0.5287356321839081, - "eval_loss": 1.2666399478912354, - "eval_runtime": 85.5732, - "eval_samples_per_second": 2.033, - "eval_steps_per_second": 0.257, + "eval_accuracy": 0.5459770114942529, + "eval_loss": 1.1964409351348877, + "eval_runtime": 131.7543, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 580 }, { "epoch": 2.24, "learning_rate": 0.00017031928480204343, - "loss": 1.1512, + "loss": 1.0514, "step": 585 }, { "epoch": 2.24, - "eval_accuracy": 0.5172413793103449, - "eval_loss": 1.2081302404403687, - "eval_runtime": 86.2749, - "eval_samples_per_second": 2.017, - "eval_steps_per_second": 0.255, + "eval_accuracy": 0.5287356321839081, + "eval_loss": 1.1057004928588867, + "eval_runtime": 131.7576, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 585 }, { "epoch": 2.26, "learning_rate": 0.0001700638569604087, - "loss": 1.0147, + "loss": 0.9404, "step": 590 }, { "epoch": 2.26, - "eval_accuracy": 0.5114942528735632, - "eval_loss": 1.1952056884765625, - "eval_runtime": 84.237, - "eval_samples_per_second": 2.066, - "eval_steps_per_second": 0.261, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 0.9724916219711304, + "eval_runtime": 132.3, + "eval_samples_per_second": 1.315, + "eval_steps_per_second": 0.166, "step": 590 }, { "epoch": 2.28, "learning_rate": 0.00016980842911877396, - "loss": 1.1854, + "loss": 1.0388, "step": 595 }, { "epoch": 2.28, - "eval_accuracy": 0.5459770114942529, - "eval_loss": 1.1515438556671143, - "eval_runtime": 89.8852, - "eval_samples_per_second": 1.936, - "eval_steps_per_second": 0.245, + "eval_accuracy": 0.5517241379310345, + "eval_loss": 1.1437703371047974, + "eval_runtime": 131.5125, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 595 }, { "epoch": 2.3, "learning_rate": 0.00016955300127713922, - "loss": 1.1736, + "loss": 1.0182, "step": 600 }, { "epoch": 2.3, - "eval_accuracy": 0.5517241379310345, - "eval_loss": 1.132529377937317, - "eval_runtime": 86.1072, - "eval_samples_per_second": 2.021, - "eval_steps_per_second": 0.255, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.0556267499923706, + "eval_runtime": 131.6259, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 600 }, { "epoch": 2.32, "learning_rate": 0.0001692975734355045, - "loss": 0.8421, + "loss": 0.894, "step": 605 }, { "epoch": 2.32, - "eval_accuracy": 0.47126436781609193, - "eval_loss": 1.3057594299316406, - "eval_runtime": 86.3601, - "eval_samples_per_second": 2.015, - "eval_steps_per_second": 0.255, + "eval_accuracy": 0.4885057471264368, + "eval_loss": 1.2667330503463745, + "eval_runtime": 134.6245, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 605 }, { "epoch": 2.34, "learning_rate": 0.00016904214559386975, - "loss": 1.0093, + "loss": 0.8542, "step": 610 }, { "epoch": 2.34, - "eval_accuracy": 0.5344827586206896, - "eval_loss": 1.1371407508850098, - "eval_runtime": 86.415, - "eval_samples_per_second": 2.014, - "eval_steps_per_second": 0.255, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.1101963520050049, + "eval_runtime": 131.7211, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 610 }, { "epoch": 2.36, "learning_rate": 0.000168786717752235, - "loss": 1.096, + "loss": 1.1322, "step": 615 }, { "epoch": 2.36, - "eval_accuracy": 0.5402298850574713, - "eval_loss": 1.1986898183822632, - "eval_runtime": 86.4602, - "eval_samples_per_second": 2.012, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.0164850950241089, + "eval_runtime": 131.7375, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 615 }, { "epoch": 2.38, "learning_rate": 0.00016853128991060025, - "loss": 1.1738, + "loss": 1.0992, "step": 620 }, { "epoch": 2.38, - "eval_accuracy": 0.5229885057471264, - "eval_loss": 1.163020372390747, - "eval_runtime": 85.7622, - "eval_samples_per_second": 2.029, - "eval_steps_per_second": 0.257, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.0806705951690674, + "eval_runtime": 131.6335, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 620 }, { "epoch": 2.39, "learning_rate": 0.00016827586206896554, - "loss": 0.7222, + "loss": 0.7137, "step": 625 }, { "epoch": 2.39, - "eval_accuracy": 0.45977011494252873, - "eval_loss": 1.3792271614074707, - "eval_runtime": 85.7187, - "eval_samples_per_second": 2.03, - "eval_steps_per_second": 0.257, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.1090716123580933, + "eval_runtime": 135.3852, + "eval_samples_per_second": 1.285, + "eval_steps_per_second": 0.162, "step": 625 }, { "epoch": 2.41, "learning_rate": 0.00016802043422733078, - "loss": 1.259, + "loss": 0.8266, "step": 630 }, { "epoch": 2.41, - "eval_accuracy": 0.5057471264367817, - "eval_loss": 1.4273347854614258, - "eval_runtime": 85.4147, - "eval_samples_per_second": 2.037, - "eval_steps_per_second": 0.258, + "eval_accuracy": 0.5517241379310345, + "eval_loss": 1.1446123123168945, + "eval_runtime": 131.8178, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 630 }, { "epoch": 2.43, "learning_rate": 0.00016776500638569607, - "loss": 0.8788, + "loss": 0.7162, "step": 635 }, { "epoch": 2.43, - "eval_accuracy": 0.5114942528735632, - "eval_loss": 1.4665697813034058, - "eval_runtime": 86.7626, - "eval_samples_per_second": 2.005, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.5574712643678161, + "eval_loss": 1.2635241746902466, + "eval_runtime": 131.6312, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 635 }, { "epoch": 2.45, "learning_rate": 0.0001675095785440613, - "loss": 1.527, + "loss": 1.3462, "step": 640 }, { "epoch": 2.45, - "eval_accuracy": 0.47701149425287354, - "eval_loss": 1.4998698234558105, - "eval_runtime": 85.5223, - "eval_samples_per_second": 2.035, - "eval_steps_per_second": 0.257, + "eval_accuracy": 0.5632183908045977, + "eval_loss": 1.2049150466918945, + "eval_runtime": 131.4819, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 640 }, { "epoch": 2.47, "learning_rate": 0.0001672541507024266, - "loss": 1.1176, + "loss": 1.1599, "step": 645 }, { "epoch": 2.47, - "eval_accuracy": 0.45977011494252873, - "eval_loss": 1.571102261543274, - "eval_runtime": 84.3165, - "eval_samples_per_second": 2.064, - "eval_steps_per_second": 0.261, + "eval_accuracy": 0.5459770114942529, + "eval_loss": 1.0467811822891235, + "eval_runtime": 131.4566, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 645 }, { "epoch": 2.49, "learning_rate": 0.00016699872286079183, - "loss": 0.9834, + "loss": 0.9418, "step": 650 }, { "epoch": 2.49, - "eval_accuracy": 0.5, - "eval_loss": 1.5396226644515991, - "eval_runtime": 83.6612, - "eval_samples_per_second": 2.08, - "eval_steps_per_second": 0.263, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.0609161853790283, + "eval_runtime": 131.6905, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 650 }, { "epoch": 2.51, "learning_rate": 0.0001667432950191571, - "loss": 1.046, + "loss": 0.6358, "step": 655 }, { "epoch": 2.51, - "eval_accuracy": 0.5344827586206896, - "eval_loss": 1.2918277978897095, - "eval_runtime": 84.0945, - "eval_samples_per_second": 2.069, - "eval_steps_per_second": 0.262, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.0026150941848755, + "eval_runtime": 131.8875, + "eval_samples_per_second": 1.319, + "eval_steps_per_second": 0.167, "step": 655 }, { "epoch": 2.53, "learning_rate": 0.00016648786717752236, - "loss": 1.2347, + "loss": 0.9866, "step": 660 }, { "epoch": 2.53, - "eval_accuracy": 0.5459770114942529, - "eval_loss": 1.3252575397491455, - "eval_runtime": 86.0389, - "eval_samples_per_second": 2.022, - "eval_steps_per_second": 0.256, + "eval_accuracy": 0.5287356321839081, + "eval_loss": 1.2802002429962158, + "eval_runtime": 131.604, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 660 }, { "epoch": 2.55, "learning_rate": 0.00016623243933588762, - "loss": 0.7441, + "loss": 0.7078, "step": 665 }, { "epoch": 2.55, - "eval_accuracy": 0.5344827586206896, - "eval_loss": 1.4183677434921265, - "eval_runtime": 84.6125, - "eval_samples_per_second": 2.056, - "eval_steps_per_second": 0.26, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.2605894804000854, + "eval_runtime": 131.9489, + "eval_samples_per_second": 1.319, + "eval_steps_per_second": 0.167, "step": 665 }, { "epoch": 2.57, "learning_rate": 0.00016597701149425288, - "loss": 1.3217, + "loss": 1.317, "step": 670 }, { "epoch": 2.57, - "eval_accuracy": 0.5689655172413793, - "eval_loss": 1.2241300344467163, - "eval_runtime": 84.8503, - "eval_samples_per_second": 2.051, - "eval_steps_per_second": 0.259, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.24140465259552, + "eval_runtime": 133.607, + "eval_samples_per_second": 1.302, + "eval_steps_per_second": 0.165, "step": 670 }, { "epoch": 2.59, "learning_rate": 0.00016572158365261815, - "loss": 1.517, + "loss": 1.725, "step": 675 }, { "epoch": 2.59, - "eval_accuracy": 0.4482758620689655, - "eval_loss": 1.3935520648956299, - "eval_runtime": 85.2745, - "eval_samples_per_second": 2.04, - "eval_steps_per_second": 0.258, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 0.9985790252685547, + "eval_runtime": 132.9682, + "eval_samples_per_second": 1.309, + "eval_steps_per_second": 0.165, "step": 675 }, { "epoch": 2.61, "learning_rate": 0.0001654661558109834, - "loss": 1.5603, + "loss": 1.1582, "step": 680 }, { "epoch": 2.61, - "eval_accuracy": 0.5114942528735632, - "eval_loss": 1.2586129903793335, - "eval_runtime": 87.4344, - "eval_samples_per_second": 1.99, - "eval_steps_per_second": 0.252, + "eval_accuracy": 0.5574712643678161, + "eval_loss": 1.0938341617584229, + "eval_runtime": 133.1611, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 680 }, { "epoch": 2.62, "learning_rate": 0.00016521072796934867, - "loss": 1.0121, + "loss": 0.9204, "step": 685 }, { "epoch": 2.62, - "eval_accuracy": 0.5689655172413793, - "eval_loss": 1.1527811288833618, - "eval_runtime": 83.8274, - "eval_samples_per_second": 2.076, - "eval_steps_per_second": 0.262, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.0171951055526733, + "eval_runtime": 133.1482, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 685 }, { "epoch": 2.64, "learning_rate": 0.0001649553001277139, - "loss": 0.9157, + "loss": 0.8029, "step": 690 }, { "epoch": 2.64, - "eval_accuracy": 0.5862068965517241, - "eval_loss": 1.1511900424957275, - "eval_runtime": 85.1952, - "eval_samples_per_second": 2.042, - "eval_steps_per_second": 0.258, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.9562932252883911, + "eval_runtime": 135.7074, + "eval_samples_per_second": 1.282, + "eval_steps_per_second": 0.162, "step": 690 }, { "epoch": 2.66, "learning_rate": 0.0001646998722860792, - "loss": 1.0072, + "loss": 1.0699, "step": 695 }, { "epoch": 2.66, - "eval_accuracy": 0.5574712643678161, - "eval_loss": 1.2195591926574707, - "eval_runtime": 84.6368, - "eval_samples_per_second": 2.056, - "eval_steps_per_second": 0.26, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.8641893863677979, + "eval_runtime": 137.258, + "eval_samples_per_second": 1.268, + "eval_steps_per_second": 0.16, "step": 695 }, { "epoch": 2.68, "learning_rate": 0.00016444444444444444, - "loss": 1.1345, + "loss": 0.8689, "step": 700 }, { "epoch": 2.68, - "eval_accuracy": 0.5632183908045977, - "eval_loss": 1.088049054145813, - "eval_runtime": 84.7373, - "eval_samples_per_second": 2.053, - "eval_steps_per_second": 0.26, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.8628016710281372, + "eval_runtime": 136.3069, + "eval_samples_per_second": 1.277, + "eval_steps_per_second": 0.161, "step": 700 }, { "epoch": 2.7, "learning_rate": 0.00016418901660280973, - "loss": 1.1517, + "loss": 0.8868, "step": 705 }, { "epoch": 2.7, - "eval_accuracy": 0.5287356321839081, - "eval_loss": 1.1540721654891968, - "eval_runtime": 86.2918, - "eval_samples_per_second": 2.016, - "eval_steps_per_second": 0.255, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 0.924765944480896, + "eval_runtime": 133.9642, + "eval_samples_per_second": 1.299, + "eval_steps_per_second": 0.164, "step": 705 }, { "epoch": 2.72, "learning_rate": 0.00016393358876117496, - "loss": 1.2588, + "loss": 1.1199, "step": 710 }, { "epoch": 2.72, - "eval_accuracy": 0.6264367816091954, - "eval_loss": 1.1188956499099731, - "eval_runtime": 83.958, - "eval_samples_per_second": 2.072, - "eval_steps_per_second": 0.262, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.87518310546875, + "eval_runtime": 134.5558, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.164, "step": 710 }, { "epoch": 2.74, "learning_rate": 0.00016367816091954025, - "loss": 0.9318, + "loss": 0.8855, "step": 715 }, { "epoch": 2.74, - "eval_accuracy": 0.5574712643678161, - "eval_loss": 1.1550383567810059, - "eval_runtime": 83.2348, - "eval_samples_per_second": 2.09, - "eval_steps_per_second": 0.264, + "eval_accuracy": 0.4942528735632184, + "eval_loss": 1.2723013162612915, + "eval_runtime": 133.0158, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 715 }, { "epoch": 2.76, "learning_rate": 0.0001634227330779055, - "loss": 0.7644, + "loss": 0.8273, "step": 720 }, { "epoch": 2.76, - "eval_accuracy": 0.5747126436781609, - "eval_loss": 1.1556813716888428, - "eval_runtime": 83.996, - "eval_samples_per_second": 2.072, - "eval_steps_per_second": 0.262, + "eval_accuracy": 0.5804597701149425, + "eval_loss": 1.177869200706482, + "eval_runtime": 133.1085, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 720 }, { "epoch": 2.78, "learning_rate": 0.00016316730523627075, - "loss": 1.2642, + "loss": 1.1579, "step": 725 }, { "epoch": 2.78, - "eval_accuracy": 0.6206896551724138, - "eval_loss": 1.0784178972244263, - "eval_runtime": 86.6539, - "eval_samples_per_second": 2.008, - "eval_steps_per_second": 0.254, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 0.9605039358139038, + "eval_runtime": 133.5111, + "eval_samples_per_second": 1.303, + "eval_steps_per_second": 0.165, "step": 725 }, { "epoch": 2.8, "learning_rate": 0.00016291187739463602, - "loss": 1.3219, + "loss": 1.2568, "step": 730 }, { "epoch": 2.8, - "eval_accuracy": 0.5862068965517241, - "eval_loss": 1.0925211906433105, - "eval_runtime": 73.4182, - "eval_samples_per_second": 2.37, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.0269392728805542, + "eval_runtime": 136.0303, + "eval_samples_per_second": 1.279, + "eval_steps_per_second": 0.162, "step": 730 }, { "epoch": 2.82, "learning_rate": 0.00016265644955300128, - "loss": 1.2288, + "loss": 1.4258, "step": 735 }, { "epoch": 2.82, - "eval_accuracy": 0.6091954022988506, - "eval_loss": 1.0299782752990723, - "eval_runtime": 72.0457, - "eval_samples_per_second": 2.415, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.103205919265747, + "eval_runtime": 133.1609, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 735 }, { "epoch": 2.84, "learning_rate": 0.00016240102171136654, - "loss": 0.8471, + "loss": 0.9129, "step": 740 }, { "epoch": 2.84, - "eval_accuracy": 0.6091954022988506, - "eval_loss": 1.0471221208572388, - "eval_runtime": 70.8776, - "eval_samples_per_second": 2.455, - "eval_steps_per_second": 0.31, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9196251034736633, + "eval_runtime": 132.8322, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 740 }, { "epoch": 2.85, "learning_rate": 0.0001621455938697318, - "loss": 0.9432, + "loss": 1.1291, "step": 745 }, { "epoch": 2.85, - "eval_accuracy": 0.5689655172413793, - "eval_loss": 1.133671760559082, - "eval_runtime": 73.7879, - "eval_samples_per_second": 2.358, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.5804597701149425, + "eval_loss": 1.008170485496521, + "eval_runtime": 132.7388, + "eval_samples_per_second": 1.311, + "eval_steps_per_second": 0.166, "step": 745 }, { "epoch": 2.87, "learning_rate": 0.00016189016602809707, - "loss": 0.8942, + "loss": 0.679, "step": 750 }, { "epoch": 2.87, - "eval_accuracy": 0.5977011494252874, - "eval_loss": 1.034655213356018, - "eval_runtime": 73.4056, - "eval_samples_per_second": 2.37, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 0.9392971992492676, + "eval_runtime": 133.0347, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 750 }, { "epoch": 2.89, "learning_rate": 0.00016163473818646233, - "loss": 0.8582, + "loss": 0.8524, "step": 755 }, { "epoch": 2.89, - "eval_accuracy": 0.6551724137931034, - "eval_loss": 0.9947394132614136, - "eval_runtime": 73.3937, - "eval_samples_per_second": 2.371, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.632183908045977, + "eval_loss": 0.9525014162063599, + "eval_runtime": 133.0788, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 755 }, { "epoch": 2.91, "learning_rate": 0.0001613793103448276, - "loss": 0.7802, + "loss": 0.8104, "step": 760 }, { "epoch": 2.91, - "eval_accuracy": 0.5862068965517241, - "eval_loss": 1.0483365058898926, - "eval_runtime": 73.5783, - "eval_samples_per_second": 2.365, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.5977011494252874, + "eval_loss": 1.1698288917541504, + "eval_runtime": 131.9602, + "eval_samples_per_second": 1.319, + "eval_steps_per_second": 0.167, "step": 760 }, { "epoch": 2.93, "learning_rate": 0.00016112388250319286, - "loss": 0.9362, + "loss": 0.8986, "step": 765 }, { "epoch": 2.93, - "eval_accuracy": 0.603448275862069, - "eval_loss": 1.1573561429977417, - "eval_runtime": 74.7479, - "eval_samples_per_second": 2.328, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.038144826889038, + "eval_runtime": 131.7177, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 765 }, { "epoch": 2.95, "learning_rate": 0.00016086845466155812, - "loss": 0.7052, + "loss": 0.7805, "step": 770 }, { "epoch": 2.95, - "eval_accuracy": 0.6666666666666666, - "eval_loss": 0.9469316005706787, - "eval_runtime": 71.6241, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 0.8878708481788635, + "eval_runtime": 131.7896, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 770 }, { "epoch": 2.97, "learning_rate": 0.0001606130268199234, - "loss": 0.9615, + "loss": 0.9715, "step": 775 }, { "epoch": 2.97, - "eval_accuracy": 0.5689655172413793, - "eval_loss": 1.171260952949524, - "eval_runtime": 74.1608, - "eval_samples_per_second": 2.346, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.8562762141227722, + "eval_runtime": 132.4485, + "eval_samples_per_second": 1.314, + "eval_steps_per_second": 0.166, "step": 775 }, { "epoch": 2.99, "learning_rate": 0.00016035759897828865, - "loss": 1.3158, + "loss": 0.9432, "step": 780 }, { "epoch": 2.99, - "eval_accuracy": 0.603448275862069, - "eval_loss": 1.0248513221740723, - "eval_runtime": 71.7492, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.8954753875732422, + "eval_runtime": 132.4602, + "eval_samples_per_second": 1.314, + "eval_steps_per_second": 0.166, "step": 780 }, { "epoch": 3.01, "learning_rate": 0.0001601021711366539, - "loss": 1.4599, + "loss": 1.0624, "step": 785 }, { "epoch": 3.01, - "eval_accuracy": 0.5574712643678161, - "eval_loss": 1.2699826955795288, - "eval_runtime": 72.4519, - "eval_samples_per_second": 2.402, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.632183908045977, + "eval_loss": 0.9727337956428528, + "eval_runtime": 134.6432, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 785 }, { "epoch": 3.03, "learning_rate": 0.00015984674329501918, - "loss": 1.3217, + "loss": 0.9685, "step": 790 }, { "epoch": 3.03, - "eval_accuracy": 0.5632183908045977, - "eval_loss": 1.1198772192001343, - "eval_runtime": 71.6, - "eval_samples_per_second": 2.43, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.0076591968536377, + "eval_runtime": 131.6852, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 790 }, { "epoch": 3.05, "learning_rate": 0.0001595913154533844, - "loss": 0.8367, + "loss": 0.7053, "step": 795 }, { "epoch": 3.05, - "eval_accuracy": 0.5689655172413793, - "eval_loss": 1.045596957206726, - "eval_runtime": 72.1733, - "eval_samples_per_second": 2.411, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.068995475769043, + "eval_runtime": 134.9079, + "eval_samples_per_second": 1.29, + "eval_steps_per_second": 0.163, "step": 795 }, { "epoch": 3.07, "learning_rate": 0.0001593358876117497, - "loss": 0.7845, + "loss": 0.7795, "step": 800 }, { "epoch": 3.07, - "eval_accuracy": 0.5632183908045977, - "eval_loss": 1.0649093389511108, - "eval_runtime": 73.3982, - "eval_samples_per_second": 2.371, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 0.9434211254119873, + "eval_runtime": 131.5148, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 800 }, { "epoch": 3.08, "learning_rate": 0.00015908045977011494, - "loss": 0.8503, + "loss": 0.7404, "step": 805 }, { "epoch": 3.08, - "eval_accuracy": 0.6206896551724138, - "eval_loss": 0.9966627359390259, - "eval_runtime": 72.2401, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 0.9875686764717102, + "eval_runtime": 131.6597, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 805 }, { "epoch": 3.1, "learning_rate": 0.00015882503192848023, - "loss": 0.6945, + "loss": 0.6817, "step": 810 }, { "epoch": 3.1, - "eval_accuracy": 0.6436781609195402, - "eval_loss": 0.9507883191108704, - "eval_runtime": 71.8074, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9223694801330566, + "eval_runtime": 131.6839, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 810 }, { "epoch": 3.12, "learning_rate": 0.00015856960408684547, - "loss": 0.916, + "loss": 0.4526, "step": 815 }, { "epoch": 3.12, "eval_accuracy": 0.6436781609195402, - "eval_loss": 1.0732645988464355, - "eval_runtime": 72.2273, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_loss": 1.0036951303482056, + "eval_runtime": 131.8156, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 815 }, { "epoch": 3.14, "learning_rate": 0.00015831417624521076, - "loss": 0.9392, + "loss": 0.8705, "step": 820 }, { "epoch": 3.14, - "eval_accuracy": 0.5862068965517241, - "eval_loss": 1.0749492645263672, - "eval_runtime": 71.7733, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.1535873413085938, + "eval_runtime": 131.6451, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 820 }, { "epoch": 3.16, "learning_rate": 0.000158058748403576, - "loss": 1.0007, + "loss": 0.9392, "step": 825 }, { "epoch": 3.16, - "eval_accuracy": 0.5862068965517241, - "eval_loss": 1.1707144975662231, - "eval_runtime": 72.8924, - "eval_samples_per_second": 2.387, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.0609076023101807, + "eval_runtime": 131.5131, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 825 }, { "epoch": 3.18, "learning_rate": 0.00015780332056194128, - "loss": 1.2342, + "loss": 1.0781, "step": 830 }, { "epoch": 3.18, - "eval_accuracy": 0.5862068965517241, - "eval_loss": 1.2817833423614502, - "eval_runtime": 74.9744, - "eval_samples_per_second": 2.321, - "eval_steps_per_second": 0.293, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 0.9658277034759521, + "eval_runtime": 131.5389, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 830 }, { "epoch": 3.2, "learning_rate": 0.00015754789272030652, - "loss": 0.9419, + "loss": 0.7314, "step": 835 }, { "epoch": 3.2, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 0.9361924529075623, - "eval_runtime": 75.7884, - "eval_samples_per_second": 2.296, - "eval_steps_per_second": 0.29, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.0462124347686768, + "eval_runtime": 131.6181, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 835 }, { "epoch": 3.22, "learning_rate": 0.00015729246487867178, - "loss": 0.7473, + "loss": 0.8784, "step": 840 }, { "epoch": 3.22, - "eval_accuracy": 0.5517241379310345, - "eval_loss": 1.2352019548416138, - "eval_runtime": 73.6409, - "eval_samples_per_second": 2.363, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.9317790269851685, + "eval_runtime": 131.6835, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 840 }, { "epoch": 3.24, "learning_rate": 0.00015703703703703705, - "loss": 1.3524, + "loss": 1.361, "step": 845 }, { "epoch": 3.24, - "eval_accuracy": 0.5574712643678161, - "eval_loss": 1.2916122674942017, - "eval_runtime": 73.9743, - "eval_samples_per_second": 2.352, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.1364516019821167, + "eval_runtime": 131.5188, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 845 }, { "epoch": 3.26, "learning_rate": 0.0001567816091954023, - "loss": 0.7914, + "loss": 0.6983, "step": 850 }, { "epoch": 3.26, - "eval_accuracy": 0.603448275862069, - "eval_loss": 1.0420141220092773, - "eval_runtime": 73.374, - "eval_samples_per_second": 2.371, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.5689655172413793, + "eval_loss": 1.2935972213745117, + "eval_runtime": 131.8128, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 850 }, { "epoch": 3.28, "learning_rate": 0.00015652618135376757, - "loss": 0.9782, + "loss": 1.051, "step": 855 }, { "epoch": 3.28, - "eval_accuracy": 0.6206896551724138, - "eval_loss": 1.0024324655532837, - "eval_runtime": 73.5739, - "eval_samples_per_second": 2.365, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.5459770114942529, + "eval_loss": 1.2600092887878418, + "eval_runtime": 131.621, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 855 }, { "epoch": 3.3, "learning_rate": 0.00015627075351213284, - "loss": 0.6729, + "loss": 0.6664, "step": 860 }, { "epoch": 3.3, - "eval_accuracy": 0.632183908045977, - "eval_loss": 1.0617214441299438, - "eval_runtime": 73.2116, - "eval_samples_per_second": 2.377, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.603448275862069, + "eval_loss": 1.1138763427734375, + "eval_runtime": 131.7652, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 860 }, { "epoch": 3.31, "learning_rate": 0.0001560153256704981, - "loss": 1.1071, + "loss": 1.077, "step": 865 }, { "epoch": 3.31, - "eval_accuracy": 0.5, - "eval_loss": 1.5525238513946533, - "eval_runtime": 74.0895, - "eval_samples_per_second": 2.349, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.1129592657089233, + "eval_runtime": 131.519, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 865 }, { "epoch": 3.33, "learning_rate": 0.00015575989782886336, - "loss": 1.6989, + "loss": 1.2009, "step": 870 }, { "epoch": 3.33, - "eval_accuracy": 0.6206896551724138, - "eval_loss": 1.0040068626403809, - "eval_runtime": 75.3064, - "eval_samples_per_second": 2.311, - "eval_steps_per_second": 0.292, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 0.9561058282852173, + "eval_runtime": 131.4719, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 870 }, { "epoch": 3.35, "learning_rate": 0.0001555044699872286, - "loss": 0.7271, + "loss": 0.8938, "step": 875 }, { "epoch": 3.35, - "eval_accuracy": 0.5632183908045977, - "eval_loss": 1.2051146030426025, - "eval_runtime": 73.7475, - "eval_samples_per_second": 2.359, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 0.999401867389679, + "eval_runtime": 134.2227, + "eval_samples_per_second": 1.296, + "eval_steps_per_second": 0.164, "step": 875 }, { "epoch": 3.37, "learning_rate": 0.0001552490421455939, - "loss": 0.8168, + "loss": 0.6466, "step": 880 }, { "epoch": 3.37, - "eval_accuracy": 0.6091954022988506, - "eval_loss": 1.0161758661270142, - "eval_runtime": 74.8589, - "eval_samples_per_second": 2.324, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.9206087589263916, + "eval_runtime": 131.6728, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 880 }, { "epoch": 3.39, "learning_rate": 0.00015499361430395913, - "loss": 1.0251, + "loss": 1.0424, "step": 885 }, { "epoch": 3.39, - "eval_accuracy": 0.5402298850574713, - "eval_loss": 1.2411690950393677, - "eval_runtime": 74.1808, - "eval_samples_per_second": 2.346, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 0.9985089302062988, + "eval_runtime": 135.0805, + "eval_samples_per_second": 1.288, + "eval_steps_per_second": 0.163, "step": 885 }, { "epoch": 3.41, "learning_rate": 0.00015473818646232442, - "loss": 1.262, + "loss": 0.9582, "step": 890 }, { "epoch": 3.41, - "eval_accuracy": 0.632183908045977, - "eval_loss": 1.0844931602478027, - "eval_runtime": 74.7088, - "eval_samples_per_second": 2.329, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.9164769649505615, + "eval_runtime": 134.7066, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 890 }, { "epoch": 3.43, "learning_rate": 0.00015448275862068965, - "loss": 0.752, + "loss": 0.5003, "step": 895 }, { "epoch": 3.43, - "eval_accuracy": 0.632183908045977, - "eval_loss": 1.1151267290115356, - "eval_runtime": 73.9476, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.906107485294342, + "eval_runtime": 131.7569, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 895 }, { "epoch": 3.45, "learning_rate": 0.00015422733077905494, - "loss": 0.781, + "loss": 0.5526, "step": 900 }, { "epoch": 3.45, - "eval_accuracy": 0.6436781609195402, - "eval_loss": 1.1569331884384155, - "eval_runtime": 73.2377, - "eval_samples_per_second": 2.376, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.9473897218704224, + "eval_runtime": 131.708, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 900 }, { "epoch": 3.47, "learning_rate": 0.00015397190293742018, - "loss": 0.689, + "loss": 0.5755, "step": 905 }, { "epoch": 3.47, - "eval_accuracy": 0.5804597701149425, - "eval_loss": 1.1399520635604858, - "eval_runtime": 74.5155, - "eval_samples_per_second": 2.335, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.8715148568153381, + "eval_runtime": 131.756, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 905 }, { "epoch": 3.49, "learning_rate": 0.00015371647509578544, - "loss": 1.1359, + "loss": 1.1355, "step": 910 }, { "epoch": 3.49, - "eval_accuracy": 0.5747126436781609, - "eval_loss": 1.2362200021743774, - "eval_runtime": 73.5703, - "eval_samples_per_second": 2.365, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 0.8548762798309326, + "eval_runtime": 131.7286, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 910 }, { "epoch": 3.51, "learning_rate": 0.0001534610472541507, - "loss": 1.0256, + "loss": 1.3261, "step": 915 }, { "epoch": 3.51, - "eval_accuracy": 0.5, - "eval_loss": 1.3777178525924683, - "eval_runtime": 73.8937, - "eval_samples_per_second": 2.355, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.5229885057471264, + "eval_loss": 1.3770815134048462, + "eval_runtime": 131.6422, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 915 }, { "epoch": 3.52, "learning_rate": 0.00015320561941251597, - "loss": 1.0887, + "loss": 1.3727, "step": 920 }, { "epoch": 3.52, "eval_accuracy": 0.6206896551724138, - "eval_loss": 1.1072226762771606, - "eval_runtime": 73.4404, - "eval_samples_per_second": 2.369, - "eval_steps_per_second": 0.3, + "eval_loss": 0.981780469417572, + "eval_runtime": 131.4313, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 920 }, { "epoch": 3.54, "learning_rate": 0.00015295019157088123, - "loss": 0.6106, + "loss": 0.6203, "step": 925 }, { "epoch": 3.54, - "eval_accuracy": 0.5459770114942529, - "eval_loss": 1.234115481376648, - "eval_runtime": 74.0108, - "eval_samples_per_second": 2.351, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9016448855400085, + "eval_runtime": 131.6346, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 925 }, { "epoch": 3.56, "learning_rate": 0.0001526947637292465, - "loss": 1.3364, + "loss": 1.0178, "step": 930 }, { "epoch": 3.56, - "eval_accuracy": 0.5747126436781609, - "eval_loss": 1.1364753246307373, - "eval_runtime": 73.2306, - "eval_samples_per_second": 2.376, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 0.9804055094718933, + "eval_runtime": 131.7044, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 930 }, { "epoch": 3.58, "learning_rate": 0.00015243933588761176, - "loss": 0.7705, + "loss": 0.5602, "step": 935 }, { "epoch": 3.58, - "eval_accuracy": 0.5919540229885057, - "eval_loss": 1.230558156967163, - "eval_runtime": 74.0579, - "eval_samples_per_second": 2.35, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.017970085144043, + "eval_runtime": 131.7459, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 935 }, { "epoch": 3.6, "learning_rate": 0.00015218390804597702, - "loss": 0.8368, + "loss": 0.9365, "step": 940 }, { "epoch": 3.6, - "eval_accuracy": 0.632183908045977, - "eval_loss": 1.0860326290130615, - "eval_runtime": 72.9069, - "eval_samples_per_second": 2.387, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 0.9289452433586121, + "eval_runtime": 135.6119, + "eval_samples_per_second": 1.283, + "eval_steps_per_second": 0.162, "step": 940 }, { "epoch": 3.62, "learning_rate": 0.00015192848020434226, - "loss": 0.8946, + "loss": 0.8331, "step": 945 }, { "epoch": 3.62, - "eval_accuracy": 0.5804597701149425, - "eval_loss": 1.2349048852920532, - "eval_runtime": 76.0014, - "eval_samples_per_second": 2.289, - "eval_steps_per_second": 0.289, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.1076228618621826, + "eval_runtime": 135.5696, + "eval_samples_per_second": 1.283, + "eval_steps_per_second": 0.162, "step": 945 }, { "epoch": 3.64, "learning_rate": 0.00015167305236270755, - "loss": 1.226, + "loss": 0.8512, "step": 950 }, { "epoch": 3.64, - "eval_accuracy": 0.5919540229885057, - "eval_loss": 1.2052266597747803, - "eval_runtime": 74.5817, - "eval_samples_per_second": 2.333, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.8155642747879028, + "eval_runtime": 131.7614, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 950 }, { "epoch": 3.66, "learning_rate": 0.00015141762452107279, - "loss": 1.1585, + "loss": 0.8797, "step": 955 }, { "epoch": 3.66, - "eval_accuracy": 0.6436781609195402, - "eval_loss": 0.9784772396087646, - "eval_runtime": 72.3958, - "eval_samples_per_second": 2.403, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.0940409898757935, + "eval_runtime": 132.2455, + "eval_samples_per_second": 1.316, + "eval_steps_per_second": 0.166, "step": 955 }, { "epoch": 3.68, "learning_rate": 0.00015116219667943808, - "loss": 0.7824, + "loss": 0.914, "step": 960 }, { "epoch": 3.68, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 0.8754329681396484, - "eval_runtime": 71.8422, - "eval_samples_per_second": 2.422, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.8316068053245544, + "eval_runtime": 131.4826, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 960 }, { "epoch": 3.7, "learning_rate": 0.0001509067688378033, - "loss": 0.6437, + "loss": 0.6969, "step": 965 }, { "epoch": 3.7, - "eval_accuracy": 0.6264367816091954, - "eval_loss": 0.9256632328033447, - "eval_runtime": 72.4538, - "eval_samples_per_second": 2.402, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 0.8965338468551636, + "eval_runtime": 131.5912, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 965 }, { "epoch": 3.72, "learning_rate": 0.0001506513409961686, - "loss": 0.8286, + "loss": 0.7973, "step": 970 }, { "epoch": 3.72, - "eval_accuracy": 0.6724137931034483, - "eval_loss": 0.9190903306007385, - "eval_runtime": 71.6686, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 0.9823299646377563, + "eval_runtime": 131.6359, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 970 }, { "epoch": 3.74, "learning_rate": 0.00015039591315453384, - "loss": 0.8671, + "loss": 0.8665, "step": 975 }, { "epoch": 3.74, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 0.8495645523071289, - "eval_runtime": 73.9876, - "eval_samples_per_second": 2.352, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.632183908045977, + "eval_loss": 0.9927699565887451, + "eval_runtime": 131.5757, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 975 }, { "epoch": 3.75, "learning_rate": 0.0001501404853128991, - "loss": 0.5814, + "loss": 0.8057, "step": 980 }, { "epoch": 3.75, - "eval_accuracy": 0.6494252873563219, - "eval_loss": 0.9788767695426941, - "eval_runtime": 71.5848, - "eval_samples_per_second": 2.431, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 0.9669252634048462, + "eval_runtime": 131.6483, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 980 }, { "epoch": 3.77, "learning_rate": 0.00014988505747126437, - "loss": 0.7895, + "loss": 0.8764, "step": 985 }, { "epoch": 3.77, - "eval_accuracy": 0.6379310344827587, - "eval_loss": 1.032570719718933, - "eval_runtime": 72.0801, - "eval_samples_per_second": 2.414, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.1127873659133911, + "eval_runtime": 131.6238, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 985 }, { "epoch": 3.79, "learning_rate": 0.00014962962962962963, - "loss": 0.8104, + "loss": 0.9281, "step": 990 }, { "epoch": 3.79, - "eval_accuracy": 0.632183908045977, - "eval_loss": 0.9865307211875916, - "eval_runtime": 71.6806, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9737154245376587, + "eval_runtime": 131.6838, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 990 }, { "epoch": 3.81, "learning_rate": 0.0001493742017879949, - "loss": 0.8277, + "loss": 0.7301, "step": 995 }, { "epoch": 3.81, - "eval_accuracy": 0.6091954022988506, - "eval_loss": 1.0854288339614868, - "eval_runtime": 72.3687, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.9511990547180176, + "eval_runtime": 131.6945, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 995 }, { "epoch": 3.83, "learning_rate": 0.00014911877394636016, - "loss": 0.5801, + "loss": 0.5308, "step": 1000 }, { "epoch": 3.83, - "eval_accuracy": 0.6264367816091954, - "eval_loss": 1.0070602893829346, - "eval_runtime": 72.8007, - "eval_samples_per_second": 2.39, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 1.1504030227661133, + "eval_runtime": 131.6388, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1000 }, { "epoch": 3.85, "learning_rate": 0.00014886334610472542, - "loss": 0.6394, + "loss": 0.6637, "step": 1005 }, { "epoch": 3.85, - "eval_accuracy": 0.6264367816091954, - "eval_loss": 0.9902334809303284, - "eval_runtime": 72.164, - "eval_samples_per_second": 2.411, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 1.2047678232192993, + "eval_runtime": 131.9226, + "eval_samples_per_second": 1.319, + "eval_steps_per_second": 0.167, "step": 1005 }, { "epoch": 3.87, "learning_rate": 0.00014860791826309068, - "loss": 0.7135, + "loss": 0.6633, "step": 1010 }, { "epoch": 3.87, - "eval_accuracy": 0.5919540229885057, - "eval_loss": 1.1338768005371094, - "eval_runtime": 73.5721, - "eval_samples_per_second": 2.365, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.1038755178451538, + "eval_runtime": 131.5702, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1010 }, { "epoch": 3.89, "learning_rate": 0.00014835249042145595, - "loss": 1.3432, + "loss": 1.0671, "step": 1015 }, { "epoch": 3.89, - "eval_accuracy": 0.603448275862069, - "eval_loss": 1.016777515411377, - "eval_runtime": 72.2656, - "eval_samples_per_second": 2.408, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.5862068965517241, + "eval_loss": 1.1816108226776123, + "eval_runtime": 131.3997, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 1015 }, { "epoch": 3.91, "learning_rate": 0.0001480970625798212, - "loss": 0.8375, + "loss": 0.7547, "step": 1020 }, { "epoch": 3.91, - "eval_accuracy": 0.6436781609195402, - "eval_loss": 0.8976129293441772, - "eval_runtime": 71.6885, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 0.9733718633651733, + "eval_runtime": 131.6592, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1020 }, { "epoch": 3.93, "learning_rate": 0.00014784163473818647, - "loss": 1.2419, + "loss": 1.3147, "step": 1025 }, { "epoch": 3.93, - "eval_accuracy": 0.6149425287356322, - "eval_loss": 1.0345048904418945, - "eval_runtime": 72.2899, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 0.8259694576263428, + "eval_runtime": 132.7872, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 1025 }, { "epoch": 3.95, "learning_rate": 0.00014758620689655174, - "loss": 1.2465, + "loss": 1.0075, "step": 1030 }, { "epoch": 3.95, - "eval_accuracy": 0.6666666666666666, - "eval_loss": 0.9065479636192322, - "eval_runtime": 71.7202, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.8831397891044617, + "eval_runtime": 131.4769, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1030 }, { "epoch": 3.97, "learning_rate": 0.000147330779054917, - "loss": 0.8987, + "loss": 0.5925, "step": 1035 }, { "epoch": 3.97, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 0.8305312395095825, - "eval_runtime": 74.2714, - "eval_samples_per_second": 2.343, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.0289727449417114, + "eval_runtime": 131.759, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1035 }, { "epoch": 3.98, "learning_rate": 0.00014707535121328226, - "loss": 0.6279, + "loss": 0.8289, "step": 1040 }, { "epoch": 3.98, - "eval_accuracy": 0.6666666666666666, - "eval_loss": 0.8689811825752258, - "eval_runtime": 73.642, - "eval_samples_per_second": 2.363, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.9349720478057861, + "eval_runtime": 133.5695, + "eval_samples_per_second": 1.303, + "eval_steps_per_second": 0.165, "step": 1040 }, { "epoch": 4.0, "learning_rate": 0.00014681992337164753, - "loss": 1.1026, + "loss": 1.0505, "step": 1045 }, { "epoch": 4.0, - "eval_accuracy": 0.6724137931034483, - "eval_loss": 0.8575055599212646, - "eval_runtime": 72.2634, - "eval_samples_per_second": 2.408, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.9338142275810242, + "eval_runtime": 135.7737, + "eval_samples_per_second": 1.282, + "eval_steps_per_second": 0.162, "step": 1045 }, { "epoch": 4.02, "learning_rate": 0.00014656449553001276, - "loss": 0.5617, + "loss": 0.329, "step": 1050 }, { "epoch": 4.02, - "eval_accuracy": 0.6436781609195402, - "eval_loss": 0.8988448977470398, - "eval_runtime": 73.0503, - "eval_samples_per_second": 2.382, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 0.9524909853935242, + "eval_runtime": 134.3827, + "eval_samples_per_second": 1.295, + "eval_steps_per_second": 0.164, "step": 1050 }, { "epoch": 4.04, "learning_rate": 0.00014630906768837805, - "loss": 0.5318, + "loss": 0.3039, "step": 1055 }, { "epoch": 4.04, - "eval_accuracy": 0.6264367816091954, - "eval_loss": 1.0455691814422607, - "eval_runtime": 72.3237, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 1.0120184421539307, + "eval_runtime": 133.1888, + "eval_samples_per_second": 1.306, + "eval_steps_per_second": 0.165, "step": 1055 }, { "epoch": 4.06, "learning_rate": 0.0001460536398467433, - "loss": 0.7041, + "loss": 0.5585, "step": 1060 }, { "epoch": 4.06, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 0.9058274626731873, - "eval_runtime": 71.8667, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 1.1452168226242065, + "eval_runtime": 132.8053, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 1060 }, { "epoch": 4.08, "learning_rate": 0.00014579821200510858, - "loss": 0.688, + "loss": 0.6825, "step": 1065 }, { "epoch": 4.08, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 1.0057711601257324, - "eval_runtime": 72.4073, - "eval_samples_per_second": 2.403, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 1.0479168891906738, + "eval_runtime": 137.784, + "eval_samples_per_second": 1.263, + "eval_steps_per_second": 0.16, "step": 1065 }, { "epoch": 4.1, "learning_rate": 0.00014554278416347382, - "loss": 0.8286, + "loss": 0.5304, "step": 1070 }, { "epoch": 4.1, - "eval_accuracy": 0.6379310344827587, - "eval_loss": 1.0452172756195068, - "eval_runtime": 71.8108, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 1.1510097980499268, + "eval_runtime": 133.2682, + "eval_samples_per_second": 1.306, + "eval_steps_per_second": 0.165, "step": 1070 }, { "epoch": 4.12, "learning_rate": 0.0001452873563218391, - "loss": 1.2596, + "loss": 1.0301, "step": 1075 }, { "epoch": 4.12, - "eval_accuracy": 0.5862068965517241, - "eval_loss": 0.9942687749862671, - "eval_runtime": 73.3993, - "eval_samples_per_second": 2.371, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 0.9221396446228027, + "eval_runtime": 134.0831, + "eval_samples_per_second": 1.298, + "eval_steps_per_second": 0.164, "step": 1075 }, { "epoch": 4.14, "learning_rate": 0.00014503192848020434, - "loss": 0.8448, + "loss": 0.3273, "step": 1080 }, { "epoch": 4.14, - "eval_accuracy": 0.6264367816091954, - "eval_loss": 1.044135570526123, - "eval_runtime": 71.6723, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9179468154907227, + "eval_runtime": 136.8309, + "eval_samples_per_second": 1.272, + "eval_steps_per_second": 0.161, "step": 1080 }, { "epoch": 4.16, "learning_rate": 0.0001447765006385696, - "loss": 0.9666, + "loss": 0.7577, "step": 1085 }, { "epoch": 4.16, - "eval_accuracy": 0.6551724137931034, - "eval_loss": 1.0021615028381348, - "eval_runtime": 72.2989, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.1157200336456299, + "eval_runtime": 132.9971, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 1085 }, { "epoch": 4.18, "learning_rate": 0.00014452107279693487, - "loss": 1.0548, + "loss": 1.0743, "step": 1090 }, { "epoch": 4.18, - "eval_accuracy": 0.6724137931034483, - "eval_loss": 0.8905701041221619, - "eval_runtime": 71.7468, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.9640018939971924, + "eval_runtime": 133.1707, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 1090 }, { "epoch": 4.2, "learning_rate": 0.00014426564495530013, - "loss": 0.823, + "loss": 0.7973, "step": 1095 }, { "epoch": 4.2, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 0.835205614566803, - "eval_runtime": 72.2334, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6264367816091954, + "eval_loss": 1.0484741926193237, + "eval_runtime": 133.7107, + "eval_samples_per_second": 1.301, + "eval_steps_per_second": 0.165, "step": 1095 }, { "epoch": 4.21, "learning_rate": 0.0001440102171136654, - "loss": 0.7588, + "loss": 0.5881, "step": 1100 }, { "epoch": 4.21, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 0.8358584046363831, - "eval_runtime": 71.7162, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 0.947981059551239, + "eval_runtime": 136.1859, + "eval_samples_per_second": 1.278, + "eval_steps_per_second": 0.162, "step": 1100 }, { "epoch": 4.23, "learning_rate": 0.00014375478927203066, - "loss": 0.5306, + "loss": 0.5461, "step": 1105 }, { "epoch": 4.23, - "eval_accuracy": 0.6666666666666666, - "eval_loss": 0.8987478613853455, - "eval_runtime": 72.2456, - "eval_samples_per_second": 2.408, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.017594814300537, + "eval_runtime": 133.6971, + "eval_samples_per_second": 1.301, + "eval_steps_per_second": 0.165, "step": 1105 }, { "epoch": 4.25, "learning_rate": 0.00014349936143039592, - "loss": 1.059, + "loss": 0.997, "step": 1110 }, { "epoch": 4.25, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 0.8400871753692627, - "eval_runtime": 71.6522, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9593090415000916, + "eval_runtime": 133.0295, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 1110 }, { "epoch": 4.27, "learning_rate": 0.00014324393358876119, - "loss": 0.6116, + "loss": 0.7955, "step": 1115 }, { "epoch": 4.27, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 0.9104363322257996, - "eval_runtime": 72.2465, - "eval_samples_per_second": 2.408, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.090934157371521, + "eval_runtime": 133.0528, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 1115 }, { "epoch": 4.29, "learning_rate": 0.00014298850574712642, - "loss": 0.7483, + "loss": 0.6282, "step": 1120 }, { "epoch": 4.29, - "eval_accuracy": 0.6494252873563219, - "eval_loss": 1.0067737102508545, - "eval_runtime": 71.6939, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 0.9476028680801392, + "eval_runtime": 136.0334, + "eval_samples_per_second": 1.279, + "eval_steps_per_second": 0.162, "step": 1120 }, { "epoch": 4.31, "learning_rate": 0.0001427330779054917, - "loss": 0.5231, + "loss": 0.4928, "step": 1125 }, { "epoch": 4.31, - "eval_accuracy": 0.6666666666666666, - "eval_loss": 0.9476281404495239, - "eval_runtime": 72.2862, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.8954192399978638, + "eval_runtime": 135.9877, + "eval_samples_per_second": 1.28, + "eval_steps_per_second": 0.162, "step": 1125 }, { "epoch": 4.33, "learning_rate": 0.00014247765006385695, - "loss": 0.5667, + "loss": 0.6038, "step": 1130 }, { "epoch": 4.33, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 0.9047439694404602, - "eval_runtime": 71.7674, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.013425588607788, + "eval_runtime": 135.7265, + "eval_samples_per_second": 1.282, + "eval_steps_per_second": 0.162, "step": 1130 }, { "epoch": 4.35, "learning_rate": 0.00014222222222222224, - "loss": 0.7785, + "loss": 0.5476, "step": 1135 }, { "epoch": 4.35, - "eval_accuracy": 0.6149425287356322, - "eval_loss": 1.0280640125274658, - "eval_runtime": 72.438, - "eval_samples_per_second": 2.402, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.1263020038604736, + "eval_runtime": 131.5323, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1135 }, { "epoch": 4.37, "learning_rate": 0.00014196679438058748, - "loss": 1.0404, + "loss": 0.6955, "step": 1140 }, { "epoch": 4.37, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 0.9103832840919495, - "eval_runtime": 73.4365, - "eval_samples_per_second": 2.369, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 0.9447872042655945, + "eval_runtime": 131.7146, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1140 }, { "epoch": 4.39, "learning_rate": 0.00014171136653895277, - "loss": 0.5523, + "loss": 0.5385, "step": 1145 }, { "epoch": 4.39, - "eval_accuracy": 0.6436781609195402, - "eval_loss": 1.0259004831314087, - "eval_runtime": 72.2901, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9447667002677917, + "eval_runtime": 131.6399, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1145 }, { "epoch": 4.41, "learning_rate": 0.000141455938697318, - "loss": 0.6387, + "loss": 0.4345, "step": 1150 }, { "epoch": 4.41, - "eval_accuracy": 0.5747126436781609, - "eval_loss": 1.1877542734146118, - "eval_runtime": 71.8711, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.9276512265205383, + "eval_runtime": 131.3205, + "eval_samples_per_second": 1.325, + "eval_steps_per_second": 0.168, "step": 1150 }, { "epoch": 4.43, "learning_rate": 0.00014120051085568327, - "loss": 1.038, + "loss": 0.7115, "step": 1155 }, { "epoch": 4.43, "eval_accuracy": 0.6609195402298851, - "eval_loss": 1.065651774406433, - "eval_runtime": 72.3814, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_loss": 1.017700433731079, + "eval_runtime": 131.4128, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 1155 }, { "epoch": 4.44, "learning_rate": 0.00014094508301404853, - "loss": 0.7566, + "loss": 0.6605, "step": 1160 }, { "epoch": 4.44, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 0.8948299288749695, - "eval_runtime": 71.6321, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.289931058883667, + "eval_runtime": 131.6646, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1160 }, { "epoch": 4.46, "learning_rate": 0.0001406896551724138, - "loss": 0.5505, + "loss": 0.7257, "step": 1165 }, { "epoch": 4.46, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 0.9844233393669128, - "eval_runtime": 74.1566, - "eval_samples_per_second": 2.346, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.603448275862069, + "eval_loss": 1.2731506824493408, + "eval_runtime": 131.6448, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1165 }, { "epoch": 4.48, "learning_rate": 0.00014043422733077906, - "loss": 1.0917, + "loss": 0.8842, "step": 1170 }, { "epoch": 4.48, - "eval_accuracy": 0.6724137931034483, - "eval_loss": 0.9192268252372742, - "eval_runtime": 71.6227, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.1442424058914185, + "eval_runtime": 131.62, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1170 }, { "epoch": 4.5, "learning_rate": 0.00014017879948914432, - "loss": 0.8048, + "loss": 0.7097, "step": 1175 }, { "epoch": 4.5, - "eval_accuracy": 0.6091954022988506, - "eval_loss": 1.1002541780471802, - "eval_runtime": 73.6156, - "eval_samples_per_second": 2.364, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 0.9837198257446289, + "eval_runtime": 131.6089, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1175 }, { "epoch": 4.52, "learning_rate": 0.00013992337164750958, - "loss": 0.5951, + "loss": 0.6335, "step": 1180 }, { "epoch": 4.52, - "eval_accuracy": 0.6494252873563219, - "eval_loss": 0.9471919536590576, - "eval_runtime": 71.689, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.9646813273429871, + "eval_runtime": 134.7732, + "eval_samples_per_second": 1.291, + "eval_steps_per_second": 0.163, "step": 1180 }, { "epoch": 4.54, "learning_rate": 0.00013966794380587485, - "loss": 0.603, + "loss": 0.7804, "step": 1185 }, { "epoch": 4.54, - "eval_accuracy": 0.5632183908045977, - "eval_loss": 1.3395264148712158, - "eval_runtime": 72.507, - "eval_samples_per_second": 2.4, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.8581375479698181, + "eval_runtime": 134.6733, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 1185 }, { "epoch": 4.56, "learning_rate": 0.0001394125159642401, - "loss": 1.0802, + "loss": 0.5464, "step": 1190 }, { "epoch": 4.56, - "eval_accuracy": 0.6149425287356322, - "eval_loss": 1.052255392074585, - "eval_runtime": 71.8669, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.8662963509559631, + "eval_runtime": 131.6239, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1190 }, { "epoch": 4.58, "learning_rate": 0.00013915708812260537, - "loss": 0.3689, + "loss": 0.2931, "step": 1195 }, { "epoch": 4.58, - "eval_accuracy": 0.6264367816091954, - "eval_loss": 1.0053684711456299, - "eval_runtime": 72.2348, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 1.0258806943893433, + "eval_runtime": 131.4426, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 1195 }, { "epoch": 4.6, "learning_rate": 0.00013890166028097064, - "loss": 0.5565, + "loss": 0.5038, "step": 1200 }, { "epoch": 4.6, - "eval_accuracy": 0.5862068965517241, - "eval_loss": 1.400984525680542, - "eval_runtime": 71.9603, - "eval_samples_per_second": 2.418, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.5689655172413793, + "eval_loss": 1.886447548866272, + "eval_runtime": 134.426, + "eval_samples_per_second": 1.294, + "eval_steps_per_second": 0.164, "step": 1200 }, { "epoch": 4.62, "learning_rate": 0.0001386462324393359, - "loss": 0.7934, + "loss": 1.0206, "step": 1205 }, { "epoch": 4.62, - "eval_accuracy": 0.5919540229885057, - "eval_loss": 1.4771628379821777, - "eval_runtime": 72.4137, - "eval_samples_per_second": 2.403, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.5747126436781609, + "eval_loss": 1.2909624576568604, + "eval_runtime": 134.5453, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.164, "step": 1205 }, { "epoch": 4.64, "learning_rate": 0.00013839080459770116, - "loss": 0.8364, + "loss": 0.5508, "step": 1210 }, { "epoch": 4.64, - "eval_accuracy": 0.632183908045977, - "eval_loss": 1.2308330535888672, - "eval_runtime": 71.7192, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.5632183908045977, + "eval_loss": 1.152716040611267, + "eval_runtime": 131.5963, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1210 }, { "epoch": 4.66, "learning_rate": 0.00013813537675606643, - "loss": 0.8967, + "loss": 1.2308, "step": 1215 }, { "epoch": 4.66, - "eval_accuracy": 0.6666666666666666, - "eval_loss": 0.8904043436050415, - "eval_runtime": 72.373, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6091954022988506, + "eval_loss": 1.0279332399368286, + "eval_runtime": 135.4076, + "eval_samples_per_second": 1.285, + "eval_steps_per_second": 0.162, "step": 1215 }, { "epoch": 4.67, "learning_rate": 0.0001378799489144317, - "loss": 0.5255, + "loss": 0.388, "step": 1220 }, { "epoch": 4.67, - "eval_accuracy": 0.6379310344827587, - "eval_loss": 1.006020188331604, - "eval_runtime": 73.6542, - "eval_samples_per_second": 2.362, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.632183908045977, + "eval_loss": 0.9292969703674316, + "eval_runtime": 135.6227, + "eval_samples_per_second": 1.283, + "eval_steps_per_second": 0.162, "step": 1220 }, { "epoch": 4.69, "learning_rate": 0.00013762452107279695, - "loss": 0.693, + "loss": 0.4747, "step": 1225 }, { "epoch": 4.69, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 0.9818925261497498, - "eval_runtime": 72.3307, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.0777442455291748, + "eval_runtime": 131.6533, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1225 }, { "epoch": 4.71, "learning_rate": 0.00013736909323116222, - "loss": 1.1102, + "loss": 1.0655, "step": 1230 }, { "epoch": 4.71, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 0.8632426857948303, - "eval_runtime": 71.648, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 0.9733009934425354, + "eval_runtime": 131.6904, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1230 }, { "epoch": 4.73, "learning_rate": 0.00013711366538952745, - "loss": 0.7586, + "loss": 0.7551, "step": 1235 }, { "epoch": 4.73, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 0.8827661871910095, - "eval_runtime": 72.3427, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.8783059120178223, + "eval_runtime": 131.5129, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1235 }, { "epoch": 4.75, "learning_rate": 0.00013685823754789274, - "loss": 0.5332, + "loss": 0.5262, "step": 1240 }, { "epoch": 4.75, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 0.8106628656387329, - "eval_runtime": 73.4839, - "eval_samples_per_second": 2.368, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 0.9284844994544983, + "eval_runtime": 131.5523, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1240 }, { "epoch": 4.77, "learning_rate": 0.00013660280970625798, - "loss": 0.7737, + "loss": 0.6098, "step": 1245 }, { "epoch": 4.77, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 0.7830407023429871, - "eval_runtime": 72.337, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 0.841589629650116, + "eval_runtime": 131.461, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 1245 }, { "epoch": 4.79, "learning_rate": 0.00013634738186462327, - "loss": 0.8634, + "loss": 0.8836, "step": 1250 }, { "epoch": 4.79, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 0.799767792224884, - "eval_runtime": 71.8401, - "eval_samples_per_second": 2.422, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.8214066624641418, + "eval_runtime": 131.6847, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1250 }, { "epoch": 4.81, "learning_rate": 0.0001360919540229885, - "loss": 0.8592, + "loss": 0.5507, "step": 1255 }, { "epoch": 4.81, - "eval_accuracy": 0.6666666666666666, - "eval_loss": 0.8830769658088684, - "eval_runtime": 73.1202, - "eval_samples_per_second": 2.38, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.7416496872901917, + "eval_runtime": 131.6767, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1255 }, { "epoch": 4.83, "learning_rate": 0.0001358365261813538, - "loss": 0.3591, + "loss": 0.1952, "step": 1260 }, { "epoch": 4.83, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 0.7183188796043396, - "eval_runtime": 71.7735, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.8377039432525635, + "eval_runtime": 131.5954, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1260 }, { "epoch": 4.85, "learning_rate": 0.00013558109833971903, - "loss": 0.7214, + "loss": 0.7898, "step": 1265 }, { "epoch": 4.85, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 0.7452751398086548, - "eval_runtime": 72.2803, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.7278856039047241, + "eval_runtime": 131.6799, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1265 }, { "epoch": 4.87, "learning_rate": 0.0001353256704980843, - "loss": 0.6555, + "loss": 0.7374, "step": 1270 }, { "epoch": 4.87, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 0.8549041748046875, - "eval_runtime": 71.6872, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 0.9924662113189697, + "eval_runtime": 131.6929, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1270 }, { "epoch": 4.89, "learning_rate": 0.00013507024265644956, - "loss": 0.3736, + "loss": 0.5315, "step": 1275 }, { "epoch": 4.89, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 0.8064850568771362, - "eval_runtime": 72.2268, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.9290440082550049, + "eval_runtime": 131.6471, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1275 }, { "epoch": 4.9, "learning_rate": 0.00013481481481481482, - "loss": 0.6139, + "loss": 0.3167, "step": 1280 }, { "epoch": 4.9, "eval_accuracy": 0.7011494252873564, - "eval_loss": 0.7727733850479126, - "eval_runtime": 71.6667, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_loss": 0.7473086714744568, + "eval_runtime": 131.7751, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 1280 }, { "epoch": 4.92, "learning_rate": 0.00013455938697318009, - "loss": 1.4349, + "loss": 0.9122, "step": 1285 }, { "epoch": 4.92, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 0.7865081429481506, - "eval_runtime": 72.3204, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 0.8101300001144409, + "eval_runtime": 131.6511, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1285 }, { "epoch": 4.94, "learning_rate": 0.00013430395913154535, - "loss": 0.6336, + "loss": 0.5306, "step": 1290 }, { "epoch": 4.94, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 0.7983749508857727, - "eval_runtime": 71.6401, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 0.7988009452819824, + "eval_runtime": 131.5511, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1290 }, { "epoch": 4.96, "learning_rate": 0.0001340485312899106, - "loss": 0.667, + "loss": 0.9036, "step": 1295 }, { "epoch": 4.96, - "eval_accuracy": 0.6494252873563219, - "eval_loss": 1.0624412298202515, - "eval_runtime": 72.1869, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.9747323393821716, + "eval_runtime": 131.6304, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1295 }, { "epoch": 4.98, "learning_rate": 0.00013379310344827588, - "loss": 0.4108, + "loss": 0.4449, "step": 1300 }, { "epoch": 4.98, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 0.9411900043487549, - "eval_runtime": 72.8779, - "eval_samples_per_second": 2.388, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.7531276345252991, + "eval_runtime": 131.6404, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1300 }, { "epoch": 5.0, "learning_rate": 0.0001335376756066411, - "loss": 1.0572, + "loss": 0.761, "step": 1305 }, { "epoch": 5.0, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 0.8950245380401611, - "eval_runtime": 72.2902, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 0.8166816234588623, + "eval_runtime": 131.8052, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 1305 }, { "epoch": 5.02, "learning_rate": 0.0001332822477650064, - "loss": 0.3411, + "loss": 0.5163, "step": 1310 }, { "epoch": 5.02, - "eval_accuracy": 0.735632183908046, - "eval_loss": 0.8750669360160828, - "eval_runtime": 73.4412, - "eval_samples_per_second": 2.369, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 1.006870985031128, + "eval_runtime": 131.6867, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1310 }, { "epoch": 5.04, "learning_rate": 0.00013302681992337164, - "loss": 0.3644, + "loss": 0.2701, "step": 1315 }, { "epoch": 5.04, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 0.8155695796012878, - "eval_runtime": 72.387, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 0.8416351675987244, + "eval_runtime": 131.8363, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 1315 }, { "epoch": 5.06, "learning_rate": 0.00013277139208173693, - "loss": 0.2643, + "loss": 0.1513, "step": 1320 }, { "epoch": 5.06, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 0.7830978631973267, - "eval_runtime": 71.6267, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 0.6530519723892212, + "eval_runtime": 134.8529, + "eval_samples_per_second": 1.29, + "eval_steps_per_second": 0.163, "step": 1320 }, { "epoch": 5.08, "learning_rate": 0.00013251596424010217, - "loss": 0.3629, + "loss": 0.3641, "step": 1325 }, { "epoch": 5.08, - "eval_accuracy": 0.6264367816091954, - "eval_loss": 1.644027829170227, - "eval_runtime": 72.3202, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 0.8022194504737854, + "eval_runtime": 131.7261, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1325 }, { "epoch": 5.1, "learning_rate": 0.00013226053639846746, - "loss": 1.5008, + "loss": 0.6804, "step": 1330 }, { "epoch": 5.1, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 1.36289381980896, - "eval_runtime": 73.5741, - "eval_samples_per_second": 2.365, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 0.9907580614089966, + "eval_runtime": 131.5946, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1330 }, { "epoch": 5.11, "learning_rate": 0.0001320051085568327, - "loss": 1.1648, + "loss": 0.9176, "step": 1335 }, { "epoch": 5.11, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 0.9745023250579834, - "eval_runtime": 73.9076, - "eval_samples_per_second": 2.354, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.8896319270133972, + "eval_runtime": 131.5913, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1335 }, { "epoch": 5.13, "learning_rate": 0.00013174968071519796, - "loss": 0.842, + "loss": 0.4943, "step": 1340 }, { "epoch": 5.13, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 0.9471400380134583, - "eval_runtime": 73.3831, - "eval_samples_per_second": 2.371, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.8112825155258179, + "eval_runtime": 132.8289, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 1340 }, { "epoch": 5.15, "learning_rate": 0.00013149425287356322, - "loss": 0.4963, + "loss": 0.4788, "step": 1345 }, { "epoch": 5.15, - "eval_accuracy": 0.6379310344827587, - "eval_loss": 1.1134085655212402, - "eval_runtime": 74.2914, - "eval_samples_per_second": 2.342, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 0.9182735681533813, + "eval_runtime": 131.6068, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1345 }, { "epoch": 5.17, "learning_rate": 0.00013123882503192848, - "loss": 0.6819, + "loss": 0.2654, "step": 1350 }, { "epoch": 5.17, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.0473263263702393, - "eval_runtime": 73.934, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 0.8970803618431091, + "eval_runtime": 131.7165, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1350 }, { "epoch": 5.19, "learning_rate": 0.00013098339719029375, - "loss": 0.2688, + "loss": 0.5567, "step": 1355 }, { "epoch": 5.19, - "eval_accuracy": 0.6091954022988506, - "eval_loss": 1.323175311088562, - "eval_runtime": 73.8927, - "eval_samples_per_second": 2.355, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.0155576467514038, + "eval_runtime": 131.4848, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1355 }, { "epoch": 5.21, "learning_rate": 0.000130727969348659, - "loss": 0.7999, + "loss": 0.5702, "step": 1360 }, { "epoch": 5.21, - "eval_accuracy": 0.6206896551724138, - "eval_loss": 1.2077127695083618, - "eval_runtime": 72.9338, - "eval_samples_per_second": 2.386, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 0.8736016154289246, + "eval_runtime": 134.812, + "eval_samples_per_second": 1.291, + "eval_steps_per_second": 0.163, "step": 1360 }, { "epoch": 5.23, "learning_rate": 0.00013047254150702427, - "loss": 0.9447, + "loss": 0.7055, "step": 1365 }, { "epoch": 5.23, - "eval_accuracy": 0.632183908045977, - "eval_loss": 1.119039535522461, - "eval_runtime": 75.1877, - "eval_samples_per_second": 2.314, - "eval_steps_per_second": 0.293, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.0454132556915283, + "eval_runtime": 131.6517, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1365 }, { "epoch": 5.25, "learning_rate": 0.00013021711366538954, - "loss": 0.4866, + "loss": 0.5203, "step": 1370 }, { "epoch": 5.25, - "eval_accuracy": 0.5977011494252874, - "eval_loss": 1.1412699222564697, - "eval_runtime": 74.8277, - "eval_samples_per_second": 2.325, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.1284202337265015, + "eval_runtime": 131.6682, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1370 }, { "epoch": 5.27, "learning_rate": 0.0001299616858237548, - "loss": 0.6268, + "loss": 0.512, "step": 1375 }, { "epoch": 5.27, - "eval_accuracy": 0.6551724137931034, - "eval_loss": 1.1648024320602417, - "eval_runtime": 74.1075, - "eval_samples_per_second": 2.348, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.632183908045977, + "eval_loss": 1.0251713991165161, + "eval_runtime": 134.5714, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.163, "step": 1375 }, { "epoch": 5.29, "learning_rate": 0.00012970625798212006, - "loss": 1.0332, + "loss": 0.8081, "step": 1380 }, { "epoch": 5.29, - "eval_accuracy": 0.6264367816091954, - "eval_loss": 1.0704519748687744, - "eval_runtime": 73.409, - "eval_samples_per_second": 2.37, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 0.8887814283370972, + "eval_runtime": 131.5304, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1380 }, { "epoch": 5.31, "learning_rate": 0.00012945083014048533, - "loss": 0.508, + "loss": 0.3513, "step": 1385 }, { "epoch": 5.31, - "eval_accuracy": 0.6666666666666666, - "eval_loss": 0.9115325212478638, - "eval_runtime": 75.2816, - "eval_samples_per_second": 2.311, - "eval_steps_per_second": 0.292, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.7672654986381531, + "eval_runtime": 131.5428, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1385 }, { "epoch": 5.33, "learning_rate": 0.0001291954022988506, - "loss": 0.4844, + "loss": 0.3606, "step": 1390 }, { "epoch": 5.33, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 0.7564892768859863, - "eval_runtime": 73.4276, - "eval_samples_per_second": 2.37, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 0.7865743637084961, + "eval_runtime": 131.6146, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1390 }, { "epoch": 5.34, "learning_rate": 0.00012893997445721583, - "loss": 0.6529, + "loss": 0.539, "step": 1395 }, { "epoch": 5.34, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 0.7532500624656677, - "eval_runtime": 74.281, - "eval_samples_per_second": 2.342, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 0.8202821612358093, + "eval_runtime": 131.7497, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1395 }, { "epoch": 5.36, "learning_rate": 0.00012868454661558112, - "loss": 0.674, + "loss": 0.5642, "step": 1400 }, { "epoch": 5.36, "eval_accuracy": 0.6436781609195402, - "eval_loss": 0.9024896621704102, - "eval_runtime": 74.2674, - "eval_samples_per_second": 2.343, - "eval_steps_per_second": 0.296, + "eval_loss": 1.1715248823165894, + "eval_runtime": 131.6954, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1400 }, { "epoch": 5.38, "learning_rate": 0.00012842911877394635, - "loss": 0.9236, + "loss": 0.5947, "step": 1405 }, { "epoch": 5.38, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 0.7951949834823608, - "eval_runtime": 73.902, - "eval_samples_per_second": 2.354, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 0.9037488102912903, + "eval_runtime": 134.8795, + "eval_samples_per_second": 1.29, + "eval_steps_per_second": 0.163, "step": 1405 }, { "epoch": 5.4, "learning_rate": 0.00012817369093231162, - "loss": 0.523, + "loss": 0.4159, "step": 1410 }, { "epoch": 5.4, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 0.7487069368362427, - "eval_runtime": 74.6134, - "eval_samples_per_second": 2.332, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.8044033050537109, + "eval_runtime": 131.6438, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1410 }, { "epoch": 5.42, "learning_rate": 0.00012791826309067688, - "loss": 0.3512, + "loss": 0.537, "step": 1415 }, { "epoch": 5.42, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 0.9300501942634583, - "eval_runtime": 74.1082, - "eval_samples_per_second": 2.348, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.9017471075057983, + "eval_runtime": 131.673, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1415 }, { "epoch": 5.44, "learning_rate": 0.00012766283524904214, - "loss": 0.5621, + "loss": 0.3371, "step": 1420 }, { "epoch": 5.44, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 0.9454444050788879, - "eval_runtime": 75.121, - "eval_samples_per_second": 2.316, - "eval_steps_per_second": 0.293, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.0497161149978638, + "eval_runtime": 131.6194, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1420 }, { "epoch": 5.46, "learning_rate": 0.0001274074074074074, - "loss": 0.3093, + "loss": 0.5586, "step": 1425 }, { "epoch": 5.46, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 0.8725366592407227, - "eval_runtime": 74.2816, - "eval_samples_per_second": 2.342, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.8958852291107178, + "eval_runtime": 131.522, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1425 }, { "epoch": 5.48, "learning_rate": 0.00012715197956577267, - "loss": 0.5955, + "loss": 0.7869, "step": 1430 }, { "epoch": 5.48, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 0.8809516429901123, - "eval_runtime": 73.6012, - "eval_samples_per_second": 2.364, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 0.8901216387748718, + "eval_runtime": 131.4981, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1430 }, { "epoch": 5.5, "learning_rate": 0.00012689655172413793, - "loss": 0.2704, + "loss": 0.2459, "step": 1435 }, { "epoch": 5.5, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 0.9333508610725403, - "eval_runtime": 74.0008, - "eval_samples_per_second": 2.351, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 0.7955420613288879, + "eval_runtime": 131.5057, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1435 }, { "epoch": 5.52, "learning_rate": 0.0001266411238825032, - "loss": 0.3438, + "loss": 0.0927, "step": 1440 }, { "epoch": 5.52, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 0.9162984490394592, - "eval_runtime": 73.7475, - "eval_samples_per_second": 2.359, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 0.9924725294113159, + "eval_runtime": 131.6887, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1440 }, { "epoch": 5.54, "learning_rate": 0.00012638569604086846, - "loss": 0.2719, + "loss": 0.4005, "step": 1445 }, { "epoch": 5.54, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 0.9165803790092468, - "eval_runtime": 73.1738, - "eval_samples_per_second": 2.378, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.1416829824447632, + "eval_runtime": 131.6825, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1445 }, { "epoch": 5.56, "learning_rate": 0.00012613026819923372, - "loss": 0.4038, + "loss": 0.4054, "step": 1450 }, { "epoch": 5.56, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 0.951847493648529, - "eval_runtime": 73.7477, - "eval_samples_per_second": 2.359, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 0.9588910937309265, + "eval_runtime": 131.6228, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1450 }, { "epoch": 5.57, "learning_rate": 0.00012587484035759899, - "loss": 0.8747, + "loss": 0.5116, "step": 1455 }, { "epoch": 5.57, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.006085753440857, - "eval_runtime": 73.6005, - "eval_samples_per_second": 2.364, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.0598994493484497, + "eval_runtime": 131.7436, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1455 }, { "epoch": 5.59, "learning_rate": 0.00012561941251596425, - "loss": 0.5741, + "loss": 0.257, "step": 1460 }, { "epoch": 5.59, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.0438010692596436, - "eval_runtime": 71.8134, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 1.162794828414917, + "eval_runtime": 131.7132, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1460 }, { "epoch": 5.61, "learning_rate": 0.0001253639846743295, - "loss": 0.732, + "loss": 0.2287, "step": 1465 }, { "epoch": 5.61, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 1.0875169038772583, - "eval_runtime": 72.3827, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 1.2925313711166382, + "eval_runtime": 131.7156, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1465 }, { "epoch": 5.63, "learning_rate": 0.00012510855683269478, - "loss": 0.5971, + "loss": 0.8024, "step": 1470 }, { "epoch": 5.63, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.062251091003418, - "eval_runtime": 71.8803, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.0764447450637817, + "eval_runtime": 134.9292, + "eval_samples_per_second": 1.29, + "eval_steps_per_second": 0.163, "step": 1470 }, { "epoch": 5.65, "learning_rate": 0.00012485312899106004, - "loss": 0.6264, + "loss": 0.4949, "step": 1475 }, { "epoch": 5.65, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 0.9514502882957458, - "eval_runtime": 72.7337, - "eval_samples_per_second": 2.392, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.1142551898956299, + "eval_runtime": 131.5409, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1475 }, { "epoch": 5.67, "learning_rate": 0.00012459770114942528, - "loss": 0.681, + "loss": 0.4655, "step": 1480 }, { "epoch": 5.67, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 1.0149379968643188, - "eval_runtime": 71.7065, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 1.353607177734375, + "eval_runtime": 131.5492, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1480 }, { "epoch": 5.69, "learning_rate": 0.00012434227330779057, - "loss": 0.8418, + "loss": 0.7536, "step": 1485 }, { "epoch": 5.69, - "eval_accuracy": 0.6206896551724138, - "eval_loss": 1.1708879470825195, - "eval_runtime": 74.2125, - "eval_samples_per_second": 2.345, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.0325322151184082, + "eval_runtime": 134.5218, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.164, "step": 1485 }, { "epoch": 5.71, "learning_rate": 0.0001240868454661558, - "loss": 0.5313, + "loss": 0.326, "step": 1490 }, { "epoch": 5.71, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 1.0242153406143188, - "eval_runtime": 71.6182, - "eval_samples_per_second": 2.43, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.112859845161438, + "eval_runtime": 131.7407, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1490 }, { "epoch": 5.73, "learning_rate": 0.0001238314176245211, - "loss": 0.5052, + "loss": 0.5502, "step": 1495 }, { "epoch": 5.73, "eval_accuracy": 0.6551724137931034, - "eval_loss": 1.254513144493103, - "eval_runtime": 72.1503, - "eval_samples_per_second": 2.412, - "eval_steps_per_second": 0.305, + "eval_loss": 1.3975075483322144, + "eval_runtime": 131.6069, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1495 }, { "epoch": 5.75, "learning_rate": 0.00012357598978288633, - "loss": 1.002, + "loss": 0.7814, "step": 1500 }, { "epoch": 5.75, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.1145654916763306, - "eval_runtime": 71.7734, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6436781609195402, + "eval_loss": 1.479273796081543, + "eval_runtime": 132.0973, + "eval_samples_per_second": 1.317, + "eval_steps_per_second": 0.167, "step": 1500 }, { "epoch": 5.77, "learning_rate": 0.00012332056194125162, - "loss": 0.4721, + "loss": 0.8521, "step": 1505 }, { "epoch": 5.77, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.091664433479309, - "eval_runtime": 74.1208, - "eval_samples_per_second": 2.348, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.2535182237625122, + "eval_runtime": 131.6457, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1505 }, { "epoch": 5.79, "learning_rate": 0.00012306513409961686, - "loss": 0.3988, + "loss": 0.452, "step": 1510 }, { "epoch": 5.79, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 1.0722365379333496, - "eval_runtime": 73.3159, - "eval_samples_per_second": 2.373, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.3425843715667725, + "eval_runtime": 135.2782, + "eval_samples_per_second": 1.286, + "eval_steps_per_second": 0.163, "step": 1510 }, { "epoch": 5.8, "learning_rate": 0.00012280970625798212, - "loss": 0.8568, + "loss": 1.1244, "step": 1515 }, { "epoch": 5.8, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 0.8946850895881653, - "eval_runtime": 72.2801, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.1074714660644531, + "eval_runtime": 131.544, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1515 }, { "epoch": 5.82, "learning_rate": 0.00012255427841634738, - "loss": 0.4908, + "loss": 0.5211, "step": 1520 }, { "epoch": 5.82, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 0.8863809704780579, - "eval_runtime": 71.7599, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 1.0719375610351562, + "eval_runtime": 131.5325, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1520 }, { "epoch": 5.84, "learning_rate": 0.00012229885057471265, - "loss": 0.5216, + "loss": 0.4944, "step": 1525 }, { "epoch": 5.84, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 0.9632396697998047, - "eval_runtime": 72.267, - "eval_samples_per_second": 2.408, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 1.1987359523773193, + "eval_runtime": 131.5625, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1525 }, { "epoch": 5.86, "learning_rate": 0.00012204342273307792, - "loss": 0.7425, + "loss": 0.619, "step": 1530 }, { "epoch": 5.86, - "eval_accuracy": 0.735632183908046, - "eval_loss": 0.828774631023407, - "eval_runtime": 73.4716, - "eval_samples_per_second": 2.368, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.0625687837600708, + "eval_runtime": 131.5921, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1530 }, { "epoch": 5.88, "learning_rate": 0.00012178799489144317, - "loss": 0.5288, + "loss": 0.3932, "step": 1535 }, { "epoch": 5.88, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 1.1222902536392212, - "eval_runtime": 72.267, - "eval_samples_per_second": 2.408, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6149425287356322, + "eval_loss": 1.371453881263733, + "eval_runtime": 134.6534, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 1535 }, { "epoch": 5.9, "learning_rate": 0.00012153256704980845, - "loss": 0.7604, + "loss": 1.001, "step": 1540 }, { "epoch": 5.9, - "eval_accuracy": 0.6436781609195402, - "eval_loss": 1.065313458442688, - "eval_runtime": 71.7733, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6206896551724138, + "eval_loss": 1.1620148420333862, + "eval_runtime": 131.6762, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1540 }, { "epoch": 5.92, "learning_rate": 0.0001212771392081737, - "loss": 0.4761, + "loss": 0.4258, "step": 1545 }, { "epoch": 5.92, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 0.9595795273780823, - "eval_runtime": 72.1868, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.0935604572296143, + "eval_runtime": 131.6743, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1545 }, { "epoch": 5.94, "learning_rate": 0.00012102171136653895, - "loss": 0.7556, + "loss": 0.6611, "step": 1550 }, { "epoch": 5.94, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 0.9294580817222595, - "eval_runtime": 71.6133, - "eval_samples_per_second": 2.43, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 1.1292699575424194, + "eval_runtime": 134.8793, + "eval_samples_per_second": 1.29, + "eval_steps_per_second": 0.163, "step": 1550 }, { "epoch": 5.96, "learning_rate": 0.00012076628352490423, - "loss": 0.7834, + "loss": 0.6265, "step": 1555 }, { "epoch": 5.96, - "eval_accuracy": 0.6551724137931034, - "eval_loss": 0.9481978416442871, - "eval_runtime": 72.222, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.937195360660553, + "eval_runtime": 131.5531, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1555 }, { "epoch": 5.98, "learning_rate": 0.00012051085568326948, - "loss": 0.8625, + "loss": 0.5634, "step": 1560 }, { "epoch": 5.98, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 0.9149760007858276, - "eval_runtime": 71.6268, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.8594533205032349, + "eval_runtime": 131.654, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1560 }, { "epoch": 6.0, "learning_rate": 0.00012025542784163475, - "loss": 0.4823, + "loss": 0.3986, "step": 1565 }, { "epoch": 6.0, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 0.8527363538742065, - "eval_runtime": 72.4135, - "eval_samples_per_second": 2.403, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 0.9495770931243896, + "eval_runtime": 131.7261, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1565 }, { "epoch": 6.02, "learning_rate": 0.00012, - "loss": 0.6611, + "loss": 0.2532, "step": 1570 }, { "epoch": 6.02, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 1.1094207763671875, - "eval_runtime": 71.6394, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.0734517574310303, + "eval_runtime": 131.5139, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1570 }, { "epoch": 6.03, "learning_rate": 0.00011974457215836528, - "loss": 0.4618, + "loss": 0.1247, "step": 1575 }, { "epoch": 6.03, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 0.9192668199539185, - "eval_runtime": 74.1876, - "eval_samples_per_second": 2.345, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 1.0141799449920654, + "eval_runtime": 131.6291, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1575 }, { "epoch": 6.05, "learning_rate": 0.00011948914431673053, - "loss": 0.3332, + "loss": 0.2655, "step": 1580 }, { "epoch": 6.05, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 0.8720999360084534, - "eval_runtime": 73.8274, - "eval_samples_per_second": 2.357, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 0.9764288663864136, + "eval_runtime": 131.78, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 1580 }, { "epoch": 6.07, "learning_rate": 0.00011923371647509578, - "loss": 0.4447, + "loss": 0.5073, "step": 1585 }, { "epoch": 6.07, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 0.8002211451530457, - "eval_runtime": 74.081, - "eval_samples_per_second": 2.349, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.0659440755844116, + "eval_runtime": 131.6962, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1585 }, { "epoch": 6.09, "learning_rate": 0.00011897828863346106, - "loss": 0.4332, + "loss": 0.5426, "step": 1590 }, { "epoch": 6.09, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 0.8471765518188477, - "eval_runtime": 71.7734, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 1.1733648777008057, + "eval_runtime": 131.6005, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1590 }, { "epoch": 6.11, "learning_rate": 0.0001187228607918263, - "loss": 0.3504, + "loss": 0.3363, "step": 1595 }, { "epoch": 6.11, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 0.8792366981506348, - "eval_runtime": 72.3203, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 1.0996015071868896, + "eval_runtime": 131.7781, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 1595 }, { "epoch": 6.13, "learning_rate": 0.00011846743295019158, - "loss": 0.1991, + "loss": 0.1399, "step": 1600 }, { "epoch": 6.13, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 0.9491644501686096, - "eval_runtime": 71.7555, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 1.1617628335952759, + "eval_runtime": 131.6546, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1600 }, { "epoch": 6.15, "learning_rate": 0.00011821200510855683, - "loss": 0.2901, + "loss": 0.1777, "step": 1605 }, { "epoch": 6.15, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.0009914636611938, - "eval_runtime": 72.2758, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.0963499546051025, + "eval_runtime": 131.6556, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1605 }, { "epoch": 6.17, "learning_rate": 0.00011795657726692211, - "loss": 0.6851, + "loss": 0.2072, "step": 1610 }, { "epoch": 6.17, - "eval_accuracy": 0.6724137931034483, - "eval_loss": 1.0788379907608032, - "eval_runtime": 71.7844, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.0916483402252197, + "eval_runtime": 131.5815, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1610 }, { "epoch": 6.19, "learning_rate": 0.00011770114942528736, - "loss": 0.5596, + "loss": 0.3631, "step": 1615 }, { "epoch": 6.19, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.04649019241333, - "eval_runtime": 72.2936, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.227651596069336, + "eval_runtime": 135.4252, + "eval_samples_per_second": 1.285, + "eval_steps_per_second": 0.162, "step": 1615 }, { "epoch": 6.21, "learning_rate": 0.00011744572158365264, - "loss": 0.4059, + "loss": 0.2738, "step": 1620 }, { "epoch": 6.21, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 0.9852347373962402, - "eval_runtime": 71.7201, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.0606082677841187, + "eval_runtime": 131.4727, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1620 }, { "epoch": 6.23, "learning_rate": 0.00011719029374201789, - "loss": 0.4061, + "loss": 0.2884, "step": 1625 }, { "epoch": 6.23, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.0554276704788208, - "eval_runtime": 73.1205, - "eval_samples_per_second": 2.38, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.5919540229885057, + "eval_loss": 1.3723126649856567, + "eval_runtime": 131.7429, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1625 }, { "epoch": 6.25, "learning_rate": 0.00011693486590038314, - "loss": 0.2499, + "loss": 0.5021, "step": 1630 }, { "epoch": 6.25, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.0310966968536377, - "eval_runtime": 72.0535, - "eval_samples_per_second": 2.415, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.5977011494252874, + "eval_loss": 1.417180061340332, + "eval_runtime": 132.2401, + "eval_samples_per_second": 1.316, + "eval_steps_per_second": 0.166, "step": 1630 }, { "epoch": 6.26, "learning_rate": 0.00011667943805874841, - "loss": 0.3739, + "loss": 0.1844, "step": 1635 }, { "epoch": 6.26, - "eval_accuracy": 0.7701149425287356, - "eval_loss": 0.8703017234802246, - "eval_runtime": 74.24, - "eval_samples_per_second": 2.344, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.007380723953247, + "eval_runtime": 135.4247, + "eval_samples_per_second": 1.285, + "eval_steps_per_second": 0.162, "step": 1635 }, { "epoch": 6.28, "learning_rate": 0.00011642401021711366, - "loss": 0.4323, + "loss": 0.5767, "step": 1640 }, { "epoch": 6.28, "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.0342254638671875, - "eval_runtime": 71.7608, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_loss": 1.0852534770965576, + "eval_runtime": 131.63, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1640 }, { "epoch": 6.3, "learning_rate": 0.00011616858237547894, - "loss": 0.417, + "loss": 0.5302, "step": 1645 }, { "epoch": 6.3, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.004945993423462, - "eval_runtime": 72.2243, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.877869188785553, + "eval_runtime": 131.6419, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1645 }, { "epoch": 6.32, "learning_rate": 0.00011591315453384419, - "loss": 0.4087, + "loss": 0.3801, "step": 1650 }, { "epoch": 6.32, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 1.0639106035232544, - "eval_runtime": 71.646, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 0.822873592376709, + "eval_runtime": 134.6907, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 1650 }, { "epoch": 6.34, "learning_rate": 0.00011565772669220947, - "loss": 0.3422, + "loss": 0.3961, "step": 1655 }, { "epoch": 6.34, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.1308996677398682, - "eval_runtime": 73.9785, - "eval_samples_per_second": 2.352, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 0.7614623308181763, + "eval_runtime": 131.6409, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1655 }, { "epoch": 6.36, "learning_rate": 0.00011540229885057472, - "loss": 0.4619, + "loss": 0.2774, "step": 1660 }, { "epoch": 6.36, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.0824979543685913, - "eval_runtime": 71.7053, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.8031173944473267, + "eval_runtime": 131.543, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1660 }, { "epoch": 6.38, "learning_rate": 0.00011514687100893997, - "loss": 0.4767, + "loss": 0.4219, "step": 1665 }, { "epoch": 6.38, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.046342134475708, - "eval_runtime": 72.3509, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 0.8798435926437378, + "eval_runtime": 131.6434, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1665 }, { "epoch": 6.4, "learning_rate": 0.00011489144316730524, - "loss": 0.4737, + "loss": 0.4269, "step": 1670 }, { "epoch": 6.4, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 0.9138516783714294, - "eval_runtime": 71.5867, - "eval_samples_per_second": 2.431, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 0.7993461489677429, + "eval_runtime": 131.6926, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1670 }, { "epoch": 6.42, "learning_rate": 0.00011463601532567049, - "loss": 0.2711, + "loss": 0.0621, "step": 1675 }, { "epoch": 6.42, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 0.9746137857437134, - "eval_runtime": 72.2002, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 0.8598664999008179, + "eval_runtime": 131.7999, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 1675 }, { "epoch": 6.44, "learning_rate": 0.00011438058748403577, - "loss": 0.3956, + "loss": 0.1985, "step": 1680 }, { "epoch": 6.44, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 1.1271107196807861, - "eval_runtime": 71.5339, - "eval_samples_per_second": 2.432, - "eval_steps_per_second": 0.308, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.0499699115753174, + "eval_runtime": 134.7139, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 1680 }, { "epoch": 6.46, "learning_rate": 0.00011412515964240102, - "loss": 0.5138, + "loss": 0.2481, "step": 1685 }, { "epoch": 6.46, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 1.204206109046936, - "eval_runtime": 73.8943, - "eval_samples_per_second": 2.355, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.6494252873563219, + "eval_loss": 1.2009177207946777, + "eval_runtime": 134.8296, + "eval_samples_per_second": 1.291, + "eval_steps_per_second": 0.163, "step": 1685 }, { "epoch": 6.48, "learning_rate": 0.0001138697318007663, - "loss": 0.3022, + "loss": 0.3036, "step": 1690 }, { "epoch": 6.48, - "eval_accuracy": 0.6436781609195402, - "eval_loss": 1.4267605543136597, - "eval_runtime": 73.7212, - "eval_samples_per_second": 2.36, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 1.1416090726852417, + "eval_runtime": 131.6305, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1690 }, { "epoch": 6.49, "learning_rate": 0.00011361430395913155, - "loss": 0.4592, + "loss": 0.4456, "step": 1695 }, { "epoch": 6.49, - "eval_accuracy": 0.6206896551724138, - "eval_loss": 1.6285024881362915, - "eval_runtime": 72.3063, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 0.9773580431938171, + "eval_runtime": 131.6527, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1695 }, { "epoch": 6.51, "learning_rate": 0.00011335887611749681, - "loss": 0.7306, + "loss": 0.1675, "step": 1700 }, { "epoch": 6.51, - "eval_accuracy": 0.6724137931034483, - "eval_loss": 1.4132676124572754, - "eval_runtime": 73.4541, - "eval_samples_per_second": 2.369, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.015581488609314, + "eval_runtime": 133.994, + "eval_samples_per_second": 1.299, + "eval_steps_per_second": 0.164, "step": 1700 }, { "epoch": 6.53, "learning_rate": 0.00011310344827586207, - "loss": 0.2819, + "loss": 0.0375, "step": 1705 }, { "epoch": 6.53, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 1.0855623483657837, - "eval_runtime": 72.8007, - "eval_samples_per_second": 2.39, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.1146481037139893, + "eval_runtime": 131.6013, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1705 }, { "epoch": 6.55, "learning_rate": 0.00011284802043422734, - "loss": 0.3341, + "loss": 0.4738, "step": 1710 }, { "epoch": 6.55, - "eval_accuracy": 0.6436781609195402, - "eval_loss": 1.190242052078247, - "eval_runtime": 73.4674, - "eval_samples_per_second": 2.368, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.221787691116333, + "eval_runtime": 131.6201, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1710 }, { "epoch": 6.57, "learning_rate": 0.0001125925925925926, - "loss": 0.2632, + "loss": 0.2477, "step": 1715 }, { "epoch": 6.57, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 1.2406810522079468, - "eval_runtime": 72.2134, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.130721926689148, + "eval_runtime": 131.564, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1715 }, { "epoch": 6.59, "learning_rate": 0.00011233716475095786, - "loss": 0.3776, + "loss": 0.4616, "step": 1720 }, { "epoch": 6.59, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.3052959442138672, - "eval_runtime": 71.6934, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 1.2159687280654907, + "eval_runtime": 131.7356, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1720 }, { "epoch": 6.61, "learning_rate": 0.00011208173690932313, - "loss": 0.4002, + "loss": 0.2249, "step": 1725 }, { "epoch": 6.61, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.335128903388977, - "eval_runtime": 72.1244, - "eval_samples_per_second": 2.412, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.1174477338790894, + "eval_runtime": 131.6794, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1725 }, { "epoch": 6.63, "learning_rate": 0.00011182630906768839, - "loss": 0.4399, + "loss": 0.3522, "step": 1730 }, { "epoch": 6.63, - "eval_accuracy": 0.6494252873563219, - "eval_loss": 1.5893123149871826, - "eval_runtime": 71.5155, - "eval_samples_per_second": 2.433, - "eval_steps_per_second": 0.308, + "eval_accuracy": 0.6609195402298851, + "eval_loss": 1.5797643661499023, + "eval_runtime": 135.457, + "eval_samples_per_second": 1.285, + "eval_steps_per_second": 0.162, "step": 1730 }, { "epoch": 6.65, "learning_rate": 0.00011157088122605364, - "loss": 0.7733, + "loss": 1.0043, "step": 1735 }, { "epoch": 6.65, - "eval_accuracy": 0.6666666666666666, - "eval_loss": 1.5668152570724487, - "eval_runtime": 72.1994, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.6363530158996582, + "eval_runtime": 131.5665, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1735 }, { "epoch": 6.67, "learning_rate": 0.00011131545338441892, - "loss": 0.5065, + "loss": 0.6924, "step": 1740 }, { "epoch": 6.67, - "eval_accuracy": 0.6494252873563219, - "eval_loss": 1.554033637046814, - "eval_runtime": 72.0935, - "eval_samples_per_second": 2.414, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.2205984592437744, + "eval_runtime": 131.6305, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1740 }, { "epoch": 6.69, "learning_rate": 0.00011106002554278417, - "loss": 0.8585, + "loss": 0.4349, "step": 1745 }, { "epoch": 6.69, - "eval_accuracy": 0.6206896551724138, - "eval_loss": 1.5956915616989136, - "eval_runtime": 72.2252, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6379310344827587, + "eval_loss": 1.386996865272522, + "eval_runtime": 131.7762, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 1745 }, { "epoch": 6.7, "learning_rate": 0.00011080459770114944, - "loss": 0.798, + "loss": 0.5771, "step": 1750 }, { "epoch": 6.7, - "eval_accuracy": 0.6551724137931034, - "eval_loss": 1.2651970386505127, - "eval_runtime": 71.7377, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.1229805946350098, + "eval_runtime": 131.6963, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1750 }, { "epoch": 6.72, "learning_rate": 0.00011054916985951469, - "loss": 0.6362, + "loss": 0.7905, "step": 1755 }, { "epoch": 6.72, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.0616276264190674, - "eval_runtime": 72.227, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 0.8128155469894409, + "eval_runtime": 131.306, + "eval_samples_per_second": 1.325, + "eval_steps_per_second": 0.168, "step": 1755 }, { "epoch": 6.74, "learning_rate": 0.00011029374201787997, - "loss": 0.6891, + "loss": 0.3866, "step": 1760 }, { "epoch": 6.74, - "eval_accuracy": 0.6551724137931034, - "eval_loss": 1.1532150506973267, - "eval_runtime": 73.3739, - "eval_samples_per_second": 2.371, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.9595608711242676, + "eval_runtime": 131.4978, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1760 }, { "epoch": 6.76, "learning_rate": 0.00011003831417624522, - "loss": 0.4885, + "loss": 0.4893, "step": 1765 }, { "epoch": 6.76, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 0.964917778968811, - "eval_runtime": 72.3603, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.9293403029441833, + "eval_runtime": 131.571, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1765 }, { "epoch": 6.78, "learning_rate": 0.00010978288633461047, - "loss": 0.2212, + "loss": 0.0439, "step": 1770 }, { "epoch": 6.78, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 0.8999590873718262, - "eval_runtime": 71.6665, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.8708416223526001, + "eval_runtime": 131.7352, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1770 }, { "epoch": 6.8, "learning_rate": 0.00010952745849297575, - "loss": 0.3838, + "loss": 0.1671, "step": 1775 }, { "epoch": 6.8, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 0.9244369268417358, - "eval_runtime": 72.3203, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 0.9392874836921692, + "eval_runtime": 131.5959, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1775 }, { "epoch": 6.82, "learning_rate": 0.000109272030651341, - "loss": 0.4909, + "loss": 0.3718, "step": 1780 }, { "epoch": 6.82, "eval_accuracy": 0.7298850574712644, - "eval_loss": 0.8321231603622437, - "eval_runtime": 73.7474, - "eval_samples_per_second": 2.359, - "eval_steps_per_second": 0.298, + "eval_loss": 1.0529624223709106, + "eval_runtime": 131.7631, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1780 }, { "epoch": 6.84, "learning_rate": 0.00010901660280970627, - "loss": 0.3924, + "loss": 0.5083, "step": 1785 }, { "epoch": 6.84, - "eval_accuracy": 0.764367816091954, - "eval_loss": 0.7822464108467102, - "eval_runtime": 72.3997, - "eval_samples_per_second": 2.403, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.0124198198318481, + "eval_runtime": 131.6354, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1785 }, { "epoch": 6.86, "learning_rate": 0.00010876117496807152, - "loss": 0.5337, + "loss": 0.4628, "step": 1790 }, { "epoch": 6.86, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 0.8443244099617004, - "eval_runtime": 71.6807, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.1633274555206299, + "eval_runtime": 135.5536, + "eval_samples_per_second": 1.284, + "eval_steps_per_second": 0.162, "step": 1790 }, { "epoch": 6.88, "learning_rate": 0.0001085057471264368, - "loss": 0.2258, + "loss": 0.3202, "step": 1795 }, { "epoch": 6.88, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 0.8795732259750366, - "eval_runtime": 72.2803, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 0.9682561159133911, + "eval_runtime": 131.5899, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1795 }, { "epoch": 6.9, "learning_rate": 0.00010825031928480205, - "loss": 0.496, + "loss": 0.1311, "step": 1800 }, { "epoch": 6.9, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 0.9166645407676697, - "eval_runtime": 73.3739, - "eval_samples_per_second": 2.371, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 0.9268329739570618, + "eval_runtime": 131.5503, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1800 }, { "epoch": 6.92, "learning_rate": 0.0001079948914431673, - "loss": 0.6166, + "loss": 0.4311, "step": 1805 }, { "epoch": 6.92, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 0.969086766242981, - "eval_runtime": 72.2001, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 0.889406144618988, + "eval_runtime": 131.7168, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1805 }, { "epoch": 6.93, "learning_rate": 0.00010773946360153258, - "loss": 0.1913, + "loss": 0.4067, "step": 1810 }, { "epoch": 6.93, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 0.9656856060028076, - "eval_runtime": 73.6808, - "eval_samples_per_second": 2.362, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 0.9293746948242188, + "eval_runtime": 131.5506, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1810 }, { "epoch": 6.95, "learning_rate": 0.00010748403575989783, - "loss": 0.5242, + "loss": 0.1898, "step": 1815 }, { "epoch": 6.95, - "eval_accuracy": 0.6609195402298851, - "eval_loss": 1.2142490148544312, - "eval_runtime": 72.4203, - "eval_samples_per_second": 2.403, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.1521073579788208, + "eval_runtime": 131.7195, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1815 }, { "epoch": 6.97, "learning_rate": 0.0001072286079182631, - "loss": 0.8631, + "loss": 0.695, "step": 1820 }, { "epoch": 6.97, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 1.0861307382583618, - "eval_runtime": 71.8136, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.735632183908046, + "eval_loss": 0.9605730175971985, + "eval_runtime": 131.5115, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1820 }, { "epoch": 6.99, "learning_rate": 0.00010697318007662835, - "loss": 0.1719, + "loss": 0.0965, "step": 1825 }, { "epoch": 6.99, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.0733994245529175, - "eval_runtime": 72.1759, - "eval_samples_per_second": 2.411, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.0009174346923828, + "eval_runtime": 131.6509, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1825 }, { "epoch": 7.01, "learning_rate": 0.00010671775223499363, - "loss": 0.3511, + "loss": 0.5734, "step": 1830 }, { "epoch": 7.01, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 0.9313093423843384, - "eval_runtime": 71.8799, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 0.9490659832954407, + "eval_runtime": 131.7585, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1830 }, { "epoch": 7.03, "learning_rate": 0.00010646232439335888, - "loss": 0.314, + "loss": 0.4251, "step": 1835 }, { "epoch": 7.03, "eval_accuracy": 0.7471264367816092, - "eval_loss": 0.9551235437393188, - "eval_runtime": 75.0945, - "eval_samples_per_second": 2.317, - "eval_steps_per_second": 0.293, + "eval_loss": 0.9266923666000366, + "eval_runtime": 131.6985, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1835 }, { "epoch": 7.05, "learning_rate": 0.00010620689655172413, - "loss": 0.1106, + "loss": 0.027, "step": 1840 }, { "epoch": 7.05, - "eval_accuracy": 0.7586206896551724, - "eval_loss": 0.9258528351783752, - "eval_runtime": 74.9492, - "eval_samples_per_second": 2.322, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 0.8665022850036621, + "eval_runtime": 131.6926, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1840 }, { "epoch": 7.07, "learning_rate": 0.0001059514687100894, - "loss": 0.0608, + "loss": 0.0263, "step": 1845 }, { "epoch": 7.07, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 0.960231363773346, - "eval_runtime": 74.015, - "eval_samples_per_second": 2.351, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 0.9958654642105103, + "eval_runtime": 131.7555, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1845 }, { "epoch": 7.09, "learning_rate": 0.00010569604086845466, - "loss": 0.1119, + "loss": 0.0401, "step": 1850 }, { "epoch": 7.09, "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.0473183393478394, - "eval_runtime": 75.0678, - "eval_samples_per_second": 2.318, - "eval_steps_per_second": 0.293, + "eval_loss": 1.1048107147216797, + "eval_runtime": 131.5719, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1850 }, { "epoch": 7.11, "learning_rate": 0.00010544061302681993, - "loss": 0.153, + "loss": 0.1228, "step": 1855 }, { "epoch": 7.11, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.066941499710083, - "eval_runtime": 73.2783, - "eval_samples_per_second": 2.375, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.3274551630020142, + "eval_runtime": 131.6336, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 1855 }, { "epoch": 7.13, "learning_rate": 0.00010518518518518518, - "loss": 0.0751, + "loss": 0.0653, "step": 1860 }, { "epoch": 7.13, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.1262329816818237, - "eval_runtime": 73.4273, - "eval_samples_per_second": 2.37, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.5582555532455444, + "eval_runtime": 131.7409, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1860 }, { "epoch": 7.15, "learning_rate": 0.00010492975734355046, - "loss": 0.0407, + "loss": 0.3934, "step": 1865 }, { "epoch": 7.15, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.2633436918258667, - "eval_runtime": 75.5481, - "eval_samples_per_second": 2.303, - "eval_steps_per_second": 0.291, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.6236568689346313, + "eval_runtime": 131.8161, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 1865 }, { "epoch": 7.16, "learning_rate": 0.00010467432950191571, - "loss": 0.0824, + "loss": 0.0691, "step": 1870 }, { "epoch": 7.16, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.3817542791366577, - "eval_runtime": 75.1612, - "eval_samples_per_second": 2.315, - "eval_steps_per_second": 0.293, + "eval_accuracy": 0.6781609195402298, + "eval_loss": 1.592093586921692, + "eval_runtime": 131.5384, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1870 }, { "epoch": 7.18, "learning_rate": 0.00010441890166028096, - "loss": 0.6322, + "loss": 0.2809, "step": 1875 }, { "epoch": 7.18, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.281891942024231, - "eval_runtime": 76.8685, - "eval_samples_per_second": 2.264, - "eval_steps_per_second": 0.286, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.475704312324524, + "eval_runtime": 136.9559, + "eval_samples_per_second": 1.27, + "eval_steps_per_second": 0.161, "step": 1875 }, { "epoch": 7.2, "learning_rate": 0.00010416347381864624, - "loss": 0.1277, + "loss": 0.0979, "step": 1880 }, { "epoch": 7.2, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.1478445529937744, - "eval_runtime": 72.8672, - "eval_samples_per_second": 2.388, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.3467644453048706, + "eval_runtime": 135.9935, + "eval_samples_per_second": 1.279, + "eval_steps_per_second": 0.162, "step": 1880 }, { "epoch": 7.22, "learning_rate": 0.00010390804597701149, - "loss": 0.1752, + "loss": 0.1615, "step": 1885 }, { "epoch": 7.22, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 1.2275943756103516, - "eval_runtime": 74.0318, - "eval_samples_per_second": 2.35, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.4109445810317993, + "eval_runtime": 133.1481, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 1885 }, { "epoch": 7.24, "learning_rate": 0.00010365261813537676, - "loss": 0.4279, + "loss": 0.3491, "step": 1890 }, { "epoch": 7.24, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.32535719871521, - "eval_runtime": 73.7203, - "eval_samples_per_second": 2.36, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.2932188510894775, + "eval_runtime": 132.6976, + "eval_samples_per_second": 1.311, + "eval_steps_per_second": 0.166, "step": 1890 }, { "epoch": 7.26, "learning_rate": 0.00010339719029374201, - "loss": 0.1829, + "loss": 0.0214, "step": 1895 }, { "epoch": 7.26, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.1822688579559326, - "eval_runtime": 73.8624, - "eval_samples_per_second": 2.356, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.277075171470642, + "eval_runtime": 136.2776, + "eval_samples_per_second": 1.277, + "eval_steps_per_second": 0.161, "step": 1895 }, { "epoch": 7.28, "learning_rate": 0.00010314176245210729, - "loss": 0.7235, + "loss": 0.2161, "step": 1900 }, { "epoch": 7.28, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.2413686513900757, - "eval_runtime": 73.5875, - "eval_samples_per_second": 2.365, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.2838515043258667, + "eval_runtime": 135.9251, + "eval_samples_per_second": 1.28, + "eval_steps_per_second": 0.162, "step": 1900 }, { "epoch": 7.3, "learning_rate": 0.00010288633461047254, - "loss": 0.1977, + "loss": 0.129, "step": 1905 }, { "epoch": 7.3, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.193420648574829, - "eval_runtime": 75.4148, - "eval_samples_per_second": 2.307, - "eval_steps_per_second": 0.292, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.3630799055099487, + "eval_runtime": 134.44, + "eval_samples_per_second": 1.294, + "eval_steps_per_second": 0.164, "step": 1905 }, { "epoch": 7.32, "learning_rate": 0.0001026309067688378, - "loss": 0.2008, + "loss": 0.174, "step": 1910 }, { "epoch": 7.32, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.1319164037704468, - "eval_runtime": 73.4679, - "eval_samples_per_second": 2.368, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.1274833679199219, + "eval_runtime": 132.7649, + "eval_samples_per_second": 1.311, + "eval_steps_per_second": 0.166, "step": 1910 }, { "epoch": 7.34, "learning_rate": 0.00010237547892720307, - "loss": 0.3229, + "loss": 0.0376, "step": 1915 }, { "epoch": 7.34, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.2466601133346558, - "eval_runtime": 74.0142, - "eval_samples_per_second": 2.351, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.2306206226348877, + "eval_runtime": 133.5924, + "eval_samples_per_second": 1.302, + "eval_steps_per_second": 0.165, "step": 1915 }, { "epoch": 7.36, "learning_rate": 0.00010212005108556833, - "loss": 0.2794, + "loss": 0.7968, "step": 1920 }, { "epoch": 7.36, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.3975579738616943, - "eval_runtime": 73.5307, - "eval_samples_per_second": 2.366, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.3550326824188232, + "eval_runtime": 133.8914, + "eval_samples_per_second": 1.3, + "eval_steps_per_second": 0.164, "step": 1920 }, { "epoch": 7.38, "learning_rate": 0.00010186462324393359, - "loss": 0.6104, + "loss": 0.1575, "step": 1925 }, { "epoch": 7.38, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.4495618343353271, - "eval_runtime": 75.3871, - "eval_samples_per_second": 2.308, - "eval_steps_per_second": 0.292, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.4989960193634033, + "eval_runtime": 132.9481, + "eval_samples_per_second": 1.309, + "eval_steps_per_second": 0.165, "step": 1925 }, { "epoch": 7.39, "learning_rate": 0.00010160919540229886, - "loss": 0.3981, + "loss": 0.1735, "step": 1930 }, { "epoch": 7.39, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 1.4250156879425049, - "eval_runtime": 75.0034, - "eval_samples_per_second": 2.32, - "eval_steps_per_second": 0.293, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.6739168167114258, + "eval_runtime": 133.8236, + "eval_samples_per_second": 1.3, + "eval_steps_per_second": 0.164, "step": 1930 }, { "epoch": 7.41, "learning_rate": 0.00010135376756066412, - "loss": 0.4133, + "loss": 0.4192, "step": 1935 }, { "epoch": 7.41, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.3902997970581055, - "eval_runtime": 74.0941, - "eval_samples_per_second": 2.348, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.6551724137931034, + "eval_loss": 1.6104061603546143, + "eval_runtime": 133.0367, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 1935 }, { "epoch": 7.43, "learning_rate": 0.00010109833971902938, - "loss": 0.2211, + "loss": 0.3096, "step": 1940 }, { "epoch": 7.43, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.2912284135818481, - "eval_runtime": 75.5221, - "eval_samples_per_second": 2.304, - "eval_steps_per_second": 0.291, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.521593689918518, + "eval_runtime": 132.8765, + "eval_samples_per_second": 1.309, + "eval_steps_per_second": 0.166, "step": 1940 }, { "epoch": 7.45, "learning_rate": 0.00010084291187739463, - "loss": 0.5127, + "loss": 0.3327, "step": 1945 }, { "epoch": 7.45, "eval_accuracy": 0.6781609195402298, - "eval_loss": 1.1819477081298828, - "eval_runtime": 76.0017, - "eval_samples_per_second": 2.289, - "eval_steps_per_second": 0.289, + "eval_loss": 1.6830250024795532, + "eval_runtime": 133.0706, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 1945 }, { "epoch": 7.47, "learning_rate": 0.00010058748403575991, - "loss": 0.4466, + "loss": 0.276, "step": 1950 }, { "epoch": 7.47, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.1182750463485718, - "eval_runtime": 73.33, - "eval_samples_per_second": 2.373, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.4198530912399292, + "eval_runtime": 133.3314, + "eval_samples_per_second": 1.305, + "eval_steps_per_second": 0.165, "step": 1950 }, { "epoch": 7.49, "learning_rate": 0.00010033205619412516, - "loss": 0.3296, + "loss": 0.0554, "step": 1955 }, { "epoch": 7.49, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.1901663541793823, - "eval_runtime": 74.5747, - "eval_samples_per_second": 2.333, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.2515894174575806, + "eval_runtime": 132.6084, + "eval_samples_per_second": 1.312, + "eval_steps_per_second": 0.166, "step": 1955 }, { "epoch": 7.51, "learning_rate": 0.00010007662835249044, - "loss": 0.2157, + "loss": 0.2187, "step": 1960 }, { "epoch": 7.51, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.127490520477295, - "eval_runtime": 71.6666, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.191728115081787, + "eval_runtime": 133.6638, + "eval_samples_per_second": 1.302, + "eval_steps_per_second": 0.165, "step": 1960 }, { "epoch": 7.53, "learning_rate": 9.982120051085569e-05, - "loss": 0.1349, + "loss": 0.2532, "step": 1965 }, { "epoch": 7.53, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.1302645206451416, - "eval_runtime": 72.3204, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.1683300733566284, + "eval_runtime": 131.8102, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 1965 }, { "epoch": 7.55, "learning_rate": 9.956577266922095e-05, - "loss": 0.2552, + "loss": 0.2872, "step": 1970 }, { "epoch": 7.55, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.1957188844680786, - "eval_runtime": 71.7737, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.311906337738037, + "eval_runtime": 131.5674, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1970 }, { "epoch": 7.57, "learning_rate": 9.931034482758621e-05, - "loss": 0.0794, + "loss": 0.1039, "step": 1975 }, { "epoch": 7.57, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.1891249418258667, - "eval_runtime": 72.3104, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.475795030593872, + "eval_runtime": 131.7442, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1975 }, { "epoch": 7.59, "learning_rate": 9.905491698595148e-05, - "loss": 0.3412, + "loss": 0.3423, "step": 1980 }, { "epoch": 7.59, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.1822692155838013, - "eval_runtime": 71.8274, - "eval_samples_per_second": 2.422, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.3955832719802856, + "eval_runtime": 131.4992, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1980 }, { "epoch": 7.61, "learning_rate": 9.879948914431674e-05, - "loss": 0.066, + "loss": 0.0705, "step": 1985 }, { "epoch": 7.61, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.2583097219467163, - "eval_runtime": 72.2537, - "eval_samples_per_second": 2.408, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.6385270357131958, + "eval_runtime": 131.4457, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 1985 }, { "epoch": 7.62, "learning_rate": 9.8544061302682e-05, - "loss": 0.1778, + "loss": 0.0537, "step": 1990 }, { "epoch": 7.62, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.46968412399292, - "eval_runtime": 71.9868, - "eval_samples_per_second": 2.417, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 1.8518561124801636, + "eval_runtime": 131.5342, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 1990 }, { "epoch": 7.64, "learning_rate": 9.828863346104727e-05, - "loss": 0.2946, + "loss": 0.3629, "step": 1995 }, { "epoch": 7.64, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.4956047534942627, - "eval_runtime": 72.3605, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.6724137931034483, + "eval_loss": 1.7972184419631958, + "eval_runtime": 131.7087, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 1995 }, { "epoch": 7.66, "learning_rate": 9.803320561941252e-05, - "loss": 0.5025, + "loss": 0.7452, "step": 2000 }, { "epoch": 7.66, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.2947438955307007, - "eval_runtime": 73.1739, - "eval_samples_per_second": 2.378, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 1.5261802673339844, + "eval_runtime": 131.6943, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2000 }, { "epoch": 7.68, "learning_rate": 9.777777777777778e-05, - "loss": 0.1465, + "loss": 0.4275, "step": 2005 }, { "epoch": 7.68, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.4249370098114014, - "eval_runtime": 72.9688, - "eval_samples_per_second": 2.385, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.3043127059936523, + "eval_runtime": 131.6861, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2005 }, { "epoch": 7.7, "learning_rate": 9.752234993614304e-05, - "loss": 0.4608, + "loss": 0.1486, "step": 2010 }, { "epoch": 7.7, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.4163832664489746, - "eval_runtime": 71.6967, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.1594352722167969, + "eval_runtime": 131.7597, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2010 }, { "epoch": 7.72, "learning_rate": 9.72669220945083e-05, - "loss": 0.0132, + "loss": 0.1045, "step": 2015 }, { "epoch": 7.72, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.4231857061386108, - "eval_runtime": 72.3309, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.1598647832870483, + "eval_runtime": 131.5067, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2015 }, { "epoch": 7.74, "learning_rate": 9.701149425287357e-05, - "loss": 0.3514, + "loss": 0.5649, "step": 2020 }, { "epoch": 7.74, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.3790318965911865, - "eval_runtime": 71.6757, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.1549986600875854, + "eval_runtime": 131.7512, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2020 }, { "epoch": 7.76, "learning_rate": 9.675606641123883e-05, - "loss": 0.1211, + "loss": 0.1445, "step": 2025 }, { "epoch": 7.76, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.3964256048202515, - "eval_runtime": 72.213, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.3097654581069946, + "eval_runtime": 131.7447, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2025 }, { "epoch": 7.78, "learning_rate": 9.65006385696041e-05, - "loss": 0.2947, + "loss": 0.0164, "step": 2030 }, { "epoch": 7.78, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.3286200761795044, - "eval_runtime": 71.7334, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.50651216506958, + "eval_runtime": 131.8848, + "eval_samples_per_second": 1.319, + "eval_steps_per_second": 0.167, "step": 2030 }, { "epoch": 7.8, "learning_rate": 9.624521072796935e-05, - "loss": 0.49, + "loss": 0.1815, "step": 2035 }, { "epoch": 7.8, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.0810390710830688, - "eval_runtime": 72.3336, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.5009325742721558, + "eval_runtime": 135.4923, + "eval_samples_per_second": 1.284, + "eval_steps_per_second": 0.162, "step": 2035 }, { "epoch": 7.82, "learning_rate": 9.598978288633461e-05, - "loss": 0.165, + "loss": 0.8265, "step": 2040 }, { "epoch": 7.82, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.024723768234253, - "eval_runtime": 71.7199, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.1351158618927002, + "eval_runtime": 131.6271, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2040 }, { "epoch": 7.84, "learning_rate": 9.573435504469987e-05, - "loss": 0.138, + "loss": 0.3273, "step": 2045 }, { "epoch": 7.84, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.224605679512024, - "eval_runtime": 72.3204, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.1325792074203491, + "eval_runtime": 131.7848, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2045 }, { "epoch": 7.85, "learning_rate": 9.547892720306514e-05, - "loss": 0.2485, + "loss": 0.0314, "step": 2050 }, { "epoch": 7.85, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.214393138885498, - "eval_runtime": 71.5732, - "eval_samples_per_second": 2.431, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.173128366470337, + "eval_runtime": 135.703, + "eval_samples_per_second": 1.282, + "eval_steps_per_second": 0.162, "step": 2050 }, { "epoch": 7.87, "learning_rate": 9.52234993614304e-05, - "loss": 0.2188, + "loss": 0.1498, "step": 2055 }, { "epoch": 7.87, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.2269409894943237, - "eval_runtime": 73.9874, - "eval_samples_per_second": 2.352, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.2958407402038574, + "eval_runtime": 132.023, + "eval_samples_per_second": 1.318, + "eval_steps_per_second": 0.167, "step": 2055 }, { "epoch": 7.89, "learning_rate": 9.496807151979566e-05, - "loss": 0.0995, + "loss": 0.0174, "step": 2060 }, { "epoch": 7.89, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.3358938694000244, - "eval_runtime": 73.7608, - "eval_samples_per_second": 2.359, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.4961018562316895, + "eval_runtime": 131.6124, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2060 }, { "epoch": 7.91, "learning_rate": 9.471264367816093e-05, - "loss": 0.4366, + "loss": 0.142, "step": 2065 }, { "epoch": 7.91, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.3700920343399048, - "eval_runtime": 72.2954, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.4059162139892578, + "eval_runtime": 134.8481, + "eval_samples_per_second": 1.29, + "eval_steps_per_second": 0.163, "step": 2065 }, { "epoch": 7.93, "learning_rate": 9.445721583652618e-05, - "loss": 0.2468, + "loss": 0.3848, "step": 2070 }, { "epoch": 7.93, - "eval_accuracy": 0.6724137931034483, - "eval_loss": 1.5623064041137695, - "eval_runtime": 71.8518, - "eval_samples_per_second": 2.422, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.1554484367370605, + "eval_runtime": 134.5446, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.164, "step": 2070 }, { "epoch": 7.95, "learning_rate": 9.420178799489144e-05, - "loss": 0.5595, + "loss": 0.1568, "step": 2075 }, { "epoch": 7.95, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.3248261213302612, - "eval_runtime": 72.2402, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.1746104955673218, + "eval_runtime": 131.6333, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2075 }, { "epoch": 7.97, "learning_rate": 9.39463601532567e-05, - "loss": 0.1288, + "loss": 0.0018, "step": 2080 }, { "epoch": 7.97, "eval_accuracy": 0.735632183908046, - "eval_loss": 1.1915298700332642, - "eval_runtime": 73.574, - "eval_samples_per_second": 2.365, - "eval_steps_per_second": 0.299, + "eval_loss": 1.2762763500213623, + "eval_runtime": 131.5438, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2080 }, { "epoch": 7.99, "learning_rate": 9.369093231162197e-05, - "loss": 0.6715, + "loss": 0.209, "step": 2085 }, { "epoch": 7.99, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.2690212726593018, - "eval_runtime": 72.3729, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.409183144569397, + "eval_runtime": 131.4653, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 2085 }, { "epoch": 8.01, "learning_rate": 9.343550446998723e-05, - "loss": 0.2428, + "loss": 0.1243, "step": 2090 }, { "epoch": 8.01, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.2000457048416138, - "eval_runtime": 73.1206, - "eval_samples_per_second": 2.38, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.392285943031311, + "eval_runtime": 131.5546, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2090 }, { "epoch": 8.03, "learning_rate": 9.318007662835249e-05, - "loss": 0.1369, + "loss": 0.1023, "step": 2095 }, { "epoch": 8.03, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 1.337141752243042, - "eval_runtime": 72.28, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.2922409772872925, + "eval_runtime": 131.7665, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2095 }, { "epoch": 8.05, "learning_rate": 9.292464878671776e-05, - "loss": 0.0289, + "loss": 0.0129, "step": 2100 }, { "epoch": 8.05, - "eval_accuracy": 0.6551724137931034, - "eval_loss": 1.6684503555297852, - "eval_runtime": 71.8209, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.3522768020629883, + "eval_runtime": 131.5422, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2100 }, { "epoch": 8.07, "learning_rate": 9.266922094508302e-05, - "loss": 0.2577, + "loss": 0.0224, "step": 2105 }, { "epoch": 8.07, - "eval_accuracy": 0.6494252873563219, - "eval_loss": 1.607564091682434, - "eval_runtime": 74.3746, - "eval_samples_per_second": 2.34, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.5482807159423828, + "eval_runtime": 131.7205, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2105 }, { "epoch": 8.08, "learning_rate": 9.241379310344827e-05, - "loss": 0.1756, + "loss": 0.2608, "step": 2110 }, { "epoch": 8.08, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.4137848615646362, - "eval_runtime": 71.6266, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.3694931268692017, + "eval_runtime": 131.5827, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2110 }, { "epoch": 8.1, "learning_rate": 9.215836526181353e-05, - "loss": 0.1628, + "loss": 0.0016, "step": 2115 }, { "epoch": 8.1, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.380110502243042, - "eval_runtime": 74.2276, - "eval_samples_per_second": 2.344, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.4523777961730957, + "eval_runtime": 131.7817, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2115 }, { "epoch": 8.12, "learning_rate": 9.19029374201788e-05, - "loss": 0.0913, + "loss": 0.0384, "step": 2120 }, { "epoch": 8.12, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.3470419645309448, - "eval_runtime": 73.3072, - "eval_samples_per_second": 2.374, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.5808136463165283, + "eval_runtime": 134.3004, + "eval_samples_per_second": 1.296, + "eval_steps_per_second": 0.164, "step": 2120 }, { "epoch": 8.14, "learning_rate": 9.164750957854406e-05, - "loss": 0.0892, + "loss": 0.1079, "step": 2125 }, { "epoch": 8.14, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.4106884002685547, - "eval_runtime": 73.9475, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.8028441667556763, + "eval_runtime": 131.7133, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2125 }, { "epoch": 8.16, "learning_rate": 9.139208173690932e-05, - "loss": 0.3874, + "loss": 0.314, "step": 2130 }, { "epoch": 8.16, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.3398877382278442, - "eval_runtime": 71.8799, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.6954022988505747, + "eval_loss": 1.6579396724700928, + "eval_runtime": 131.38, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 2130 }, { "epoch": 8.18, "learning_rate": 9.113665389527459e-05, - "loss": 0.1405, + "loss": 0.0119, "step": 2135 }, { "epoch": 8.18, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.3665359020233154, - "eval_runtime": 74.1921, - "eval_samples_per_second": 2.345, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.4466235637664795, + "eval_runtime": 131.4616, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 2135 }, { "epoch": 8.2, "learning_rate": 9.088122605363985e-05, - "loss": 0.0921, + "loss": 0.0145, "step": 2140 }, { "epoch": 8.2, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.4718743562698364, - "eval_runtime": 71.6672, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.4529680013656616, + "eval_runtime": 131.7249, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2140 }, { "epoch": 8.22, "learning_rate": 9.062579821200511e-05, - "loss": 0.1429, + "loss": 0.2998, "step": 2145 }, { "epoch": 8.22, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.484999179840088, - "eval_runtime": 72.347, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.4054944515228271, + "eval_runtime": 131.6772, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2145 }, { "epoch": 8.24, "learning_rate": 9.037037037037038e-05, - "loss": 0.1498, + "loss": 0.007, "step": 2150 }, { "epoch": 8.24, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.41448974609375, - "eval_runtime": 73.4407, - "eval_samples_per_second": 2.369, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.3814021348953247, + "eval_runtime": 131.7918, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2150 }, { "epoch": 8.26, "learning_rate": 9.011494252873564e-05, - "loss": 0.004, + "loss": 0.0243, "step": 2155 }, { "epoch": 8.26, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.3708915710449219, - "eval_runtime": 72.3246, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.4890694618225098, + "eval_runtime": 133.8246, + "eval_samples_per_second": 1.3, + "eval_steps_per_second": 0.164, "step": 2155 }, { "epoch": 8.28, "learning_rate": 8.98595146871009e-05, - "loss": 0.0364, + "loss": 0.0201, "step": 2160 }, { "epoch": 8.28, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.3031058311462402, - "eval_runtime": 71.8044, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.6387782096862793, + "eval_runtime": 135.8171, + "eval_samples_per_second": 1.281, + "eval_steps_per_second": 0.162, "step": 2160 }, { "epoch": 8.3, "learning_rate": 8.960408684546617e-05, - "loss": 0.0544, + "loss": 0.0394, "step": 2165 }, { "epoch": 8.3, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.41126549243927, - "eval_runtime": 74.1212, - "eval_samples_per_second": 2.348, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.6442043781280518, + "eval_runtime": 136.9375, + "eval_samples_per_second": 1.271, + "eval_steps_per_second": 0.161, "step": 2165 }, { "epoch": 8.31, "learning_rate": 8.934865900383143e-05, - "loss": 0.0952, + "loss": 0.049, "step": 2170 }, { "epoch": 8.31, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.532741904258728, - "eval_runtime": 71.6535, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.4437042474746704, + "eval_runtime": 136.8983, + "eval_samples_per_second": 1.271, + "eval_steps_per_second": 0.161, "step": 2170 }, { "epoch": 8.33, "learning_rate": 8.90932311621967e-05, - "loss": 0.0006, + "loss": 0.0045, "step": 2175 }, { "epoch": 8.33, - "eval_accuracy": 0.6724137931034483, - "eval_loss": 1.6938968896865845, - "eval_runtime": 72.2802, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.6671456098556519, + "eval_runtime": 133.0309, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 2175 }, { "epoch": 8.35, "learning_rate": 8.883780332056194e-05, - "loss": 0.2597, + "loss": 0.0227, "step": 2180 }, { "epoch": 8.35, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.5596327781677246, - "eval_runtime": 71.7186, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.811476230621338, + "eval_runtime": 133.9084, + "eval_samples_per_second": 1.299, + "eval_steps_per_second": 0.164, "step": 2180 }, { "epoch": 8.37, "learning_rate": 8.85823754789272e-05, - "loss": 0.2526, + "loss": 0.0484, "step": 2185 }, { "epoch": 8.37, "eval_accuracy": 0.735632183908046, - "eval_loss": 1.5019148588180542, - "eval_runtime": 73.0896, - "eval_samples_per_second": 2.381, - "eval_steps_per_second": 0.301, + "eval_loss": 1.5887384414672852, + "eval_runtime": 133.5744, + "eval_samples_per_second": 1.303, + "eval_steps_per_second": 0.165, "step": 2185 }, { "epoch": 8.39, "learning_rate": 8.832694763729247e-05, - "loss": 0.102, + "loss": 0.2142, "step": 2190 }, { "epoch": 8.39, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.612549901008606, - "eval_runtime": 71.7757, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.5692673921585083, + "eval_runtime": 134.0447, + "eval_samples_per_second": 1.298, + "eval_steps_per_second": 0.164, "step": 2190 }, { "epoch": 8.41, "learning_rate": 8.807151979565773e-05, - "loss": 0.02, + "loss": 0.038, "step": 2195 }, { "epoch": 8.41, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 1.7707464694976807, - "eval_runtime": 72.3245, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.6501433849334717, + "eval_runtime": 133.022, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 2195 }, { "epoch": 8.43, "learning_rate": 8.7816091954023e-05, - "loss": 0.0062, + "loss": 0.1527, "step": 2200 }, { "epoch": 8.43, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.5734792947769165, - "eval_runtime": 73.3522, - "eval_samples_per_second": 2.372, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.6354538202285767, + "eval_runtime": 135.8146, + "eval_samples_per_second": 1.281, + "eval_steps_per_second": 0.162, "step": 2200 }, { "epoch": 8.45, "learning_rate": 8.756066411238826e-05, - "loss": 0.0429, + "loss": 0.0143, "step": 2205 }, { "epoch": 8.45, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.4174237251281738, - "eval_runtime": 72.276, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.524357795715332, + "eval_runtime": 134.4439, + "eval_samples_per_second": 1.294, + "eval_steps_per_second": 0.164, "step": 2205 }, { "epoch": 8.47, "learning_rate": 8.730523627075352e-05, - "loss": 0.272, + "loss": 0.019, "step": 2210 }, { "epoch": 8.47, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.4774861335754395, - "eval_runtime": 72.0492, - "eval_samples_per_second": 2.415, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5004924535751343, + "eval_runtime": 136.0359, + "eval_samples_per_second": 1.279, + "eval_steps_per_second": 0.162, "step": 2210 }, { "epoch": 8.49, "learning_rate": 8.704980842911877e-05, - "loss": 0.1014, + "loss": 0.007, "step": 2215 }, { "epoch": 8.49, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.556422472000122, - "eval_runtime": 73.9875, - "eval_samples_per_second": 2.352, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.5427993535995483, + "eval_runtime": 133.7594, + "eval_samples_per_second": 1.301, + "eval_steps_per_second": 0.164, "step": 2215 }, { "epoch": 8.51, "learning_rate": 8.679438058748404e-05, - "loss": 0.1191, + "loss": 0.2862, "step": 2220 }, { "epoch": 8.51, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.6515696048736572, - "eval_runtime": 71.7867, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.5085382461547852, + "eval_runtime": 133.1685, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 2220 }, { "epoch": 8.52, "learning_rate": 8.65389527458493e-05, - "loss": 0.1466, + "loss": 0.1645, "step": 2225 }, { "epoch": 8.52, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.5438907146453857, - "eval_runtime": 72.413, - "eval_samples_per_second": 2.403, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.494451642036438, + "eval_runtime": 135.845, + "eval_samples_per_second": 1.281, + "eval_steps_per_second": 0.162, "step": 2225 }, { "epoch": 8.54, "learning_rate": 8.628352490421456e-05, - "loss": 0.033, + "loss": 0.0377, "step": 2230 }, { "epoch": 8.54, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.6586909294128418, - "eval_runtime": 71.5867, - "eval_samples_per_second": 2.431, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.435677409172058, + "eval_runtime": 137.2025, + "eval_samples_per_second": 1.268, + "eval_steps_per_second": 0.16, "step": 2230 }, { "epoch": 8.56, "learning_rate": 8.602809706257983e-05, - "loss": 0.4028, + "loss": 0.0011, "step": 2235 }, { "epoch": 8.56, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.5925533771514893, - "eval_runtime": 74.2608, - "eval_samples_per_second": 2.343, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.50213623046875, + "eval_runtime": 134.1377, + "eval_samples_per_second": 1.297, + "eval_steps_per_second": 0.164, "step": 2235 }, { "epoch": 8.58, "learning_rate": 8.577266922094509e-05, - "loss": 0.29, + "loss": 0.0195, "step": 2240 }, { "epoch": 8.58, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.4162274599075317, - "eval_runtime": 71.6928, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.4815926551818848, + "eval_runtime": 133.7184, + "eval_samples_per_second": 1.301, + "eval_steps_per_second": 0.165, "step": 2240 }, { "epoch": 8.6, "learning_rate": 8.551724137931035e-05, - "loss": 0.082, + "loss": 0.0038, "step": 2245 }, { "epoch": 8.6, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.4092352390289307, - "eval_runtime": 72.1601, - "eval_samples_per_second": 2.411, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.5892810821533203, + "eval_runtime": 131.8657, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2245 }, { "epoch": 8.62, "learning_rate": 8.52618135376756e-05, - "loss": 0.0273, + "loss": 0.0028, "step": 2250 }, { "epoch": 8.62, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.4052162170410156, - "eval_runtime": 71.6429, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6429439783096313, + "eval_runtime": 131.6556, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2250 }, { "epoch": 8.64, "learning_rate": 8.500638569604087e-05, - "loss": 0.2974, + "loss": 0.2262, "step": 2255 }, { "epoch": 8.64, - "eval_accuracy": 0.7701149425287356, - "eval_loss": 1.4225624799728394, - "eval_runtime": 72.3814, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6773968935012817, + "eval_runtime": 135.4188, + "eval_samples_per_second": 1.285, + "eval_steps_per_second": 0.162, "step": 2255 }, { "epoch": 8.66, "learning_rate": 8.475095785440613e-05, - "loss": 0.7249, + "loss": 0.4226, "step": 2260 }, { "epoch": 8.66, - "eval_accuracy": 0.7586206896551724, - "eval_loss": 1.3933783769607544, - "eval_runtime": 71.7734, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.4581010341644287, + "eval_runtime": 131.687, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2260 }, { "epoch": 8.68, "learning_rate": 8.449553001277139e-05, - "loss": 0.1874, + "loss": 0.0025, "step": 2265 }, { "epoch": 8.68, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.3755295276641846, - "eval_runtime": 73.9422, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.3616026639938354, + "eval_runtime": 131.5724, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2265 }, { "epoch": 8.7, "learning_rate": 8.424010217113666e-05, - "loss": 0.0365, + "loss": 0.0549, "step": 2270 }, { "epoch": 8.7, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.3688724040985107, - "eval_runtime": 71.7961, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.3944165706634521, + "eval_runtime": 135.3983, + "eval_samples_per_second": 1.285, + "eval_steps_per_second": 0.162, "step": 2270 }, { "epoch": 8.72, "learning_rate": 8.398467432950192e-05, - "loss": 0.1775, + "loss": 0.2475, "step": 2275 }, { "epoch": 8.72, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.3662774562835693, - "eval_runtime": 72.2725, - "eval_samples_per_second": 2.408, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.4397807121276855, + "eval_runtime": 131.6047, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2275 }, { "epoch": 8.74, "learning_rate": 8.372924648786718e-05, - "loss": 0.0974, + "loss": 0.0432, "step": 2280 }, { "epoch": 8.74, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.3852037191390991, - "eval_runtime": 71.6891, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.4615994691848755, + "eval_runtime": 134.6188, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.163, "step": 2280 }, { "epoch": 8.75, "learning_rate": 8.347381864623243e-05, - "loss": 0.1109, + "loss": 0.0076, "step": 2285 }, { "epoch": 8.75, - "eval_accuracy": 0.6724137931034483, - "eval_loss": 1.4648981094360352, - "eval_runtime": 72.2936, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.4265929460525513, + "eval_runtime": 131.5081, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2285 }, { "epoch": 8.77, "learning_rate": 8.32183908045977e-05, - "loss": 0.2778, + "loss": 0.1605, "step": 2290 }, { "epoch": 8.77, - "eval_accuracy": 0.6781609195402298, - "eval_loss": 1.435415267944336, - "eval_runtime": 71.7606, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.3677057027816772, + "eval_runtime": 134.1102, + "eval_samples_per_second": 1.297, + "eval_steps_per_second": 0.164, "step": 2290 }, { "epoch": 8.79, "learning_rate": 8.296296296296296e-05, - "loss": 0.0384, + "loss": 0.0192, "step": 2295 }, { "epoch": 8.79, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.3910033702850342, - "eval_runtime": 72.3796, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.3508528470993042, + "eval_runtime": 131.6443, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2295 }, { "epoch": 8.81, "learning_rate": 8.270753512132822e-05, - "loss": 0.1499, + "loss": 0.0123, "step": 2300 }, { "epoch": 8.81, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.4266196489334106, - "eval_runtime": 71.6688, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.3890247344970703, + "eval_runtime": 131.7602, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2300 }, { "epoch": 8.83, "learning_rate": 8.245210727969349e-05, - "loss": 0.0703, + "loss": 0.0292, "step": 2305 }, { "epoch": 8.83, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.3794021606445312, - "eval_runtime": 73.9299, - "eval_samples_per_second": 2.354, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.5051121711730957, + "eval_runtime": 131.7302, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2305 }, { "epoch": 8.85, "learning_rate": 8.219667943805875e-05, - "loss": 0.2238, + "loss": 0.1464, "step": 2310 }, { "epoch": 8.85, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.4704707860946655, - "eval_runtime": 71.631, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6223372220993042, + "eval_runtime": 131.7641, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2310 }, { "epoch": 8.87, "learning_rate": 8.194125159642401e-05, - "loss": 0.2418, + "loss": 0.3433, "step": 2315 }, { "epoch": 8.87, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.4477012157440186, - "eval_runtime": 73.9608, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.6563609838485718, + "eval_runtime": 131.5136, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2315 }, { "epoch": 8.89, "learning_rate": 8.168582375478928e-05, - "loss": 0.0854, + "loss": 0.1714, "step": 2320 }, { "epoch": 8.89, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.453401803970337, - "eval_runtime": 71.72, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.622618556022644, + "eval_runtime": 131.4805, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2320 }, { "epoch": 8.91, "learning_rate": 8.143039591315454e-05, - "loss": 0.1613, + "loss": 0.0059, "step": 2325 }, { "epoch": 8.91, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.5824450254440308, - "eval_runtime": 72.4535, - "eval_samples_per_second": 2.402, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.819935917854309, + "eval_runtime": 134.5852, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.163, "step": 2325 }, { "epoch": 8.93, "learning_rate": 8.11749680715198e-05, - "loss": 0.0599, + "loss": 0.0172, "step": 2330 }, { "epoch": 8.93, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.5845459699630737, - "eval_runtime": 73.534, - "eval_samples_per_second": 2.366, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 2.0601022243499756, + "eval_runtime": 131.6123, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2330 }, { "epoch": 8.95, "learning_rate": 8.091954022988507e-05, - "loss": 0.2216, + "loss": 0.9032, "step": 2335 }, { "epoch": 8.95, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.5560506582260132, - "eval_runtime": 72.8271, - "eval_samples_per_second": 2.389, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.6896551724137931, + "eval_loss": 1.911239743232727, + "eval_runtime": 132.3205, + "eval_samples_per_second": 1.315, + "eval_steps_per_second": 0.166, "step": 2335 }, { "epoch": 8.97, "learning_rate": 8.066411238825033e-05, - "loss": 0.023, + "loss": 0.2749, "step": 2340 }, { "epoch": 8.97, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.5869792699813843, - "eval_runtime": 71.7467, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.5849545001983643, + "eval_runtime": 131.5581, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2340 }, { "epoch": 8.98, "learning_rate": 8.04086845466156e-05, - "loss": 0.0166, + "loss": 0.0033, "step": 2345 }, { "epoch": 8.98, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.6614141464233398, - "eval_runtime": 72.2937, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.447026014328003, + "eval_runtime": 131.5482, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2345 }, { "epoch": 9.0, "learning_rate": 8.015325670498086e-05, - "loss": 0.1114, + "loss": 0.1976, "step": 2350 }, { "epoch": 9.0, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.5962401628494263, - "eval_runtime": 71.8444, - "eval_samples_per_second": 2.422, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5511088371276855, + "eval_runtime": 131.6876, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2350 }, { "epoch": 9.02, "learning_rate": 7.989782886334612e-05, - "loss": 0.0254, + "loss": 0.0023, "step": 2355 }, { "epoch": 9.02, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.578687310218811, - "eval_runtime": 73.975, - "eval_samples_per_second": 2.352, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6997263431549072, + "eval_runtime": 131.6445, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2355 }, { "epoch": 9.04, "learning_rate": 7.964240102171137e-05, - "loss": 0.0807, + "loss": 0.0125, "step": 2360 }, { "epoch": 9.04, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.6112695932388306, - "eval_runtime": 73.1873, - "eval_samples_per_second": 2.377, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.8850693702697754, + "eval_runtime": 131.5131, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2360 }, { "epoch": 9.06, "learning_rate": 7.938697318007663e-05, - "loss": 0.0151, + "loss": 0.0023, "step": 2365 }, { "epoch": 9.06, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.6175737380981445, - "eval_runtime": 72.2137, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.6839080459770115, + "eval_loss": 2.049333333969116, + "eval_runtime": 134.4132, + "eval_samples_per_second": 1.295, + "eval_steps_per_second": 0.164, "step": 2365 }, { "epoch": 9.08, "learning_rate": 7.91315453384419e-05, - "loss": 0.0234, + "loss": 0.1226, "step": 2370 }, { "epoch": 9.08, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.6412409543991089, - "eval_runtime": 73.4407, - "eval_samples_per_second": 2.369, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6171669960021973, + "eval_runtime": 132.176, + "eval_samples_per_second": 1.316, + "eval_steps_per_second": 0.166, "step": 2370 }, { "epoch": 9.1, "learning_rate": 7.887611749680716e-05, - "loss": 0.0901, + "loss": 0.0011, "step": 2375 }, { "epoch": 9.1, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.6331058740615845, - "eval_runtime": 72.4003, - "eval_samples_per_second": 2.403, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6242951154708862, + "eval_runtime": 131.7057, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2375 }, { "epoch": 9.12, "learning_rate": 7.862068965517242e-05, - "loss": 0.0032, + "loss": 0.001, "step": 2380 }, { "epoch": 9.12, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.7730814218521118, - "eval_runtime": 71.8133, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.707185983657837, + "eval_runtime": 131.6149, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2380 }, { "epoch": 9.14, "learning_rate": 7.836526181353769e-05, - "loss": 0.0657, + "loss": 0.0176, "step": 2385 }, { "epoch": 9.14, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.7673834562301636, - "eval_runtime": 72.4937, - "eval_samples_per_second": 2.4, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7006449699401855, + "eval_runtime": 131.5584, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2385 }, { "epoch": 9.16, "learning_rate": 7.810983397190295e-05, - "loss": 0.0035, + "loss": 0.0291, "step": 2390 }, { "epoch": 9.16, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.7309309244155884, - "eval_runtime": 71.574, - "eval_samples_per_second": 2.431, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6260244846343994, + "eval_runtime": 131.6961, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2390 }, { "epoch": 9.18, "learning_rate": 7.78544061302682e-05, - "loss": 0.019, + "loss": 0.0003, "step": 2395 }, { "epoch": 9.18, "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.6792755126953125, - "eval_runtime": 74.3875, - "eval_samples_per_second": 2.339, - "eval_steps_per_second": 0.296, + "eval_loss": 1.6396585702896118, + "eval_runtime": 135.5626, + "eval_samples_per_second": 1.284, + "eval_steps_per_second": 0.162, "step": 2395 }, { "epoch": 9.2, "learning_rate": 7.759897828863346e-05, - "loss": 0.0038, + "loss": 0.0002, "step": 2400 }, { "epoch": 9.2, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.7092453241348267, - "eval_runtime": 71.7278, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.6500298976898193, + "eval_runtime": 131.8182, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2400 }, { "epoch": 9.21, "learning_rate": 7.734355044699873e-05, - "loss": 0.0061, + "loss": 0.0002, "step": 2405 }, { "epoch": 9.21, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.726812720298767, - "eval_runtime": 72.2916, - "eval_samples_per_second": 2.407, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.654776692390442, + "eval_runtime": 131.6817, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2405 }, { "epoch": 9.23, "learning_rate": 7.708812260536399e-05, - "loss": 0.0077, + "loss": 0.0002, "step": 2410 }, { "epoch": 9.23, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.6786487102508545, - "eval_runtime": 71.8439, - "eval_samples_per_second": 2.422, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.6623882055282593, + "eval_runtime": 131.6808, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2410 }, { "epoch": 9.25, "learning_rate": 7.683269476372925e-05, - "loss": 0.1325, + "loss": 0.0014, "step": 2415 }, { "epoch": 9.25, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7274757623672485, - "eval_runtime": 72.374, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.663640022277832, + "eval_runtime": 131.723, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2415 }, { "epoch": 9.27, "learning_rate": 7.657726692209452e-05, - "loss": 0.013, + "loss": 0.0007, "step": 2420 }, { "epoch": 9.27, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.8993436098098755, - "eval_runtime": 73.5113, - "eval_samples_per_second": 2.367, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.667425513267517, + "eval_runtime": 131.8231, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2420 }, { "epoch": 9.29, "learning_rate": 7.632183908045978e-05, - "loss": 0.2623, + "loss": 0.171, "step": 2425 }, { "epoch": 9.29, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.9386109113693237, - "eval_runtime": 74.2008, - "eval_samples_per_second": 2.345, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6711957454681396, + "eval_runtime": 135.4213, + "eval_samples_per_second": 1.285, + "eval_steps_per_second": 0.162, "step": 2425 }, { "epoch": 9.31, "learning_rate": 7.606641123882503e-05, - "loss": 0.1043, + "loss": 0.0019, "step": 2430 }, { "epoch": 9.31, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7973520755767822, - "eval_runtime": 71.6127, - "eval_samples_per_second": 2.43, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6811691522598267, + "eval_runtime": 131.6252, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2430 }, { "epoch": 9.33, "learning_rate": 7.581098339719029e-05, - "loss": 0.2192, + "loss": 0.1125, "step": 2435 }, { "epoch": 9.33, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.622019648551941, - "eval_runtime": 73.9624, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.6472193002700806, + "eval_runtime": 131.6396, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2435 }, { "epoch": 9.35, "learning_rate": 7.555555555555556e-05, - "loss": 0.0019, + "loss": 0.0084, "step": 2440 }, { "epoch": 9.35, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.6389633417129517, - "eval_runtime": 71.6133, - "eval_samples_per_second": 2.43, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.684321403503418, + "eval_runtime": 134.6915, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 2440 }, { "epoch": 9.37, "learning_rate": 7.530012771392082e-05, - "loss": 0.0072, + "loss": 0.0039, "step": 2445 }, { "epoch": 9.37, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.765933632850647, - "eval_runtime": 74.1608, - "eval_samples_per_second": 2.346, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7159420251846313, + "eval_runtime": 131.7929, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2445 }, { "epoch": 9.39, "learning_rate": 7.504469987228608e-05, - "loss": 0.1955, + "loss": 0.1233, "step": 2450 }, { "epoch": 9.39, - "eval_accuracy": 0.6724137931034483, - "eval_loss": 1.865466594696045, - "eval_runtime": 71.8, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.693913221359253, + "eval_runtime": 131.6221, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2450 }, { "epoch": 9.41, "learning_rate": 7.478927203065135e-05, - "loss": 0.0232, + "loss": 0.0015, "step": 2455 }, { "epoch": 9.41, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.654495120048523, - "eval_runtime": 74.2804, - "eval_samples_per_second": 2.342, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6599289178848267, + "eval_runtime": 131.5324, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2455 }, { "epoch": 9.43, "learning_rate": 7.453384418901661e-05, - "loss": 0.0479, + "loss": 0.0031, "step": 2460 }, { "epoch": 9.43, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.589800238609314, - "eval_runtime": 73.7881, - "eval_samples_per_second": 2.358, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6476054191589355, + "eval_runtime": 131.5993, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2460 }, { "epoch": 9.44, "learning_rate": 7.427841634738186e-05, - "loss": 0.0284, + "loss": 0.0059, "step": 2465 }, { "epoch": 9.44, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.6431066989898682, - "eval_runtime": 74.4192, - "eval_samples_per_second": 2.338, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7214477062225342, + "eval_runtime": 134.7166, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 2465 }, { "epoch": 9.46, "learning_rate": 7.402298850574712e-05, - "loss": 0.2414, + "loss": 0.1826, "step": 2470 }, { "epoch": 9.46, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.6797583103179932, - "eval_runtime": 73.2814, - "eval_samples_per_second": 2.374, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.7813475131988525, + "eval_runtime": 131.5182, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2470 }, { "epoch": 9.48, "learning_rate": 7.376756066411239e-05, - "loss": 0.037, + "loss": 0.1108, "step": 2475 }, { "epoch": 9.48, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.7022534608840942, - "eval_runtime": 76.0683, - "eval_samples_per_second": 2.287, - "eval_steps_per_second": 0.289, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.7746046781539917, + "eval_runtime": 131.497, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2475 }, { "epoch": 9.5, "learning_rate": 7.351213282247765e-05, - "loss": 0.2024, + "loss": 0.0244, "step": 2480 }, { "epoch": 9.5, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.7409052848815918, - "eval_runtime": 75.5479, - "eval_samples_per_second": 2.303, - "eval_steps_per_second": 0.291, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7000945806503296, + "eval_runtime": 134.8163, + "eval_samples_per_second": 1.291, + "eval_steps_per_second": 0.163, "step": 2480 }, { "epoch": 9.52, "learning_rate": 7.325670498084291e-05, - "loss": 0.0136, + "loss": 0.0004, "step": 2485 }, { "epoch": 9.52, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.5980173349380493, - "eval_runtime": 74.9877, - "eval_samples_per_second": 2.32, - "eval_steps_per_second": 0.293, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.7654314041137695, + "eval_runtime": 131.5898, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2485 }, { "epoch": 9.54, "learning_rate": 7.300127713920818e-05, - "loss": 0.0064, + "loss": 0.0017, "step": 2490 }, { "epoch": 9.54, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.572400450706482, - "eval_runtime": 74.8005, - "eval_samples_per_second": 2.326, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.8154675960540771, + "eval_runtime": 134.6257, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 2490 }, { "epoch": 9.56, "learning_rate": 7.274584929757344e-05, - "loss": 0.0092, + "loss": 0.0048, "step": 2495 }, { "epoch": 9.56, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.5245319604873657, - "eval_runtime": 73.814, - "eval_samples_per_second": 2.357, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.7586040496826172, + "eval_runtime": 131.6022, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2495 }, { "epoch": 9.58, "learning_rate": 7.24904214559387e-05, - "loss": 0.0231, + "loss": 0.0001, "step": 2500 }, { "epoch": 9.58, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.4510209560394287, - "eval_runtime": 73.161, - "eval_samples_per_second": 2.378, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7478693723678589, + "eval_runtime": 131.5333, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2500 }, { "epoch": 9.6, "learning_rate": 7.223499361430395e-05, - "loss": 0.0152, + "loss": 0.0007, "step": 2505 }, { "epoch": 9.6, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.5502389669418335, - "eval_runtime": 76.6679, - "eval_samples_per_second": 2.27, - "eval_steps_per_second": 0.287, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.74885892868042, + "eval_runtime": 131.5823, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2505 }, { "epoch": 9.62, "learning_rate": 7.197956577266922e-05, - "loss": 0.1021, + "loss": 0.1523, "step": 2510 }, { "epoch": 9.62, "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.6658586263656616, - "eval_runtime": 75.2815, - "eval_samples_per_second": 2.311, - "eval_steps_per_second": 0.292, + "eval_loss": 1.7376835346221924, + "eval_runtime": 131.6011, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2510 }, { "epoch": 9.64, "learning_rate": 7.172413793103448e-05, - "loss": 0.0043, + "loss": 0.0032, "step": 2515 }, { "epoch": 9.64, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.735673189163208, - "eval_runtime": 73.8942, - "eval_samples_per_second": 2.355, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.6583445072174072, + "eval_runtime": 131.6093, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2515 }, { "epoch": 9.66, "learning_rate": 7.146871008939974e-05, - "loss": 0.0011, + "loss": 0.0094, "step": 2520 }, { "epoch": 9.66, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 1.8052144050598145, - "eval_runtime": 75.1879, - "eval_samples_per_second": 2.314, - "eval_steps_per_second": 0.293, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.5356999635696411, + "eval_runtime": 135.3986, + "eval_samples_per_second": 1.285, + "eval_steps_per_second": 0.162, "step": 2520 }, { "epoch": 9.67, "learning_rate": 7.1213282247765e-05, - "loss": 0.1036, + "loss": 0.001, "step": 2525 }, { "epoch": 9.67, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.8808355331420898, - "eval_runtime": 74.8079, - "eval_samples_per_second": 2.326, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.489303708076477, + "eval_runtime": 131.5424, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2525 }, { "epoch": 9.69, "learning_rate": 7.095785440613027e-05, - "loss": 0.0641, + "loss": 0.0053, "step": 2530 }, { "epoch": 9.69, - "eval_accuracy": 0.6839080459770115, - "eval_loss": 1.7997287511825562, - "eval_runtime": 73.4276, - "eval_samples_per_second": 2.37, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.4710723161697388, + "eval_runtime": 131.6132, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2530 }, { "epoch": 9.71, "learning_rate": 7.070242656449553e-05, - "loss": 0.0263, + "loss": 0.0027, "step": 2535 }, { "epoch": 9.71, - "eval_accuracy": 0.6896551724137931, - "eval_loss": 1.8069945573806763, - "eval_runtime": 74.1898, - "eval_samples_per_second": 2.345, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.4847438335418701, + "eval_runtime": 131.8018, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2535 }, { "epoch": 9.73, "learning_rate": 7.04469987228608e-05, - "loss": 0.1702, + "loss": 0.0031, "step": 2540 }, { "epoch": 9.73, - "eval_accuracy": 0.6954022988505747, - "eval_loss": 1.8327751159667969, - "eval_runtime": 73.9343, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.5292288064956665, + "eval_runtime": 131.6847, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2540 }, { "epoch": 9.75, "learning_rate": 7.019157088122606e-05, - "loss": 0.134, + "loss": 0.0168, "step": 2545 }, { "epoch": 9.75, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.8617277145385742, - "eval_runtime": 75.8555, - "eval_samples_per_second": 2.294, - "eval_steps_per_second": 0.29, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.5313410758972168, + "eval_runtime": 131.6087, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2545 }, { "epoch": 9.77, "learning_rate": 6.993614303959132e-05, - "loss": 0.1743, + "loss": 0.0443, "step": 2550 }, { "epoch": 9.77, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.8030837774276733, - "eval_runtime": 75.2054, - "eval_samples_per_second": 2.314, - "eval_steps_per_second": 0.293, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.5084540843963623, + "eval_runtime": 134.9994, + "eval_samples_per_second": 1.289, + "eval_steps_per_second": 0.163, "step": 2550 }, { "epoch": 9.79, "learning_rate": 6.968071519795659e-05, - "loss": 0.0431, + "loss": 0.0237, "step": 2555 }, { "epoch": 9.79, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.720353364944458, - "eval_runtime": 73.8007, - "eval_samples_per_second": 2.358, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.577832818031311, + "eval_runtime": 131.7595, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2555 }, { "epoch": 9.81, "learning_rate": 6.942528735632185e-05, - "loss": 0.1325, + "loss": 0.0026, "step": 2560 }, { "epoch": 9.81, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.6215603351593018, - "eval_runtime": 74.9611, - "eval_samples_per_second": 2.321, - "eval_steps_per_second": 0.293, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7100647687911987, + "eval_runtime": 134.0966, + "eval_samples_per_second": 1.298, + "eval_steps_per_second": 0.164, "step": 2560 }, { "epoch": 9.83, "learning_rate": 6.916985951468711e-05, - "loss": 0.0021, + "loss": 0.0227, "step": 2565 }, { "epoch": 9.83, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.6373803615570068, - "eval_runtime": 74.0676, - "eval_samples_per_second": 2.349, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.719056248664856, + "eval_runtime": 131.4435, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 2565 }, { "epoch": 9.85, "learning_rate": 6.891443167305238e-05, - "loss": 0.0177, + "loss": 0.0001, "step": 2570 }, { "epoch": 9.85, - "eval_accuracy": 0.7011494252873564, - "eval_loss": 1.7743432521820068, - "eval_runtime": 73.9209, - "eval_samples_per_second": 2.354, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7180625200271606, + "eval_runtime": 131.4579, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 2570 }, { "epoch": 9.87, "learning_rate": 6.865900383141763e-05, - "loss": 0.0059, + "loss": 0.0013, "step": 2575 }, { "epoch": 9.87, - "eval_accuracy": 0.7068965517241379, - "eval_loss": 1.7778594493865967, - "eval_runtime": 73.9917, - "eval_samples_per_second": 2.352, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.7265838384628296, + "eval_runtime": 131.6065, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2575 }, { "epoch": 9.89, "learning_rate": 6.840357598978289e-05, - "loss": 0.0019, + "loss": 0.0153, "step": 2580 }, { "epoch": 9.89, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.670163869857788, - "eval_runtime": 75.1748, - "eval_samples_per_second": 2.315, - "eval_steps_per_second": 0.293, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.721193552017212, + "eval_runtime": 131.7875, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2580 }, { "epoch": 9.9, "learning_rate": 6.814814814814815e-05, - "loss": 0.3578, + "loss": 0.3362, "step": 2585 }, { "epoch": 9.9, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.6000075340270996, - "eval_runtime": 74.6944, - "eval_samples_per_second": 2.329, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.6475728750228882, + "eval_runtime": 131.8419, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2585 }, { "epoch": 9.92, "learning_rate": 6.789272030651342e-05, - "loss": 0.0012, + "loss": 0.0002, "step": 2590 }, { "epoch": 9.92, "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.5551468133926392, - "eval_runtime": 73.4262, - "eval_samples_per_second": 2.37, - "eval_steps_per_second": 0.3, + "eval_loss": 1.6617615222930908, + "eval_runtime": 131.7141, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2590 }, { "epoch": 9.94, "learning_rate": 6.763729246487868e-05, - "loss": 0.0325, + "loss": 0.0041, "step": 2595 }, { "epoch": 9.94, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.5792527198791504, - "eval_runtime": 72.3934, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5420438051223755, + "eval_runtime": 131.461, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 2595 }, { "epoch": 9.96, "learning_rate": 6.738186462324394e-05, - "loss": 0.0029, + "loss": 0.0002, "step": 2600 }, { "epoch": 9.96, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.6270909309387207, - "eval_runtime": 71.7327, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5038961172103882, + "eval_runtime": 132.3186, + "eval_samples_per_second": 1.315, + "eval_steps_per_second": 0.166, "step": 2600 }, { "epoch": 9.98, "learning_rate": 6.71264367816092e-05, - "loss": 0.0894, + "loss": 0.0215, "step": 2605 }, { "epoch": 9.98, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.6158068180084229, - "eval_runtime": 72.1335, - "eval_samples_per_second": 2.412, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5120642185211182, + "eval_runtime": 134.7063, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 2605 }, { "epoch": 10.0, "learning_rate": 6.687100893997446e-05, - "loss": 0.0841, + "loss": 0.1991, "step": 2610 }, { "epoch": 10.0, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.6193947792053223, - "eval_runtime": 71.5867, - "eval_samples_per_second": 2.431, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.5479094982147217, + "eval_runtime": 131.4277, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 2610 }, { "epoch": 10.02, "learning_rate": 6.661558109833972e-05, - "loss": 0.0199, + "loss": 0.0005, "step": 2615 }, { "epoch": 10.02, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.6935107707977295, - "eval_runtime": 72.1289, - "eval_samples_per_second": 2.412, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6146314144134521, + "eval_runtime": 132.802, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 2615 }, { "epoch": 10.04, "learning_rate": 6.636015325670498e-05, - "loss": 0.0164, + "loss": 0.1721, "step": 2620 }, { "epoch": 10.04, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.7199974060058594, - "eval_runtime": 71.6036, - "eval_samples_per_second": 2.43, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.6754363775253296, + "eval_runtime": 133.3319, + "eval_samples_per_second": 1.305, + "eval_steps_per_second": 0.165, "step": 2620 }, { "epoch": 10.06, "learning_rate": 6.610472541507025e-05, - "loss": 0.017, + "loss": 0.0144, "step": 2625 }, { "epoch": 10.06, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7973920106887817, - "eval_runtime": 72.3533, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.727333664894104, + "eval_runtime": 133.0937, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 2625 }, { "epoch": 10.08, "learning_rate": 6.584929757343551e-05, - "loss": 0.0005, + "loss": 0.0424, "step": 2630 }, { "epoch": 10.08, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.9108890295028687, - "eval_runtime": 73.2282, - "eval_samples_per_second": 2.376, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7126436781609196, + "eval_loss": 1.7386857271194458, + "eval_runtime": 133.3356, + "eval_samples_per_second": 1.305, + "eval_steps_per_second": 0.165, "step": 2630 }, { "epoch": 10.1, "learning_rate": 6.559386973180077e-05, - "loss": 0.0272, + "loss": 0.001, "step": 2635 }, { "epoch": 10.1, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.9884196519851685, - "eval_runtime": 73.6979, - "eval_samples_per_second": 2.361, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.6673250198364258, + "eval_runtime": 132.9196, + "eval_samples_per_second": 1.309, + "eval_steps_per_second": 0.166, "step": 2635 }, { "epoch": 10.11, "learning_rate": 6.533844189016604e-05, - "loss": 0.0006, + "loss": 0.0187, "step": 2640 }, { "epoch": 10.11, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 2.0442349910736084, - "eval_runtime": 71.6401, - "eval_samples_per_second": 2.429, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.5967484712600708, + "eval_runtime": 132.8447, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 2640 }, { "epoch": 10.13, "learning_rate": 6.508301404853129e-05, - "loss": 0.0003, + "loss": 0.0001, "step": 2645 }, { "epoch": 10.13, - "eval_accuracy": 0.735632183908046, - "eval_loss": 2.0899100303649902, - "eval_runtime": 74.2411, - "eval_samples_per_second": 2.344, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.65236234664917, + "eval_runtime": 137.5143, + "eval_samples_per_second": 1.265, + "eval_steps_per_second": 0.16, "step": 2645 }, { "epoch": 10.15, "learning_rate": 6.482758620689655e-05, - "loss": 0.0064, + "loss": 0.0003, "step": 2650 }, { "epoch": 10.15, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 2.0910024642944336, - "eval_runtime": 71.604, - "eval_samples_per_second": 2.43, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6943364143371582, + "eval_runtime": 135.7335, + "eval_samples_per_second": 1.282, + "eval_steps_per_second": 0.162, "step": 2650 }, { "epoch": 10.17, "learning_rate": 6.457215836526181e-05, - "loss": 0.3947, + "loss": 0.0047, "step": 2655 }, { "epoch": 10.17, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 2.110867500305176, - "eval_runtime": 72.2402, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7338972091674805, + "eval_runtime": 134.0312, + "eval_samples_per_second": 1.298, + "eval_steps_per_second": 0.164, "step": 2655 }, { "epoch": 10.19, "learning_rate": 6.431673052362708e-05, - "loss": 0.0026, + "loss": 0.0105, "step": 2660 }, { "epoch": 10.19, "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.997544765472412, - "eval_runtime": 73.4539, - "eval_samples_per_second": 2.369, - "eval_steps_per_second": 0.3, + "eval_loss": 1.760598063468933, + "eval_runtime": 134.4028, + "eval_samples_per_second": 1.295, + "eval_steps_per_second": 0.164, "step": 2660 }, { "epoch": 10.21, "learning_rate": 6.406130268199234e-05, - "loss": 0.0017, + "loss": 0.0015, "step": 2665 }, { "epoch": 10.21, "eval_accuracy": 0.735632183908046, - "eval_loss": 1.9001383781433105, - "eval_runtime": 72.3167, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_loss": 1.768870234489441, + "eval_runtime": 133.0003, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 2665 }, { "epoch": 10.23, "learning_rate": 6.38058748403576e-05, - "loss": 0.0457, + "loss": 0.0002, "step": 2670 }, { "epoch": 10.23, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7431246042251587, - "eval_runtime": 71.7066, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7068965517241379, + "eval_loss": 1.791227102279663, + "eval_runtime": 133.4851, + "eval_samples_per_second": 1.304, + "eval_steps_per_second": 0.165, "step": 2670 }, { "epoch": 10.25, "learning_rate": 6.355044699872287e-05, - "loss": 0.1929, + "loss": 0.002, "step": 2675 }, { "epoch": 10.25, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.6250685453414917, - "eval_runtime": 74.0216, - "eval_samples_per_second": 2.351, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7011494252873564, + "eval_loss": 1.777532696723938, + "eval_runtime": 133.0873, + "eval_samples_per_second": 1.307, + "eval_steps_per_second": 0.165, "step": 2675 }, { "epoch": 10.27, "learning_rate": 6.329501915708812e-05, - "loss": 0.0095, + "loss": 0.1839, "step": 2680 }, { "epoch": 10.27, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.595952033996582, - "eval_runtime": 71.6176, - "eval_samples_per_second": 2.43, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5833971500396729, + "eval_runtime": 132.9348, + "eval_samples_per_second": 1.309, + "eval_steps_per_second": 0.165, "step": 2680 }, { "epoch": 10.29, "learning_rate": 6.303959131545338e-05, - "loss": 0.0002, + "loss": 0.0004, "step": 2685 }, { "epoch": 10.29, "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.6410387754440308, - "eval_runtime": 72.2047, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_loss": 1.5184221267700195, + "eval_runtime": 135.1931, + "eval_samples_per_second": 1.287, + "eval_steps_per_second": 0.163, "step": 2685 }, { "epoch": 10.31, "learning_rate": 6.278416347381864e-05, - "loss": 0.0325, + "loss": 0.0001, "step": 2690 }, { "epoch": 10.31, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.673262357711792, - "eval_runtime": 71.6131, - "eval_samples_per_second": 2.43, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5530625581741333, + "eval_runtime": 133.3044, + "eval_samples_per_second": 1.305, + "eval_steps_per_second": 0.165, "step": 2690 }, { "epoch": 10.33, "learning_rate": 6.25287356321839e-05, - "loss": 0.0004, + "loss": 0.0001, "step": 2695 }, { "epoch": 10.33, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.684037446975708, - "eval_runtime": 74.3936, - "eval_samples_per_second": 2.339, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.597658395767212, + "eval_runtime": 132.8657, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 2695 }, { "epoch": 10.34, "learning_rate": 6.227330779054917e-05, - "loss": 0.1141, + "loss": 0.2095, "step": 2700 }, { "epoch": 10.34, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.6978554725646973, - "eval_runtime": 73.7209, - "eval_samples_per_second": 2.36, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.6474486589431763, + "eval_runtime": 134.5259, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.164, "step": 2700 }, { "epoch": 10.36, "learning_rate": 6.201787994891443e-05, - "loss": 0.0059, + "loss": 0.0044, "step": 2705 }, { "epoch": 10.36, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7011663913726807, - "eval_runtime": 72.3468, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.7372243404388428, + "eval_runtime": 132.009, + "eval_samples_per_second": 1.318, + "eval_steps_per_second": 0.167, "step": 2705 }, { "epoch": 10.38, "learning_rate": 6.17624521072797e-05, - "loss": 0.0245, + "loss": 0.0242, "step": 2710 }, { "epoch": 10.38, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.77028226852417, - "eval_runtime": 72.2673, - "eval_samples_per_second": 2.408, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7183908045977011, + "eval_loss": 1.7031418085098267, + "eval_runtime": 131.5485, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2710 }, { "epoch": 10.4, "learning_rate": 6.150702426564496e-05, - "loss": 0.0076, + "loss": 0.0119, "step": 2715 }, { "epoch": 10.4, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.8610811233520508, - "eval_runtime": 72.307, - "eval_samples_per_second": 2.406, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6391687393188477, + "eval_runtime": 131.5385, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2715 }, { "epoch": 10.42, "learning_rate": 6.125159642401021e-05, - "loss": 0.0021, + "loss": 0.0003, "step": 2720 }, { "epoch": 10.42, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.8926019668579102, - "eval_runtime": 71.6919, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.699074625968933, + "eval_runtime": 131.8011, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2720 }, { "epoch": 10.44, "learning_rate": 6.099616858237548e-05, - "loss": 0.0065, + "loss": 0.0001, "step": 2725 }, { "epoch": 10.44, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.880444049835205, - "eval_runtime": 72.3737, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7589409351348877, + "eval_runtime": 134.6631, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 2725 }, { "epoch": 10.46, "learning_rate": 6.074074074074074e-05, - "loss": 0.0056, + "loss": 0.0003, "step": 2730 }, { "epoch": 10.46, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.802688479423523, - "eval_runtime": 71.68, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.809535264968872, + "eval_runtime": 131.5414, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2730 }, { "epoch": 10.48, "learning_rate": 6.0485312899106007e-05, - "loss": 0.0115, + "loss": 0.0113, "step": 2735 }, { "epoch": 10.48, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.7529903650283813, - "eval_runtime": 72.2535, - "eval_samples_per_second": 2.408, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7475957870483398, + "eval_runtime": 131.6443, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2735 }, { "epoch": 10.5, "learning_rate": 6.022988505747127e-05, - "loss": 0.007, + "loss": 0.0253, "step": 2740 }, { "epoch": 10.5, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7274096012115479, - "eval_runtime": 71.6133, - "eval_samples_per_second": 2.43, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.732975721359253, + "eval_runtime": 131.7203, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2740 }, { "epoch": 10.52, "learning_rate": 5.997445721583653e-05, - "loss": 0.0006, + "loss": 0.0001, "step": 2745 }, { "epoch": 10.52, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7023934125900269, - "eval_runtime": 72.1912, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.6785794496536255, + "eval_runtime": 134.7004, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 2745 }, { "epoch": 10.54, "learning_rate": 5.97190293742018e-05, - "loss": 0.0039, + "loss": 0.0001, "step": 2750 }, { "epoch": 10.54, "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7069329023361206, - "eval_runtime": 71.7141, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_loss": 1.6546560525894165, + "eval_runtime": 131.4825, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2750 }, { "epoch": 10.56, "learning_rate": 5.9463601532567046e-05, - "loss": 0.0006, + "loss": 0.0841, "step": 2755 }, { "epoch": 10.56, "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7325639724731445, - "eval_runtime": 72.2136, - "eval_samples_per_second": 2.41, - "eval_steps_per_second": 0.305, + "eval_loss": 1.5616384744644165, + "eval_runtime": 131.5563, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2755 }, { "epoch": 10.57, "learning_rate": 5.920817369093231e-05, - "loss": 0.0012, + "loss": 0.0003, "step": 2760 }, { "epoch": 10.57, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7432584762573242, - "eval_runtime": 71.6908, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.4814727306365967, + "eval_runtime": 131.5966, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2760 }, { "epoch": 10.59, "learning_rate": 5.895274584929757e-05, - "loss": 0.002, + "loss": 0.0064, "step": 2765 }, { "epoch": 10.59, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.6767596006393433, - "eval_runtime": 74.0851, - "eval_samples_per_second": 2.349, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.4246087074279785, + "eval_runtime": 135.6419, + "eval_samples_per_second": 1.283, + "eval_steps_per_second": 0.162, "step": 2765 }, { "epoch": 10.61, "learning_rate": 5.8697318007662837e-05, - "loss": 0.0051, + "loss": 0.0001, "step": 2770 }, { "epoch": 10.61, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.6323719024658203, - "eval_runtime": 71.7909, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.4092402458190918, + "eval_runtime": 131.8053, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2770 }, { "epoch": 10.63, "learning_rate": 5.84418901660281e-05, - "loss": 0.1266, + "loss": 0.0015, "step": 2775 }, { "epoch": 10.63, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.59928560256958, - "eval_runtime": 73.9918, - "eval_samples_per_second": 2.352, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.4107933044433594, + "eval_runtime": 131.7192, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2775 }, { "epoch": 10.65, "learning_rate": 5.818646232439336e-05, - "loss": 0.0003, + "loss": 0.0002, "step": 2780 }, { "epoch": 10.65, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.6210390329360962, - "eval_runtime": 71.7199, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.4259847402572632, + "eval_runtime": 131.5637, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2780 }, { "epoch": 10.67, "learning_rate": 5.7931034482758627e-05, - "loss": 0.0011, + "loss": 0.0005, "step": 2785 }, { "epoch": 10.67, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.6556010246276855, - "eval_runtime": 72.3514, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.4420133829116821, + "eval_runtime": 131.7609, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2785 }, { "epoch": 10.69, "learning_rate": 5.767560664112388e-05, - "loss": 0.1543, + "loss": 0.2767, "step": 2790 }, { "epoch": 10.69, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.6259641647338867, - "eval_runtime": 71.8043, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.4646365642547607, + "eval_runtime": 138.9314, + "eval_samples_per_second": 1.252, + "eval_steps_per_second": 0.158, "step": 2790 }, { "epoch": 10.71, "learning_rate": 5.7420178799489147e-05, - "loss": 0.0174, + "loss": 0.0721, "step": 2795 }, { "epoch": 10.71, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.6242051124572754, - "eval_runtime": 76.4373, - "eval_samples_per_second": 2.276, - "eval_steps_per_second": 0.288, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5203619003295898, + "eval_runtime": 131.5083, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2795 }, { "epoch": 10.73, "learning_rate": 5.716475095785441e-05, - "loss": 0.0012, + "loss": 0.0009, "step": 2800 }, { "epoch": 10.73, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.6227067708969116, - "eval_runtime": 73.2688, - "eval_samples_per_second": 2.375, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6007354259490967, + "eval_runtime": 131.8312, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2800 }, { "epoch": 10.75, "learning_rate": 5.690932311621967e-05, - "loss": 0.0016, + "loss": 0.0003, "step": 2805 }, { "epoch": 10.75, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.6738653182983398, - "eval_runtime": 71.7359, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.668529987335205, + "eval_runtime": 131.6129, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2805 }, { "epoch": 10.77, "learning_rate": 5.665389527458494e-05, - "loss": 0.2556, + "loss": 0.3266, "step": 2810 }, { "epoch": 10.77, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.6926209926605225, - "eval_runtime": 71.1551, - "eval_samples_per_second": 2.445, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7353466749191284, + "eval_runtime": 131.5759, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2810 }, { "epoch": 10.79, "learning_rate": 5.63984674329502e-05, - "loss": 0.013, + "loss": 0.0028, "step": 2815 }, { "epoch": 10.79, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7026501893997192, - "eval_runtime": 71.7376, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.747633457183838, + "eval_runtime": 134.916, + "eval_samples_per_second": 1.29, + "eval_steps_per_second": 0.163, "step": 2815 }, { "epoch": 10.8, "learning_rate": 5.614303959131546e-05, - "loss": 0.0002, + "loss": 0.0, "step": 2820 }, { "epoch": 10.8, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.7139840126037598, - "eval_runtime": 73.0784, - "eval_samples_per_second": 2.381, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7377413511276245, + "eval_runtime": 131.5547, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2820 }, { "epoch": 10.82, "learning_rate": 5.588761174968071e-05, - "loss": 0.0023, + "loss": 0.0001, "step": 2825 }, { "epoch": 10.82, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.7233027219772339, - "eval_runtime": 71.8919, - "eval_samples_per_second": 2.42, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7389466762542725, + "eval_runtime": 131.5452, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2825 }, { "epoch": 10.84, "learning_rate": 5.5632183908045976e-05, - "loss": 0.0027, + "loss": 0.0004, "step": 2830 }, { "epoch": 10.84, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.7237999439239502, - "eval_runtime": 71.2043, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.740808367729187, + "eval_runtime": 131.5177, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2830 }, { "epoch": 10.86, "learning_rate": 5.537675606641124e-05, - "loss": 0.0051, + "loss": 0.0794, "step": 2835 }, { "epoch": 10.86, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.6586277484893799, - "eval_runtime": 71.7622, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7411603927612305, + "eval_runtime": 131.6661, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2835 }, { "epoch": 10.88, "learning_rate": 5.51213282247765e-05, - "loss": 0.0003, + "loss": 0.0001, "step": 2840 }, { "epoch": 10.88, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.6285842657089233, - "eval_runtime": 73.1223, - "eval_samples_per_second": 2.38, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.747639775276184, + "eval_runtime": 134.556, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.164, "step": 2840 }, { "epoch": 10.9, "learning_rate": 5.4865900383141767e-05, - "loss": 0.0193, + "loss": 0.0022, "step": 2845 }, { "epoch": 10.9, - "eval_accuracy": 0.7701149425287356, - "eval_loss": 1.5850633382797241, - "eval_runtime": 71.8192, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.7501295804977417, + "eval_runtime": 135.0416, + "eval_samples_per_second": 1.288, + "eval_steps_per_second": 0.163, "step": 2845 }, { "epoch": 10.92, "learning_rate": 5.461047254150703e-05, - "loss": 0.0003, + "loss": 0.0007, "step": 2850 }, { "epoch": 10.92, - "eval_accuracy": 0.7758620689655172, - "eval_loss": 1.5855857133865356, - "eval_runtime": 71.1652, - "eval_samples_per_second": 2.445, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7450084686279297, + "eval_runtime": 131.6406, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2850 }, { "epoch": 10.94, "learning_rate": 5.435504469987229e-05, - "loss": 0.0087, + "loss": 0.0162, "step": 2855 }, { "epoch": 10.94, - "eval_accuracy": 0.7816091954022989, - "eval_loss": 1.614903211593628, - "eval_runtime": 71.7827, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.6570850610733032, + "eval_runtime": 131.5925, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2855 }, { "epoch": 10.96, "learning_rate": 5.409961685823754e-05, - "loss": 0.0048, + "loss": 0.0002, "step": 2860 }, { "epoch": 10.96, - "eval_accuracy": 0.7701149425287356, - "eval_loss": 1.6289048194885254, - "eval_runtime": 71.1167, - "eval_samples_per_second": 2.447, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.5867286920547485, + "eval_runtime": 131.8387, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2860 }, { "epoch": 10.98, "learning_rate": 5.3844189016602806e-05, - "loss": 0.0005, + "loss": 0.0004, "step": 2865 }, { "epoch": 10.98, - "eval_accuracy": 0.7586206896551724, - "eval_loss": 1.6815837621688843, - "eval_runtime": 73.4806, - "eval_samples_per_second": 2.368, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5566998720169067, + "eval_runtime": 133.6513, + "eval_samples_per_second": 1.302, + "eval_steps_per_second": 0.165, "step": 2865 }, { "epoch": 11.0, "learning_rate": 5.358876117496807e-05, - "loss": 0.0028, + "loss": 0.007, "step": 2870 }, { "epoch": 11.0, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7306228876113892, - "eval_runtime": 71.2539, - "eval_samples_per_second": 2.442, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5519108772277832, + "eval_runtime": 131.5593, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2870 }, { "epoch": 11.02, "learning_rate": 5.333333333333333e-05, - "loss": 0.2624, + "loss": 0.222, "step": 2875 }, { "epoch": 11.02, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7713919878005981, - "eval_runtime": 73.4631, - "eval_samples_per_second": 2.369, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.5496468544006348, + "eval_runtime": 131.4761, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2875 }, { "epoch": 11.03, "learning_rate": 5.3077905491698597e-05, - "loss": 0.1484, + "loss": 0.175, "step": 2880 }, { "epoch": 11.03, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.8120393753051758, - "eval_runtime": 72.9705, - "eval_samples_per_second": 2.385, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.576553463935852, + "eval_runtime": 134.5929, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.163, "step": 2880 }, { "epoch": 11.05, "learning_rate": 5.282247765006386e-05, - "loss": 0.0006, + "loss": 0.0004, "step": 2885 }, { "epoch": 11.05, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.8391227722167969, - "eval_runtime": 71.7975, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.611812949180603, + "eval_runtime": 134.8494, + "eval_samples_per_second": 1.29, + "eval_steps_per_second": 0.163, "step": 2885 }, { "epoch": 11.07, "learning_rate": 5.256704980842912e-05, - "loss": 0.0001, + "loss": 0.0047, "step": 2890 }, { "epoch": 11.07, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.8372502326965332, - "eval_runtime": 73.0853, - "eval_samples_per_second": 2.381, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.635672926902771, + "eval_runtime": 131.6624, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2890 }, { "epoch": 11.09, "learning_rate": 5.231162196679439e-05, - "loss": 0.2108, + "loss": 0.0002, "step": 2895 }, { "epoch": 11.09, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7650718688964844, - "eval_runtime": 71.8157, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.646173119544983, + "eval_runtime": 131.6251, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2895 }, { "epoch": 11.11, "learning_rate": 5.205619412515964e-05, - "loss": 0.0008, + "loss": 0.0007, "step": 2900 }, { "epoch": 11.11, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.761852741241455, - "eval_runtime": 71.2906, - "eval_samples_per_second": 2.441, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.6495263576507568, + "eval_runtime": 131.5865, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2900 }, { "epoch": 11.13, "learning_rate": 5.1800766283524907e-05, - "loss": 0.0007, + "loss": 0.0003, "step": 2905 }, { "epoch": 11.13, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.7633298635482788, - "eval_runtime": 71.7947, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.6477447748184204, + "eval_runtime": 131.7735, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2905 }, { "epoch": 11.15, "learning_rate": 5.154533844189017e-05, - "loss": 0.0001, + "loss": 0.002, "step": 2910 }, { "epoch": 11.15, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.7667099237442017, - "eval_runtime": 71.8588, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.644494652748108, + "eval_runtime": 131.6514, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2910 }, { @@ -8746,56 +8746,56 @@ }, { "epoch": 11.17, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.7692232131958008, - "eval_runtime": 71.8252, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.650899052619934, + "eval_runtime": 131.342, + "eval_samples_per_second": 1.325, + "eval_steps_per_second": 0.168, "step": 2915 }, { "epoch": 11.19, "learning_rate": 5.10344827586207e-05, - "loss": 0.0041, + "loss": 0.0026, "step": 2920 }, { "epoch": 11.19, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.7618753910064697, - "eval_runtime": 71.1483, - "eval_samples_per_second": 2.446, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.6433719396591187, + "eval_runtime": 134.4263, + "eval_samples_per_second": 1.294, + "eval_steps_per_second": 0.164, "step": 2920 }, { "epoch": 11.21, "learning_rate": 5.077905491698596e-05, - "loss": 0.0004, + "loss": 0.0001, "step": 2925 }, { "epoch": 11.21, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.744259238243103, - "eval_runtime": 71.828, - "eval_samples_per_second": 2.422, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.6255303621292114, + "eval_runtime": 131.6755, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2925 }, { "epoch": 11.23, "learning_rate": 5.052362707535122e-05, - "loss": 0.0016, + "loss": 0.0003, "step": 2930 }, { "epoch": 11.23, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.7307969331741333, - "eval_runtime": 71.1919, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.6109023094177246, + "eval_runtime": 131.6429, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2930 }, { @@ -8806,491 +8806,491 @@ }, { "epoch": 11.25, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.706060767173767, - "eval_runtime": 71.6927, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.6027604341506958, + "eval_runtime": 134.4577, + "eval_samples_per_second": 1.294, + "eval_steps_per_second": 0.164, "step": 2935 }, { "epoch": 11.26, "learning_rate": 5.0012771392081737e-05, - "loss": 0.0046, + "loss": 0.0044, "step": 2940 }, { "epoch": 11.26, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.7156652212142944, - "eval_runtime": 71.1594, - "eval_samples_per_second": 2.445, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.6084132194519043, + "eval_runtime": 134.0619, + "eval_samples_per_second": 1.298, + "eval_steps_per_second": 0.164, "step": 2940 }, { "epoch": 11.28, "learning_rate": 4.9757343550447e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 2945 }, { "epoch": 11.28, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.7299869060516357, - "eval_runtime": 71.7347, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.6188857555389404, + "eval_runtime": 131.6719, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2945 }, { "epoch": 11.3, "learning_rate": 4.950191570881226e-05, - "loss": 0.0005, + "loss": 0.0003, "step": 2950 }, { "epoch": 11.3, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.739342212677002, - "eval_runtime": 71.1889, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.6270678043365479, + "eval_runtime": 131.7047, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 2950 }, { "epoch": 11.32, "learning_rate": 4.9246487867177527e-05, - "loss": 0.2592, + "loss": 0.2528, "step": 2955 }, { "epoch": 11.32, - "eval_accuracy": 0.7586206896551724, - "eval_loss": 1.7353452444076538, - "eval_runtime": 71.7474, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.6437561511993408, + "eval_runtime": 131.8311, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 2955 }, { "epoch": 11.34, "learning_rate": 4.899106002554278e-05, - "loss": 0.0018, + "loss": 0.0001, "step": 2960 }, { "epoch": 11.34, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.753889799118042, - "eval_runtime": 71.1986, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.6640594005584717, + "eval_runtime": 131.6151, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2960 }, { "epoch": 11.36, "learning_rate": 4.8735632183908047e-05, - "loss": 0.0002, + "loss": 0.0035, "step": 2965 }, { "epoch": 11.36, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7838666439056396, - "eval_runtime": 73.3097, - "eval_samples_per_second": 2.373, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.6824891567230225, + "eval_runtime": 131.5262, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 2965 }, { "epoch": 11.38, "learning_rate": 4.848020434227331e-05, - "loss": 0.0002, + "loss": 0.0005, "step": 2970 }, { "epoch": 11.38, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.8079078197479248, - "eval_runtime": 71.2087, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.7031219005584717, + "eval_runtime": 135.3436, + "eval_samples_per_second": 1.286, + "eval_steps_per_second": 0.163, "step": 2970 }, { "epoch": 11.4, "learning_rate": 4.822477650063857e-05, - "loss": 0.3184, + "loss": 0.2952, "step": 2975 }, { "epoch": 11.4, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.7978729009628296, - "eval_runtime": 71.6881, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6996899843215942, + "eval_runtime": 131.6532, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2975 }, { "epoch": 11.42, "learning_rate": 4.796934865900383e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 2980 }, { "epoch": 11.42, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.802263617515564, - "eval_runtime": 71.289, - "eval_samples_per_second": 2.441, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6979587078094482, + "eval_runtime": 131.4663, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 2980 }, { "epoch": 11.44, "learning_rate": 4.771392081736909e-05, - "loss": 0.0002, + "loss": 0.0004, "step": 2985 }, { "epoch": 11.44, - "eval_accuracy": 0.7183908045977011, - "eval_loss": 1.8103998899459839, - "eval_runtime": 71.6925, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.6984186172485352, + "eval_runtime": 134.7298, + "eval_samples_per_second": 1.291, + "eval_steps_per_second": 0.163, "step": 2985 }, { "epoch": 11.46, "learning_rate": 4.7458492975734357e-05, - "loss": 0.0037, + "loss": 0.0015, "step": 2990 }, { "epoch": 11.46, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.7962597608566284, - "eval_runtime": 71.0509, - "eval_samples_per_second": 2.449, - "eval_steps_per_second": 0.31, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6948944330215454, + "eval_runtime": 131.6251, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2990 }, { "epoch": 11.48, "learning_rate": 4.720306513409962e-05, - "loss": 0.0582, + "loss": 0.0248, "step": 2995 }, { "epoch": 11.48, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7625627517700195, - "eval_runtime": 71.6864, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6734544038772583, + "eval_runtime": 131.6311, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 2995 }, { "epoch": 11.49, "learning_rate": 4.694763729246488e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3000 }, { "epoch": 11.49, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7620983123779297, - "eval_runtime": 71.1842, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.658267617225647, + "eval_runtime": 131.5592, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3000 }, { "epoch": 11.51, "learning_rate": 4.669220945083015e-05, - "loss": 0.1902, + "loss": 0.0129, "step": 3005 }, { "epoch": 11.51, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7105143070220947, - "eval_runtime": 71.7636, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6415587663650513, + "eval_runtime": 131.567, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3005 }, { "epoch": 11.53, "learning_rate": 4.643678160919541e-05, - "loss": 0.1473, + "loss": 0.1049, "step": 3010 }, { "epoch": 11.53, "eval_accuracy": 0.7586206896551724, - "eval_loss": 1.674761176109314, - "eval_runtime": 71.1642, - "eval_samples_per_second": 2.445, - "eval_steps_per_second": 0.309, + "eval_loss": 1.6351646184921265, + "eval_runtime": 134.0039, + "eval_samples_per_second": 1.298, + "eval_steps_per_second": 0.164, "step": 3010 }, { "epoch": 11.55, "learning_rate": 4.6181353767560667e-05, - "loss": 0.0052, + "loss": 0.0001, "step": 3015 }, { "epoch": 11.55, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.6427284479141235, - "eval_runtime": 71.7334, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6312400102615356, + "eval_runtime": 131.5975, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3015 }, { "epoch": 11.57, "learning_rate": 4.592592592592593e-05, - "loss": 0.0005, + "loss": 0.0001, "step": 3020 }, { "epoch": 11.57, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.633091926574707, - "eval_runtime": 71.2235, - "eval_samples_per_second": 2.443, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6293408870697021, + "eval_runtime": 131.6558, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3020 }, { "epoch": 11.59, "learning_rate": 4.567049808429119e-05, - "loss": 0.0001, + "loss": 0.0004, "step": 3025 }, { "epoch": 11.59, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.6355048418045044, - "eval_runtime": 71.7881, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5769641399383545, + "eval_runtime": 131.6201, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3025 }, { "epoch": 11.61, "learning_rate": 4.541507024265646e-05, - "loss": 0.0004, + "loss": 0.0001, "step": 3030 }, { "epoch": 11.61, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.6394970417022705, - "eval_runtime": 73.0643, - "eval_samples_per_second": 2.381, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.5390961170196533, + "eval_runtime": 131.381, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3030 }, { "epoch": 11.63, "learning_rate": 4.515964240102171e-05, - "loss": 0.0001, + "loss": 0.0005, "step": 3035 }, { "epoch": 11.63, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.6441478729248047, - "eval_runtime": 71.7452, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.5512661933898926, + "eval_runtime": 131.575, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3035 }, { "epoch": 11.65, "learning_rate": 4.4904214559386977e-05, - "loss": 0.0001, + "loss": 0.0007, "step": 3040 }, { "epoch": 11.65, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.6461490392684937, - "eval_runtime": 71.208, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.5458471775054932, + "eval_runtime": 131.5999, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3040 }, { "epoch": 11.67, "learning_rate": 4.464878671775224e-05, - "loss": 0.0001, + "loss": 0.0002, "step": 3045 }, { "epoch": 11.67, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.649631142616272, - "eval_runtime": 71.8178, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5148214101791382, + "eval_runtime": 131.7032, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3045 }, { "epoch": 11.69, "learning_rate": 4.4393358876117497e-05, - "loss": 0.0254, + "loss": 0.0084, "step": 3050 }, { "epoch": 11.69, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.6433813571929932, - "eval_runtime": 71.1821, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.4826661348342896, + "eval_runtime": 134.959, + "eval_samples_per_second": 1.289, + "eval_steps_per_second": 0.163, "step": 3050 }, { "epoch": 11.7, "learning_rate": 4.413793103448276e-05, - "loss": 0.0007, + "loss": 0.0002, "step": 3055 }, { "epoch": 11.7, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.642929196357727, - "eval_runtime": 71.7678, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.4733022451400757, + "eval_runtime": 131.4231, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3055 }, { "epoch": 11.72, "learning_rate": 4.388250319284802e-05, - "loss": 0.0004, + "loss": 0.0001, "step": 3060 }, { "epoch": 11.72, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.6454113721847534, - "eval_runtime": 72.9367, - "eval_samples_per_second": 2.386, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7816091954022989, + "eval_loss": 1.4764585494995117, + "eval_runtime": 131.5735, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3060 }, { "epoch": 11.74, "learning_rate": 4.362707535121329e-05, - "loss": 0.0001, + "loss": 0.0005, "step": 3065 }, { "epoch": 11.74, - "eval_accuracy": 0.7528735632183908, - "eval_loss": 1.6498183012008667, - "eval_runtime": 71.6868, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7816091954022989, + "eval_loss": 1.48111891746521, + "eval_runtime": 131.6303, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3065 }, { "epoch": 11.76, "learning_rate": 4.337164750957854e-05, - "loss": 0.0004, + "loss": 0.0037, "step": 3070 }, { "epoch": 11.76, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.6580952405929565, - "eval_runtime": 71.2302, - "eval_samples_per_second": 2.443, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7816091954022989, + "eval_loss": 1.452279806137085, + "eval_runtime": 135.5032, + "eval_samples_per_second": 1.284, + "eval_steps_per_second": 0.162, "step": 3070 }, { "epoch": 11.78, "learning_rate": 4.3116219667943807e-05, - "loss": 0.0003, + "loss": 0.0001, "step": 3075 }, { "epoch": 11.78, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.6635980606079102, - "eval_runtime": 71.8047, - "eval_samples_per_second": 2.423, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.4755744934082031, + "eval_runtime": 135.2628, + "eval_samples_per_second": 1.286, + "eval_steps_per_second": 0.163, "step": 3075 }, { "epoch": 11.8, "learning_rate": 4.286079182630907e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 3080 }, { "epoch": 11.8, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.6681902408599854, - "eval_runtime": 71.207, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.498329997062683, + "eval_runtime": 131.6123, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3080 }, { "epoch": 11.82, "learning_rate": 4.2605363984674326e-05, - "loss": 0.0007, + "loss": 0.0002, "step": 3085 }, { "epoch": 11.82, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.6737765073776245, - "eval_runtime": 71.7861, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.5150718688964844, + "eval_runtime": 131.5818, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3085 }, { "epoch": 11.84, "learning_rate": 4.234993614303959e-05, - "loss": 0.0007, + "loss": 0.0009, "step": 3090 }, { "epoch": 11.84, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.6775649785995483, - "eval_runtime": 71.4067, - "eval_samples_per_second": 2.437, - "eval_steps_per_second": 0.308, + "eval_accuracy": 0.7758620689655172, + "eval_loss": 1.5199931859970093, + "eval_runtime": 131.7399, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3090 }, { "epoch": 11.86, "learning_rate": 4.209450830140485e-05, - "loss": 0.002, + "loss": 0.0001, "step": 3095 }, { "epoch": 11.86, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.6792525053024292, - "eval_runtime": 75.5033, - "eval_samples_per_second": 2.305, - "eval_steps_per_second": 0.291, + "eval_accuracy": 0.7931034482758621, + "eval_loss": 1.5113189220428467, + "eval_runtime": 131.7523, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3095 }, { @@ -9301,26 +9301,26 @@ }, { "epoch": 11.88, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.6821579933166504, - "eval_runtime": 72.572, - "eval_samples_per_second": 2.398, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7931034482758621, + "eval_loss": 1.5138452053070068, + "eval_runtime": 131.5049, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3100 }, { "epoch": 11.9, "learning_rate": 4.158365261813538e-05, - "loss": 0.0015, + "loss": 0.001, "step": 3105 }, { "epoch": 11.9, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.6829290390014648, - "eval_runtime": 74.9292, - "eval_samples_per_second": 2.322, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.7931034482758621, + "eval_loss": 1.440112590789795, + "eval_runtime": 131.3922, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3105 }, { @@ -9331,71 +9331,71 @@ }, { "epoch": 11.92, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7074753046035767, - "eval_runtime": 72.5058, - "eval_samples_per_second": 2.4, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7931034482758621, + "eval_loss": 1.4551225900650024, + "eval_runtime": 131.4821, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3110 }, { "epoch": 11.93, "learning_rate": 4.107279693486591e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 3115 }, { "epoch": 11.93, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7354847192764282, - "eval_runtime": 72.5819, - "eval_samples_per_second": 2.397, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7873563218390804, + "eval_loss": 1.4636459350585938, + "eval_runtime": 132.1656, + "eval_samples_per_second": 1.317, + "eval_steps_per_second": 0.166, "step": 3115 }, { "epoch": 11.95, "learning_rate": 4.081736909323116e-05, - "loss": 0.0008, + "loss": 0.0007, "step": 3120 }, { "epoch": 11.95, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7531324625015259, - "eval_runtime": 74.5508, - "eval_samples_per_second": 2.334, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.7816091954022989, + "eval_loss": 1.4635924100875854, + "eval_runtime": 131.5512, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3120 }, { "epoch": 11.97, "learning_rate": 4.0561941251596427e-05, - "loss": 0.0001, + "loss": 0.0002, "step": 3125 }, { "epoch": 11.97, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7682172060012817, - "eval_runtime": 72.5782, - "eval_samples_per_second": 2.397, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7988505747126436, + "eval_loss": 1.462807536125183, + "eval_runtime": 131.5756, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3125 }, { "epoch": 11.99, "learning_rate": 4.030651340996169e-05, - "loss": 0.0004, + "loss": 0.0001, "step": 3130 }, { "epoch": 11.99, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.774706244468689, - "eval_runtime": 73.9012, - "eval_samples_per_second": 2.354, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7931034482758621, + "eval_loss": 1.4629733562469482, + "eval_runtime": 131.6187, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3130 }, { @@ -9406,11 +9406,11 @@ }, { "epoch": 12.01, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.77650785446167, - "eval_runtime": 75.9663, - "eval_samples_per_second": 2.29, - "eval_steps_per_second": 0.29, + "eval_accuracy": 0.7931034482758621, + "eval_loss": 1.463382601737976, + "eval_runtime": 131.7186, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3135 }, { @@ -9421,41 +9421,41 @@ }, { "epoch": 12.03, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.778681993484497, - "eval_runtime": 72.8484, - "eval_samples_per_second": 2.389, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7931034482758621, + "eval_loss": 1.4633333683013916, + "eval_runtime": 131.6306, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3140 }, { "epoch": 12.05, "learning_rate": 3.954022988505747e-05, - "loss": 0.0004, + "loss": 0.0001, "step": 3145 }, { "epoch": 12.05, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.778651475906372, - "eval_runtime": 74.5051, - "eval_samples_per_second": 2.335, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.7931034482758621, + "eval_loss": 1.463578462600708, + "eval_runtime": 131.6753, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3145 }, { "epoch": 12.07, "learning_rate": 3.9284802043422737e-05, - "loss": 0.1525, + "loss": 0.1353, "step": 3150 }, { "epoch": 12.07, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.7690765857696533, - "eval_runtime": 73.949, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7931034482758621, + "eval_loss": 1.4747191667556763, + "eval_runtime": 131.6197, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3150 }, { @@ -9466,11 +9466,11 @@ }, { "epoch": 12.09, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7667592763900757, - "eval_runtime": 73.0801, - "eval_samples_per_second": 2.381, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7931034482758621, + "eval_loss": 1.4827746152877808, + "eval_runtime": 131.4765, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3155 }, { @@ -9481,56 +9481,56 @@ }, { "epoch": 12.11, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7664886713027954, - "eval_runtime": 74.7535, - "eval_samples_per_second": 2.328, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.7873563218390804, + "eval_loss": 1.4878242015838623, + "eval_runtime": 131.5682, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3160 }, { "epoch": 12.13, "learning_rate": 3.851851851851852e-05, - "loss": 0.0023, + "loss": 0.001, "step": 3165 }, { "epoch": 12.13, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.764859914779663, - "eval_runtime": 73.1922, - "eval_samples_per_second": 2.377, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7873563218390804, + "eval_loss": 1.4895766973495483, + "eval_runtime": 131.5675, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3165 }, { "epoch": 12.15, "learning_rate": 3.826309067688378e-05, - "loss": 0.0004, + "loss": 0.0719, "step": 3170 }, { "epoch": 12.15, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7692474126815796, - "eval_runtime": 72.5207, - "eval_samples_per_second": 2.399, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7701149425287356, + "eval_loss": 1.5220507383346558, + "eval_runtime": 134.8358, + "eval_samples_per_second": 1.29, + "eval_steps_per_second": 0.163, "step": 3170 }, { "epoch": 12.16, "learning_rate": 3.800766283524904e-05, - "loss": 0.0005, + "loss": 0.0002, "step": 3175 }, { "epoch": 12.16, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7775356769561768, - "eval_runtime": 73.2672, - "eval_samples_per_second": 2.375, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.595354437828064, + "eval_runtime": 137.4557, + "eval_samples_per_second": 1.266, + "eval_steps_per_second": 0.16, "step": 3175 }, { @@ -9541,56 +9541,56 @@ }, { "epoch": 12.18, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.784372091293335, - "eval_runtime": 72.3493, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.636945366859436, + "eval_runtime": 132.8292, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 3180 }, { "epoch": 12.2, "learning_rate": 3.7496807151979567e-05, - "loss": 0.0001, + "loss": 0.0005, "step": 3185 }, { "epoch": 12.2, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.7875841856002808, - "eval_runtime": 72.8117, - "eval_samples_per_second": 2.39, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6581592559814453, + "eval_runtime": 136.5024, + "eval_samples_per_second": 1.275, + "eval_steps_per_second": 0.161, "step": 3185 }, { "epoch": 12.22, "learning_rate": 3.724137931034483e-05, - "loss": 0.0, + "loss": 0.0001, "step": 3190 }, { "epoch": 12.22, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.7894097566604614, - "eval_runtime": 73.3527, - "eval_samples_per_second": 2.372, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6685248613357544, + "eval_runtime": 133.3428, + "eval_samples_per_second": 1.305, + "eval_steps_per_second": 0.165, "step": 3190 }, { "epoch": 12.24, "learning_rate": 3.6985951468710087e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 3195 }, { "epoch": 12.24, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.7878044843673706, - "eval_runtime": 75.4362, - "eval_samples_per_second": 2.307, - "eval_steps_per_second": 0.292, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6744290590286255, + "eval_runtime": 133.4932, + "eval_samples_per_second": 1.303, + "eval_steps_per_second": 0.165, "step": 3195 }, { @@ -9601,251 +9601,251 @@ }, { "epoch": 12.26, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.787352442741394, - "eval_runtime": 72.6018, - "eval_samples_per_second": 2.397, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6778743267059326, + "eval_runtime": 134.2586, + "eval_samples_per_second": 1.296, + "eval_steps_per_second": 0.164, "step": 3200 }, { "epoch": 12.28, "learning_rate": 3.647509578544061e-05, - "loss": 0.0789, + "loss": 0.0921, "step": 3205 }, { "epoch": 12.28, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.7876189947128296, - "eval_runtime": 73.2483, - "eval_samples_per_second": 2.375, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6830893754959106, + "eval_runtime": 134.2289, + "eval_samples_per_second": 1.296, + "eval_steps_per_second": 0.164, "step": 3205 }, { "epoch": 12.3, "learning_rate": 3.6219667943805877e-05, - "loss": 0.0001, + "loss": 0.0002, "step": 3210 }, { "epoch": 12.3, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7912352085113525, - "eval_runtime": 72.2227, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.698684573173523, + "eval_runtime": 134.2954, + "eval_samples_per_second": 1.296, + "eval_steps_per_second": 0.164, "step": 3210 }, { "epoch": 12.32, "learning_rate": 3.596424010217114e-05, - "loss": 0.0001, + "loss": 0.0005, "step": 3215 }, { "epoch": 12.32, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7966572046279907, - "eval_runtime": 74.8197, - "eval_samples_per_second": 2.326, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.705964207649231, + "eval_runtime": 132.857, + "eval_samples_per_second": 1.31, + "eval_steps_per_second": 0.166, "step": 3215 }, { "epoch": 12.34, "learning_rate": 3.57088122605364e-05, - "loss": 0.0003, + "loss": 0.0006, "step": 3220 }, { "epoch": 12.34, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7997833490371704, - "eval_runtime": 73.9379, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7057161331176758, + "eval_runtime": 133.2456, + "eval_samples_per_second": 1.306, + "eval_steps_per_second": 0.165, "step": 3220 }, { "epoch": 12.36, "learning_rate": 3.545338441890167e-05, - "loss": 0.0005, + "loss": 0.0004, "step": 3225 }, { "epoch": 12.36, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.8003336191177368, - "eval_runtime": 73.7895, - "eval_samples_per_second": 2.358, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7048311233520508, + "eval_runtime": 132.9324, + "eval_samples_per_second": 1.309, + "eval_steps_per_second": 0.165, "step": 3225 }, { "epoch": 12.38, "learning_rate": 3.519795657726692e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3230 }, { "epoch": 12.38, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.801089882850647, - "eval_runtime": 73.3293, - "eval_samples_per_second": 2.373, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7053695917129517, + "eval_runtime": 134.6248, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 3230 }, { "epoch": 12.39, "learning_rate": 3.4942528735632187e-05, - "loss": 0.0051, + "loss": 0.0683, "step": 3235 }, { "epoch": 12.39, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.8076083660125732, - "eval_runtime": 73.9488, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7123521566390991, + "eval_runtime": 133.0527, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 3235 }, { "epoch": 12.41, "learning_rate": 3.468710089399745e-05, - "loss": 0.1098, + "loss": 0.0834, "step": 3240 }, { "epoch": 12.41, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.8316843509674072, - "eval_runtime": 72.738, - "eval_samples_per_second": 2.392, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7344695329666138, + "eval_runtime": 132.9362, + "eval_samples_per_second": 1.309, + "eval_steps_per_second": 0.165, "step": 3240 }, { "epoch": 12.43, "learning_rate": 3.443167305236271e-05, - "loss": 0.0009, + "loss": 0.0007, "step": 3245 }, { "epoch": 12.43, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.8573225736618042, - "eval_runtime": 71.7532, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7519522905349731, + "eval_runtime": 133.0604, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.165, "step": 3245 }, { "epoch": 12.45, "learning_rate": 3.417624521072797e-05, - "loss": 0.0001, + "loss": 0.0005, "step": 3250 }, { "epoch": 12.45, - "eval_accuracy": 0.7126436781609196, - "eval_loss": 1.8779215812683105, - "eval_runtime": 71.1624, - "eval_samples_per_second": 2.445, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7614890336990356, + "eval_runtime": 133.4235, + "eval_samples_per_second": 1.304, + "eval_steps_per_second": 0.165, "step": 3250 }, { "epoch": 12.47, "learning_rate": 3.392081736909323e-05, - "loss": 0.1514, + "loss": 0.1469, "step": 3255 }, { "epoch": 12.47, - "eval_accuracy": 0.7241379310344828, - "eval_loss": 1.8445746898651123, - "eval_runtime": 73.7727, - "eval_samples_per_second": 2.359, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.7582188844680786, + "eval_runtime": 133.2039, + "eval_samples_per_second": 1.306, + "eval_steps_per_second": 0.165, "step": 3255 }, { "epoch": 12.49, "learning_rate": 3.36653895274585e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 3260 }, { "epoch": 12.49, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.8186622858047485, - "eval_runtime": 72.9059, - "eval_samples_per_second": 2.387, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7566941976547241, + "eval_runtime": 136.0308, + "eval_samples_per_second": 1.279, + "eval_steps_per_second": 0.162, "step": 3260 }, { "epoch": 12.51, "learning_rate": 3.340996168582375e-05, - "loss": 0.0024, + "loss": 0.0013, "step": 3265 }, { "epoch": 12.51, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.791884183883667, - "eval_runtime": 71.8399, - "eval_samples_per_second": 2.422, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.7564201354980469, + "eval_runtime": 131.5808, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3265 }, { "epoch": 12.53, "learning_rate": 3.3154533844189017e-05, - "loss": 0.0003, + "loss": 0.0009, "step": 3270 }, { "epoch": 12.53, "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7768088579177856, - "eval_runtime": 71.2715, - "eval_samples_per_second": 2.441, - "eval_steps_per_second": 0.309, + "eval_loss": 1.7547907829284668, + "eval_runtime": 131.5031, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3270 }, { "epoch": 12.55, "learning_rate": 3.289910600255428e-05, - "loss": 0.0004, + "loss": 0.0014, "step": 3275 }, { "epoch": 12.55, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.7849540710449219, - "eval_runtime": 72.4582, - "eval_samples_per_second": 2.401, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7478688955307007, + "eval_runtime": 131.6966, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3275 }, { "epoch": 12.57, "learning_rate": 3.264367816091954e-05, - "loss": 0.2673, + "loss": 0.0358, "step": 3280 }, { "epoch": 12.57, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.788151502609253, - "eval_runtime": 71.2667, - "eval_samples_per_second": 2.442, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.7233160734176636, + "eval_runtime": 134.66, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 3280 }, { @@ -9856,11 +9856,11 @@ }, { "epoch": 12.59, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.791655421257019, - "eval_runtime": 71.75, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6970218420028687, + "eval_runtime": 131.3525, + "eval_samples_per_second": 1.325, + "eval_steps_per_second": 0.167, "step": 3285 }, { @@ -9871,41 +9871,41 @@ }, { "epoch": 12.61, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7958180904388428, - "eval_runtime": 71.2081, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.6798888444900513, + "eval_runtime": 131.4043, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3290 }, { "epoch": 12.62, "learning_rate": 3.1877394636015327e-05, - "loss": 0.0094, + "loss": 0.012, "step": 3295 }, { "epoch": 12.62, - "eval_accuracy": 0.7298850574712644, - "eval_loss": 1.811642050743103, - "eval_runtime": 73.4826, - "eval_samples_per_second": 2.368, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.6489917039871216, + "eval_runtime": 133.8875, + "eval_samples_per_second": 1.3, + "eval_steps_per_second": 0.164, "step": 3295 }, { "epoch": 12.64, "learning_rate": 3.162196679438058e-05, - "loss": 0.0299, + "loss": 0.0055, "step": 3300 }, { "epoch": 12.64, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.8312135934829712, - "eval_runtime": 72.3831, - "eval_samples_per_second": 2.404, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.6284756660461426, + "eval_runtime": 134.5307, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.164, "step": 3300 }, { @@ -9916,116 +9916,116 @@ }, { "epoch": 12.66, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.8625733852386475, - "eval_runtime": 71.8476, - "eval_samples_per_second": 2.422, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.6172682046890259, + "eval_runtime": 131.376, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3305 }, { "epoch": 12.68, "learning_rate": 3.111111111111111e-05, - "loss": 0.0007, + "loss": 0.0001, "step": 3310 }, { "epoch": 12.68, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.8807812929153442, - "eval_runtime": 71.1083, - "eval_samples_per_second": 2.447, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.6175791025161743, + "eval_runtime": 131.4088, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3310 }, { "epoch": 12.7, "learning_rate": 3.085568326947637e-05, - "loss": 0.0002, + "loss": 0.0, "step": 3315 }, { "epoch": 12.7, "eval_accuracy": 0.735632183908046, - "eval_loss": 1.8896175622940063, - "eval_runtime": 71.6802, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_loss": 1.6215569972991943, + "eval_runtime": 131.6808, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3315 }, { "epoch": 12.72, "learning_rate": 3.0600255427841637e-05, - "loss": 0.0003, + "loss": 0.0001, "step": 3320 }, { "epoch": 12.72, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.8892227411270142, - "eval_runtime": 74.5342, - "eval_samples_per_second": 2.334, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.6240766048431396, + "eval_runtime": 131.5056, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3320 }, { "epoch": 12.74, "learning_rate": 3.0344827586206897e-05, - "loss": 0.0001, + "loss": 0.0009, "step": 3325 }, { "epoch": 12.74, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.8880239725112915, - "eval_runtime": 73.6161, - "eval_samples_per_second": 2.364, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.6174923181533813, + "eval_runtime": 134.7652, + "eval_samples_per_second": 1.291, + "eval_steps_per_second": 0.163, "step": 3325 }, { "epoch": 12.76, "learning_rate": 3.008939974457216e-05, - "loss": 0.0013, + "loss": 0.0001, "step": 3330 }, { "epoch": 12.76, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.874098539352417, - "eval_runtime": 73.2537, - "eval_samples_per_second": 2.375, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.5911328792572021, + "eval_runtime": 131.6881, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3330 }, { "epoch": 12.78, "learning_rate": 2.9833971902937423e-05, - "loss": 0.0029, + "loss": 0.0, "step": 3335 }, { "epoch": 12.78, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.8415035009384155, - "eval_runtime": 71.8592, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.5766935348510742, + "eval_runtime": 131.5549, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3335 }, { "epoch": 12.8, "learning_rate": 2.9578544061302683e-05, - "loss": 0.0, + "loss": 0.0003, "step": 3340 }, { "epoch": 12.8, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.8206106424331665, - "eval_runtime": 71.14, - "eval_samples_per_second": 2.446, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7241379310344828, + "eval_loss": 1.5662565231323242, + "eval_runtime": 131.8731, + "eval_samples_per_second": 1.319, + "eval_steps_per_second": 0.167, "step": 3340 }, { @@ -10036,26 +10036,26 @@ }, { "epoch": 12.82, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.80865478515625, - "eval_runtime": 73.9426, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7298850574712644, + "eval_loss": 1.5604114532470703, + "eval_runtime": 131.5616, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3345 }, { "epoch": 12.84, "learning_rate": 2.906768837803321e-05, - "loss": 0.0003, + "loss": 0.0002, "step": 3350 }, { "epoch": 12.84, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.8006689548492432, - "eval_runtime": 71.1637, - "eval_samples_per_second": 2.445, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.735632183908046, + "eval_loss": 1.5551427602767944, + "eval_runtime": 131.4326, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3350 }, { @@ -10066,26 +10066,26 @@ }, { "epoch": 12.85, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7933427095413208, - "eval_runtime": 71.6538, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7413793103448276, + "eval_loss": 1.5520435571670532, + "eval_runtime": 131.5755, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3355 }, { "epoch": 12.87, "learning_rate": 2.855683269476373e-05, - "loss": 0.0001, + "loss": 0.0002, "step": 3360 }, { "epoch": 12.87, "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7883424758911133, - "eval_runtime": 71.2115, - "eval_samples_per_second": 2.443, - "eval_steps_per_second": 0.309, + "eval_loss": 1.5491315126419067, + "eval_runtime": 131.5638, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3360 }, { @@ -10097,25 +10097,25 @@ { "epoch": 12.89, "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7855582237243652, - "eval_runtime": 71.6601, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_loss": 1.5480037927627563, + "eval_runtime": 134.5953, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.163, "step": 3365 }, { "epoch": 12.91, "learning_rate": 2.8045977011494257e-05, - "loss": 0.0002, + "loss": 0.0003, "step": 3370 }, { "epoch": 12.91, "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7831445932388306, - "eval_runtime": 72.466, - "eval_samples_per_second": 2.401, - "eval_steps_per_second": 0.304, + "eval_loss": 1.5459556579589844, + "eval_runtime": 131.726, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3370 }, { @@ -10127,10 +10127,10 @@ { "epoch": 12.93, "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7811237573623657, - "eval_runtime": 73.4621, - "eval_samples_per_second": 2.369, - "eval_steps_per_second": 0.299, + "eval_loss": 1.5453726053237915, + "eval_runtime": 134.0392, + "eval_samples_per_second": 1.298, + "eval_steps_per_second": 0.164, "step": 3375 }, { @@ -10141,41 +10141,41 @@ }, { "epoch": 12.95, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7792061567306519, - "eval_runtime": 71.133, - "eval_samples_per_second": 2.446, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5440211296081543, + "eval_runtime": 131.4982, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3380 }, { "epoch": 12.97, "learning_rate": 2.727969348659004e-05, - "loss": 0.0003, + "loss": 0.0001, "step": 3385 }, { "epoch": 12.97, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7786797285079956, - "eval_runtime": 71.7413, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5426923036575317, + "eval_runtime": 134.7213, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 3385 }, { "epoch": 12.99, "learning_rate": 2.70242656449553e-05, - "loss": 0.0001, + "loss": 0.0002, "step": 3390 }, { "epoch": 12.99, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7803369760513306, - "eval_runtime": 71.2098, - "eval_samples_per_second": 2.443, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5425446033477783, + "eval_runtime": 131.7157, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3390 }, { @@ -10186,41 +10186,41 @@ }, { "epoch": 13.01, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7803747653961182, - "eval_runtime": 73.577, - "eval_samples_per_second": 2.365, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5422731637954712, + "eval_runtime": 131.664, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3395 }, { "epoch": 13.03, "learning_rate": 2.6513409961685827e-05, - "loss": 0.0001, + "loss": 0.0002, "step": 3400 }, { "epoch": 13.03, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7811692953109741, - "eval_runtime": 71.1922, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.540529489517212, + "eval_runtime": 131.4555, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3400 }, { "epoch": 13.05, "learning_rate": 2.625798212005109e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 3405 }, { "epoch": 13.05, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7811998128890991, - "eval_runtime": 71.7905, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5385899543762207, + "eval_runtime": 131.4962, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3405 }, { @@ -10231,26 +10231,26 @@ }, { "epoch": 13.07, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7812321186065674, - "eval_runtime": 71.2005, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5374879837036133, + "eval_runtime": 131.5346, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3410 }, { "epoch": 13.08, "learning_rate": 2.574712643678161e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 3415 }, { "epoch": 13.08, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7813913822174072, - "eval_runtime": 73.6067, - "eval_samples_per_second": 2.364, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5360500812530518, + "eval_runtime": 131.7287, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3415 }, { @@ -10261,26 +10261,26 @@ }, { "epoch": 13.1, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7811163663864136, - "eval_runtime": 71.1956, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5362660884857178, + "eval_runtime": 131.6875, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3420 }, { "epoch": 13.12, "learning_rate": 2.5236270753512137e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3425 }, { "epoch": 13.12, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7811745405197144, - "eval_runtime": 73.6252, - "eval_samples_per_second": 2.363, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5362850427627563, + "eval_runtime": 131.432, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3425 }, { @@ -10291,11 +10291,11 @@ }, { "epoch": 13.14, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7812987565994263, - "eval_runtime": 73.0837, - "eval_samples_per_second": 2.381, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5356768369674683, + "eval_runtime": 131.5867, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3430 }, { @@ -10306,11 +10306,11 @@ }, { "epoch": 13.16, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7799280881881714, - "eval_runtime": 71.791, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5358167886734009, + "eval_runtime": 134.5615, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.163, "step": 3435 }, { @@ -10321,41 +10321,41 @@ }, { "epoch": 13.18, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7800962924957275, - "eval_runtime": 71.1707, - "eval_samples_per_second": 2.445, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.535969853401184, + "eval_runtime": 131.7354, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3440 }, { "epoch": 13.2, "learning_rate": 2.4214559386973183e-05, - "loss": 0.1536, + "loss": 0.1681, "step": 3445 }, { "epoch": 13.2, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7748968601226807, - "eval_runtime": 71.7615, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5371443033218384, + "eval_runtime": 131.3478, + "eval_samples_per_second": 1.325, + "eval_steps_per_second": 0.167, "step": 3445 }, { "epoch": 13.22, "learning_rate": 2.3959131545338443e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3450 }, { "epoch": 13.22, "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7677268981933594, - "eval_runtime": 73.6553, - "eval_samples_per_second": 2.362, - "eval_steps_per_second": 0.299, + "eval_loss": 1.5422464609146118, + "eval_runtime": 134.7795, + "eval_samples_per_second": 1.291, + "eval_steps_per_second": 0.163, "step": 3450 }, { @@ -10366,11 +10366,11 @@ }, { "epoch": 13.24, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7640659809112549, - "eval_runtime": 71.7772, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.546493649482727, + "eval_runtime": 131.4931, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3455 }, { @@ -10381,101 +10381,101 @@ }, { "epoch": 13.26, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7622041702270508, - "eval_runtime": 71.3282, - "eval_samples_per_second": 2.439, - "eval_steps_per_second": 0.308, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5489312410354614, + "eval_runtime": 131.5674, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3460 }, { "epoch": 13.28, "learning_rate": 2.319284802043423e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3465 }, { "epoch": 13.28, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.760646104812622, - "eval_runtime": 71.7718, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.550827145576477, + "eval_runtime": 134.4819, + "eval_samples_per_second": 1.294, + "eval_steps_per_second": 0.164, "step": 3465 }, { "epoch": 13.3, "learning_rate": 2.293742017879949e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3470 }, { "epoch": 13.3, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7595189809799194, - "eval_runtime": 71.2335, - "eval_samples_per_second": 2.443, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5521552562713623, + "eval_runtime": 131.4751, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3470 }, { "epoch": 13.31, "learning_rate": 2.268199233716475e-05, - "loss": 0.0, + "loss": 0.0001, "step": 3475 }, { "epoch": 13.31, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7589017152786255, - "eval_runtime": 73.1315, - "eval_samples_per_second": 2.379, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5527838468551636, + "eval_runtime": 133.8598, + "eval_samples_per_second": 1.3, + "eval_steps_per_second": 0.164, "step": 3475 }, { "epoch": 13.33, "learning_rate": 2.2426564495530013e-05, - "loss": 0.0108, + "loss": 0.0012, "step": 3480 }, { "epoch": 13.33, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7552155256271362, - "eval_runtime": 74.2293, - "eval_samples_per_second": 2.344, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.551941990852356, + "eval_runtime": 133.9177, + "eval_samples_per_second": 1.299, + "eval_steps_per_second": 0.164, "step": 3480 }, { "epoch": 13.35, "learning_rate": 2.2171136653895273e-05, - "loss": 0.0088, + "loss": 0.116, "step": 3485 }, { "epoch": 13.35, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7531734704971313, - "eval_runtime": 73.069, - "eval_samples_per_second": 2.381, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5588303804397583, + "eval_runtime": 131.58, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3485 }, { "epoch": 13.37, "learning_rate": 2.1915708812260537e-05, - "loss": 0.003, + "loss": 0.0017, "step": 3490 }, { "epoch": 13.37, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7560912370681763, - "eval_runtime": 72.5457, - "eval_samples_per_second": 2.398, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5604760646820068, + "eval_runtime": 131.2524, + "eval_samples_per_second": 1.326, + "eval_steps_per_second": 0.168, "step": 3490 }, { @@ -10486,71 +10486,71 @@ }, { "epoch": 13.39, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.760205864906311, - "eval_runtime": 73.1685, - "eval_samples_per_second": 2.378, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.55997896194458, + "eval_runtime": 131.3434, + "eval_samples_per_second": 1.325, + "eval_steps_per_second": 0.167, "step": 3495 }, { "epoch": 13.41, "learning_rate": 2.1404853128991063e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3500 }, { "epoch": 13.41, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.763502836227417, - "eval_runtime": 74.1089, - "eval_samples_per_second": 2.348, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5588353872299194, + "eval_runtime": 135.0074, + "eval_samples_per_second": 1.289, + "eval_steps_per_second": 0.163, "step": 3500 }, { "epoch": 13.43, "learning_rate": 2.1149425287356323e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 3505 }, { "epoch": 13.43, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.765455961227417, - "eval_runtime": 72.6454, - "eval_samples_per_second": 2.395, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.559090256690979, + "eval_runtime": 131.5397, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3505 }, { "epoch": 13.45, "learning_rate": 2.0893997445721587e-05, - "loss": 0.1119, + "loss": 0.1047, "step": 3510 }, { "epoch": 13.45, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7576346397399902, - "eval_runtime": 72.2316, - "eval_samples_per_second": 2.409, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.565811038017273, + "eval_runtime": 135.6105, + "eval_samples_per_second": 1.283, + "eval_steps_per_second": 0.162, "step": 3510 }, { "epoch": 13.47, "learning_rate": 2.0638569604086847e-05, - "loss": 0.0025, + "loss": 0.0023, "step": 3515 }, { "epoch": 13.47, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7575277090072632, - "eval_runtime": 73.2772, - "eval_samples_per_second": 2.375, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.568742036819458, + "eval_runtime": 131.5718, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3515 }, { @@ -10561,26 +10561,26 @@ }, { "epoch": 13.49, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7568941116333008, - "eval_runtime": 74.9459, - "eval_samples_per_second": 2.322, - "eval_steps_per_second": 0.294, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.569899320602417, + "eval_runtime": 131.5005, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3520 }, { "epoch": 13.51, "learning_rate": 2.012771392081737e-05, - "loss": 0.0, + "loss": 0.0001, "step": 3525 }, { "epoch": 13.51, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7557793855667114, - "eval_runtime": 72.5811, - "eval_samples_per_second": 2.397, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5698680877685547, + "eval_runtime": 132.1871, + "eval_samples_per_second": 1.316, + "eval_steps_per_second": 0.166, "step": 3525 }, { @@ -10591,11 +10591,11 @@ }, { "epoch": 13.52, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7556322813034058, - "eval_runtime": 73.9541, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.57002592086792, + "eval_runtime": 131.6043, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3530 }, { @@ -10606,41 +10606,41 @@ }, { "epoch": 13.54, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7552297115325928, - "eval_runtime": 73.7388, - "eval_samples_per_second": 2.36, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.57023286819458, + "eval_runtime": 131.5771, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3535 }, { "epoch": 13.56, "learning_rate": 1.9361430395913153e-05, - "loss": 0.0, + "loss": 0.0001, "step": 3540 }, { "epoch": 13.56, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.755204439163208, - "eval_runtime": 74.1877, - "eval_samples_per_second": 2.345, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5700526237487793, + "eval_runtime": 131.5081, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3540 }, { "epoch": 13.58, "learning_rate": 1.9106002554278417e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 3545 }, { "epoch": 13.58, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7553986310958862, - "eval_runtime": 73.9835, - "eval_samples_per_second": 2.352, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.568669080734253, + "eval_runtime": 131.5846, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3545 }, { @@ -10651,11 +10651,11 @@ }, { "epoch": 13.6, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.755505084991455, - "eval_runtime": 73.6282, - "eval_samples_per_second": 2.363, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5692752599716187, + "eval_runtime": 131.4298, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3550 }, { @@ -10666,11 +10666,11 @@ }, { "epoch": 13.62, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7556419372558594, - "eval_runtime": 73.3262, - "eval_samples_per_second": 2.373, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5690228939056396, + "eval_runtime": 135.56, + "eval_samples_per_second": 1.284, + "eval_steps_per_second": 0.162, "step": 3555 }, { @@ -10681,11 +10681,11 @@ }, { "epoch": 13.64, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7553131580352783, - "eval_runtime": 72.3509, - "eval_samples_per_second": 2.405, - "eval_steps_per_second": 0.304, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5685721635818481, + "eval_runtime": 131.7676, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3560 }, { @@ -10696,11 +10696,11 @@ }, { "epoch": 13.66, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7552363872528076, - "eval_runtime": 74.575, - "eval_samples_per_second": 2.333, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5687892436981201, + "eval_runtime": 131.381, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3565 }, { @@ -10711,11 +10711,11 @@ }, { "epoch": 13.68, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7547740936279297, - "eval_runtime": 74.1586, - "eval_samples_per_second": 2.346, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.569113850593567, + "eval_runtime": 131.5094, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3570 }, { @@ -10726,71 +10726,71 @@ }, { "epoch": 13.7, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7549880743026733, - "eval_runtime": 73.4905, - "eval_samples_per_second": 2.368, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5693120956420898, + "eval_runtime": 131.6036, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3575 }, { "epoch": 13.72, "learning_rate": 1.731800766283525e-05, - "loss": 0.0, + "loss": 0.0001, "step": 3580 }, { "epoch": 13.72, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7544682025909424, - "eval_runtime": 72.5422, - "eval_samples_per_second": 2.399, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5689599514007568, + "eval_runtime": 135.599, + "eval_samples_per_second": 1.283, + "eval_steps_per_second": 0.162, "step": 3580 }, { "epoch": 13.74, "learning_rate": 1.706257982120051e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3585 }, { "epoch": 13.74, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.755007028579712, - "eval_runtime": 73.8715, - "eval_samples_per_second": 2.355, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5693317651748657, + "eval_runtime": 134.7788, + "eval_samples_per_second": 1.291, + "eval_steps_per_second": 0.163, "step": 3585 }, { "epoch": 13.75, "learning_rate": 1.6807151979565773e-05, - "loss": 0.0568, + "loss": 0.0631, "step": 3590 }, { "epoch": 13.75, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7550690174102783, - "eval_runtime": 72.5531, - "eval_samples_per_second": 2.398, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5679577589035034, + "eval_runtime": 134.5103, + "eval_samples_per_second": 1.294, + "eval_steps_per_second": 0.164, "step": 3590 }, { "epoch": 13.77, "learning_rate": 1.6551724137931037e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3595 }, { "epoch": 13.77, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7550101280212402, - "eval_runtime": 75.6777, - "eval_samples_per_second": 2.299, - "eval_steps_per_second": 0.291, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5653505325317383, + "eval_runtime": 131.652, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3595 }, { @@ -10801,11 +10801,11 @@ }, { "epoch": 13.79, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.754870891571045, - "eval_runtime": 72.8303, - "eval_samples_per_second": 2.389, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5649654865264893, + "eval_runtime": 131.6632, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3600 }, { @@ -10816,26 +10816,26 @@ }, { "epoch": 13.81, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7544294595718384, - "eval_runtime": 73.3275, - "eval_samples_per_second": 2.373, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.5650653839111328, + "eval_runtime": 131.6911, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3605 }, { "epoch": 13.83, "learning_rate": 1.578544061302682e-05, - "loss": 0.0528, + "loss": 0.0407, "step": 3610 }, { "epoch": 13.83, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7550995349884033, - "eval_runtime": 71.3272, - "eval_samples_per_second": 2.439, - "eval_steps_per_second": 0.308, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5672154426574707, + "eval_runtime": 131.4084, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3610 }, { @@ -10846,41 +10846,41 @@ }, { "epoch": 13.85, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7551816701889038, - "eval_runtime": 71.892, - "eval_samples_per_second": 2.42, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5693352222442627, + "eval_runtime": 131.3899, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3615 }, { "epoch": 13.87, "learning_rate": 1.5274584929757343e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3620 }, { "epoch": 13.87, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7544273138046265, - "eval_runtime": 71.4145, - "eval_samples_per_second": 2.436, - "eval_steps_per_second": 0.308, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5711853504180908, + "eval_runtime": 131.5375, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3620 }, { "epoch": 13.89, "learning_rate": 1.5019157088122607e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3625 }, { "epoch": 13.89, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7543997764587402, - "eval_runtime": 71.7733, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.572008490562439, + "eval_runtime": 135.5757, + "eval_samples_per_second": 1.283, + "eval_steps_per_second": 0.162, "step": 3625 }, { @@ -10891,41 +10891,41 @@ }, { "epoch": 13.91, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.753382682800293, - "eval_runtime": 71.2431, - "eval_samples_per_second": 2.442, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5722723007202148, + "eval_runtime": 134.6601, + "eval_samples_per_second": 1.292, + "eval_steps_per_second": 0.163, "step": 3630 }, { "epoch": 13.93, "learning_rate": 1.450830140485313e-05, - "loss": 0.0001, + "loss": 0.0002, "step": 3635 }, { "epoch": 13.93, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7532877922058105, - "eval_runtime": 71.7023, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5724270343780518, + "eval_runtime": 131.7012, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3635 }, { "epoch": 13.95, "learning_rate": 1.4252873563218392e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3640 }, { "epoch": 13.95, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7533704042434692, - "eval_runtime": 71.9533, - "eval_samples_per_second": 2.418, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5720359086990356, + "eval_runtime": 134.6156, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.163, "step": 3640 }, { @@ -10936,71 +10936,71 @@ }, { "epoch": 13.97, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7539461851119995, - "eval_runtime": 72.1079, - "eval_samples_per_second": 2.413, - "eval_steps_per_second": 0.305, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5716608762741089, + "eval_runtime": 131.5241, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3645 }, { "epoch": 13.98, "learning_rate": 1.3742017879948915e-05, - "loss": 0.1209, + "loss": 0.0947, "step": 3650 }, { "epoch": 13.98, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7514145374298096, - "eval_runtime": 73.1219, - "eval_samples_per_second": 2.38, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5741726160049438, + "eval_runtime": 131.7395, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3650 }, { "epoch": 14.0, "learning_rate": 1.3486590038314175e-05, - "loss": 0.1395, + "loss": 0.0042, "step": 3655 }, { "epoch": 14.0, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7531005144119263, - "eval_runtime": 71.7851, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.571437120437622, + "eval_runtime": 131.5082, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3655 }, { "epoch": 14.02, "learning_rate": 1.3231162196679438e-05, - "loss": 0.0037, + "loss": 0.0791, "step": 3660 }, { "epoch": 14.02, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7534183263778687, - "eval_runtime": 71.1617, - "eval_samples_per_second": 2.445, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5750482082366943, + "eval_runtime": 131.5289, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3660 }, { "epoch": 14.04, "learning_rate": 1.29757343550447e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3665 }, { "epoch": 14.04, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7549954652786255, - "eval_runtime": 71.701, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.575944185256958, + "eval_runtime": 131.463, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3665 }, { @@ -11011,11 +11011,11 @@ }, { "epoch": 14.06, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.754940390586853, - "eval_runtime": 71.172, - "eval_samples_per_second": 2.445, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.57651948928833, + "eval_runtime": 131.6655, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3670 }, { @@ -11026,11 +11026,11 @@ }, { "epoch": 14.08, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7556744813919067, - "eval_runtime": 71.7906, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5765440464019775, + "eval_runtime": 131.5009, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3675 }, { @@ -11041,11 +11041,11 @@ }, { "epoch": 14.1, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.755606770515442, - "eval_runtime": 71.2401, - "eval_samples_per_second": 2.442, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.576804280281067, + "eval_runtime": 131.5344, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3680 }, { @@ -11056,26 +11056,26 @@ }, { "epoch": 14.12, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7560800313949585, - "eval_runtime": 73.8699, - "eval_samples_per_second": 2.355, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5764987468719482, + "eval_runtime": 131.7171, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3685 }, { "epoch": 14.14, "learning_rate": 1.169859514687101e-05, - "loss": 0.0956, + "loss": 0.0765, "step": 3690 }, { "epoch": 14.14, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.749796748161316, - "eval_runtime": 72.7543, - "eval_samples_per_second": 2.392, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5836353302001953, + "eval_runtime": 131.6805, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3690 }, { @@ -11086,26 +11086,26 @@ }, { "epoch": 14.16, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.745739459991455, - "eval_runtime": 71.7312, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5884052515029907, + "eval_runtime": 131.778, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 3695 }, { "epoch": 14.18, "learning_rate": 1.1187739463601533e-05, - "loss": 0.0001, + "loss": 0.0, "step": 3700 }, { "epoch": 14.18, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7437233924865723, - "eval_runtime": 71.1505, - "eval_samples_per_second": 2.446, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5915166139602661, + "eval_runtime": 131.777, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.167, "step": 3700 }, { @@ -11116,26 +11116,26 @@ }, { "epoch": 14.2, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.742749571800232, - "eval_runtime": 71.695, - "eval_samples_per_second": 2.427, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5935451984405518, + "eval_runtime": 131.5481, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3705 }, { "epoch": 14.21, "learning_rate": 1.0676883780332057e-05, - "loss": 0.0, + "loss": 0.0001, "step": 3710 }, { "epoch": 14.21, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7426731586456299, - "eval_runtime": 71.2028, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5938645601272583, + "eval_runtime": 131.6509, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3710 }, { @@ -11146,26 +11146,26 @@ }, { "epoch": 14.23, - "eval_accuracy": 0.7471264367816092, - "eval_loss": 1.7424765825271606, - "eval_runtime": 71.7529, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5943220853805542, + "eval_runtime": 131.574, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3715 }, { "epoch": 14.25, "learning_rate": 1.016602809706258e-05, - "loss": 0.0558, + "loss": 0.0647, "step": 3720 }, { "epoch": 14.25, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7421131134033203, - "eval_runtime": 71.1886, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.5929863452911377, + "eval_runtime": 131.7035, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3720 }, { @@ -11176,11 +11176,11 @@ }, { "epoch": 14.27, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7416549921035767, - "eval_runtime": 71.8684, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.5920848846435547, + "eval_runtime": 131.5269, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3725 }, { @@ -11191,26 +11191,26 @@ }, { "epoch": 14.29, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7416775226593018, - "eval_runtime": 71.2129, - "eval_samples_per_second": 2.443, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.5917651653289795, + "eval_runtime": 131.5915, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3730 }, { "epoch": 14.31, "learning_rate": 9.399744572158365e-06, - "loss": 0.0, + "loss": 0.0001, "step": 3735 }, { "epoch": 14.31, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7417203187942505, - "eval_runtime": 71.6762, - "eval_samples_per_second": 2.428, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.5915542840957642, + "eval_runtime": 131.612, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3735 }, { @@ -11221,116 +11221,116 @@ }, { "epoch": 14.33, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7419242858886719, - "eval_runtime": 71.2197, - "eval_samples_per_second": 2.443, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.5907422304153442, + "eval_runtime": 131.6774, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3740 }, { "epoch": 14.35, "learning_rate": 8.88888888888889e-06, - "loss": 0.0078, + "loss": 0.0075, "step": 3745 }, { "epoch": 14.35, - "eval_accuracy": 0.7413793103448276, - "eval_loss": 1.7439537048339844, - "eval_runtime": 73.4872, - "eval_samples_per_second": 2.368, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.764367816091954, + "eval_loss": 1.585798978805542, + "eval_runtime": 131.6382, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3745 }, { "epoch": 14.37, "learning_rate": 8.633461047254152e-06, - "loss": 0.0001, + "loss": 0.0, "step": 3750 }, { "epoch": 14.37, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7466919422149658, - "eval_runtime": 72.9233, - "eval_samples_per_second": 2.386, - "eval_steps_per_second": 0.302, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5811045169830322, + "eval_runtime": 131.3857, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3750 }, { "epoch": 14.39, "learning_rate": 8.378033205619413e-06, - "loss": 0.0009, + "loss": 0.0033, "step": 3755 }, { "epoch": 14.39, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7497990131378174, - "eval_runtime": 71.7246, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.572999119758606, + "eval_runtime": 131.4285, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3755 }, { "epoch": 14.41, "learning_rate": 8.122605363984675e-06, - "loss": 0.0992, + "loss": 0.0814, "step": 3760 }, { "epoch": 14.41, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7496720552444458, - "eval_runtime": 71.1652, - "eval_samples_per_second": 2.445, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5697554349899292, + "eval_runtime": 131.595, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3760 }, { "epoch": 14.43, "learning_rate": 7.867177522349937e-06, - "loss": 0.0004, + "loss": 0.0001, "step": 3765 }, { "epoch": 14.43, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7489051818847656, - "eval_runtime": 71.7293, - "eval_samples_per_second": 2.426, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5690526962280273, + "eval_runtime": 131.6676, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3765 }, { "epoch": 14.44, "learning_rate": 7.611749680715198e-06, - "loss": 0.0001, + "loss": 0.0002, "step": 3770 }, { "epoch": 14.44, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7488287687301636, - "eval_runtime": 71.1909, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5678595304489136, + "eval_runtime": 131.4813, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3770 }, { "epoch": 14.46, "learning_rate": 7.35632183908046e-06, - "loss": 0.0001, + "loss": 0.0002, "step": 3775 }, { "epoch": 14.46, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7490508556365967, - "eval_runtime": 71.7426, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5677728652954102, + "eval_runtime": 131.4831, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3775 }, { @@ -11341,11 +11341,11 @@ }, { "epoch": 14.48, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7489107847213745, - "eval_runtime": 71.2131, - "eval_samples_per_second": 2.443, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.5681380033493042, + "eval_runtime": 131.5915, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3780 }, { @@ -11356,41 +11356,41 @@ }, { "epoch": 14.5, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7490137815475464, - "eval_runtime": 71.8519, - "eval_samples_per_second": 2.422, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.567604899406433, + "eval_runtime": 131.6719, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3785 }, { "epoch": 14.52, "learning_rate": 6.590038314176246e-06, - "loss": 0.1097, + "loss": 0.0058, "step": 3790 }, { "epoch": 14.52, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.750510573387146, - "eval_runtime": 71.3059, - "eval_samples_per_second": 2.44, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7586206896551724, + "eval_loss": 1.564001441001892, + "eval_runtime": 135.5216, + "eval_samples_per_second": 1.284, + "eval_steps_per_second": 0.162, "step": 3790 }, { "epoch": 14.54, "learning_rate": 6.3346104725415075e-06, - "loss": 0.0001, + "loss": 0.0002, "step": 3795 }, { "epoch": 14.54, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7523186206817627, - "eval_runtime": 71.7857, - "eval_samples_per_second": 2.424, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5616205930709839, + "eval_runtime": 131.5494, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3795 }, { @@ -11401,11 +11401,11 @@ }, { "epoch": 14.56, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7536077499389648, - "eval_runtime": 73.2363, - "eval_samples_per_second": 2.376, - "eval_steps_per_second": 0.3, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.5597946643829346, + "eval_runtime": 131.5269, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3800 }, { @@ -11416,26 +11416,26 @@ }, { "epoch": 14.58, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7539523839950562, - "eval_runtime": 73.6071, - "eval_samples_per_second": 2.364, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7471264367816092, + "eval_loss": 1.559468388557434, + "eval_runtime": 131.6538, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3805 }, { "epoch": 14.6, "learning_rate": 5.568326947637293e-06, - "loss": 0.0002, + "loss": 0.0001, "step": 3810 }, { "epoch": 14.6, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7543648481369019, - "eval_runtime": 72.9966, - "eval_samples_per_second": 2.384, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5583645105361938, + "eval_runtime": 131.7004, + "eval_samples_per_second": 1.321, + "eval_steps_per_second": 0.167, "step": 3810 }, { @@ -11446,11 +11446,11 @@ }, { "epoch": 14.62, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7542980909347534, - "eval_runtime": 71.752, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5580310821533203, + "eval_runtime": 131.4921, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3815 }, { @@ -11461,26 +11461,26 @@ }, { "epoch": 14.64, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7545336484909058, - "eval_runtime": 71.8754, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5573028326034546, + "eval_runtime": 131.4684, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3820 }, { "epoch": 14.66, "learning_rate": 4.802043422733078e-06, - "loss": 0.0002, + "loss": 0.0001, "step": 3825 }, { "epoch": 14.66, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.754623293876648, - "eval_runtime": 71.767, - "eval_samples_per_second": 2.425, - "eval_steps_per_second": 0.307, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5569429397583008, + "eval_runtime": 131.5291, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3825 }, { @@ -11491,11 +11491,11 @@ }, { "epoch": 14.67, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7546714544296265, - "eval_runtime": 73.0008, - "eval_samples_per_second": 2.384, - "eval_steps_per_second": 0.301, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5563690662384033, + "eval_runtime": 131.4559, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3830 }, { @@ -11506,11 +11506,11 @@ }, { "epoch": 14.69, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7549469470977783, - "eval_runtime": 71.8595, - "eval_samples_per_second": 2.421, - "eval_steps_per_second": 0.306, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5566602945327759, + "eval_runtime": 131.5184, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3835 }, { @@ -11521,26 +11521,26 @@ }, { "epoch": 14.71, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7555478811264038, - "eval_runtime": 71.1999, - "eval_samples_per_second": 2.444, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5568325519561768, + "eval_runtime": 131.496, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3840 }, { "epoch": 14.73, "learning_rate": 3.7803320561941254e-06, - "loss": 0.0284, + "loss": 0.0205, "step": 3845 }, { "epoch": 14.73, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7555177211761475, - "eval_runtime": 73.5037, - "eval_samples_per_second": 2.367, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.557239055633545, + "eval_runtime": 131.3743, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3845 }, { @@ -11551,11 +11551,11 @@ }, { "epoch": 14.75, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7556744813919067, - "eval_runtime": 71.2208, - "eval_samples_per_second": 2.443, - "eval_steps_per_second": 0.309, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5573699474334717, + "eval_runtime": 131.5683, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3850 }, { @@ -11566,11 +11566,11 @@ }, { "epoch": 14.77, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7554370164871216, - "eval_runtime": 75.3145, - "eval_samples_per_second": 2.31, - "eval_steps_per_second": 0.292, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5578587055206299, + "eval_runtime": 134.8968, + "eval_samples_per_second": 1.29, + "eval_steps_per_second": 0.163, "step": 3855 }, { @@ -11581,26 +11581,26 @@ }, { "epoch": 14.79, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7560198307037354, - "eval_runtime": 74.6084, - "eval_samples_per_second": 2.332, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.557448148727417, + "eval_runtime": 131.5372, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3860 }, { "epoch": 14.81, "learning_rate": 2.7586206896551725e-06, - "loss": 0.0001, + "loss": 0.0002, "step": 3865 }, { "epoch": 14.81, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7560211420059204, - "eval_runtime": 73.5059, - "eval_samples_per_second": 2.367, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5573031902313232, + "eval_runtime": 134.5697, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.163, "step": 3865 }, { @@ -11611,26 +11611,26 @@ }, { "epoch": 14.83, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7558296918869019, - "eval_runtime": 72.5044, - "eval_samples_per_second": 2.4, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5574105978012085, + "eval_runtime": 131.472, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3870 }, { "epoch": 14.85, "learning_rate": 2.2477650063856962e-06, - "loss": 0.0001, + "loss": 0.0, "step": 3875 }, { "epoch": 14.85, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7559691667556763, - "eval_runtime": 74.4471, - "eval_samples_per_second": 2.337, - "eval_steps_per_second": 0.296, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5576403141021729, + "eval_runtime": 131.5008, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3875 }, { @@ -11641,11 +11641,11 @@ }, { "epoch": 14.87, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7558660507202148, - "eval_runtime": 72.5388, - "eval_samples_per_second": 2.399, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5580261945724487, + "eval_runtime": 131.6634, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3880 }, { @@ -11656,11 +11656,11 @@ }, { "epoch": 14.89, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7562229633331299, - "eval_runtime": 72.5804, - "eval_samples_per_second": 2.397, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5581773519515991, + "eval_runtime": 132.4108, + "eval_samples_per_second": 1.314, + "eval_steps_per_second": 0.166, "step": 3885 }, { @@ -11671,96 +11671,96 @@ }, { "epoch": 14.9, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7563174962997437, - "eval_runtime": 74.0885, - "eval_samples_per_second": 2.349, - "eval_steps_per_second": 0.297, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5580536127090454, + "eval_runtime": 131.3966, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3890 }, { "epoch": 14.92, "learning_rate": 1.2260536398467433e-06, - "loss": 0.0, + "loss": 0.0001, "step": 3895 }, { "epoch": 14.92, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.756478190422058, - "eval_runtime": 76.613, - "eval_samples_per_second": 2.271, - "eval_steps_per_second": 0.287, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.558103322982788, + "eval_runtime": 131.4551, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3895 }, { "epoch": 14.94, "learning_rate": 9.706257982120052e-07, - "loss": 0.0, + "loss": 0.0001, "step": 3900 }, { "epoch": 14.94, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.756520390510559, - "eval_runtime": 72.5435, - "eval_samples_per_second": 2.399, - "eval_steps_per_second": 0.303, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5582367181777954, + "eval_runtime": 131.4681, + "eval_samples_per_second": 1.324, + "eval_steps_per_second": 0.167, "step": 3900 }, { "epoch": 14.96, "learning_rate": 7.15197956577267e-07, - "loss": 0.0001, + "loss": 0.0, "step": 3905 }, { "epoch": 14.96, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.7564539909362793, - "eval_runtime": 74.574, - "eval_samples_per_second": 2.333, - "eval_steps_per_second": 0.295, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5579785108566284, + "eval_runtime": 131.5649, + "eval_samples_per_second": 1.323, + "eval_steps_per_second": 0.167, "step": 3905 }, { "epoch": 14.98, "learning_rate": 4.5977011494252875e-07, - "loss": 0.1303, + "loss": 0.0969, "step": 3910 }, { "epoch": 14.98, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.756199598312378, - "eval_runtime": 73.9195, - "eval_samples_per_second": 2.354, - "eval_steps_per_second": 0.298, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5584545135498047, + "eval_runtime": 134.4637, + "eval_samples_per_second": 1.294, + "eval_steps_per_second": 0.164, "step": 3910 }, { "epoch": 15.0, "learning_rate": 2.0434227330779057e-07, - "loss": 0.0001, + "loss": 0.0, "step": 3915 }, { "epoch": 15.0, - "eval_accuracy": 0.735632183908046, - "eval_loss": 1.755522608757019, - "eval_runtime": 73.61, - "eval_samples_per_second": 2.364, - "eval_steps_per_second": 0.299, + "eval_accuracy": 0.7528735632183908, + "eval_loss": 1.5588274002075195, + "eval_runtime": 131.5989, + "eval_samples_per_second": 1.322, + "eval_steps_per_second": 0.167, "step": 3915 }, { "epoch": 15.0, "step": 3915, "total_flos": 1.592360968692695e+18, - "train_loss": 0.5163871185302369, - "train_runtime": 118001.491, - "train_samples_per_second": 0.199, - "train_steps_per_second": 0.033 + "train_loss": 0.45436480764952814, + "train_runtime": 152136.0702, + "train_samples_per_second": 0.154, + "train_steps_per_second": 0.026 } ], "logging_steps": 5,