{ "best_metric": 0.9007891770011274, "best_model_checkpoint": "swin-base-patch4-window7-224-in22k-finetuned-lora-ISIC-2019/checkpoint-6000", "epoch": 99.2, "eval_steps": 500, "global_step": 6200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 0.0009983870967741936, "loss": 1.5414, "step": 10 }, { "epoch": 0.32, "learning_rate": 0.0009967741935483871, "loss": 1.079, "step": 20 }, { "epoch": 0.48, "learning_rate": 0.0009951612903225807, "loss": 0.9569, "step": 30 }, { "epoch": 0.64, "learning_rate": 0.0009935483870967743, "loss": 0.9056, "step": 40 }, { "epoch": 0.8, "learning_rate": 0.0009919354838709678, "loss": 0.8682, "step": 50 }, { "epoch": 0.96, "learning_rate": 0.0009903225806451614, "loss": 0.858, "step": 60 }, { "epoch": 0.99, "eval_accuracy": 0.733934611048478, "eval_loss": 0.7348718047142029, "eval_runtime": 45.7088, "eval_samples_per_second": 38.811, "eval_steps_per_second": 0.613, "step": 62 }, { "epoch": 1.12, "learning_rate": 0.000988709677419355, "loss": 0.8064, "step": 70 }, { "epoch": 1.28, "learning_rate": 0.0009870967741935483, "loss": 0.7615, "step": 80 }, { "epoch": 1.44, "learning_rate": 0.000985483870967742, "loss": 0.7629, "step": 90 }, { "epoch": 1.6, "learning_rate": 0.0009838709677419356, "loss": 0.7456, "step": 100 }, { "epoch": 1.76, "learning_rate": 0.0009822580645161292, "loss": 0.7168, "step": 110 }, { "epoch": 1.92, "learning_rate": 0.0009806451612903225, "loss": 0.7403, "step": 120 }, { "epoch": 2.0, "eval_accuracy": 0.7762119503945885, "eval_loss": 0.6363512873649597, "eval_runtime": 46.1661, "eval_samples_per_second": 38.426, "eval_steps_per_second": 0.607, "step": 125 }, { "epoch": 2.08, "learning_rate": 0.000979032258064516, "loss": 0.7194, "step": 130 }, { "epoch": 2.24, "learning_rate": 0.0009774193548387096, "loss": 0.6984, "step": 140 }, { "epoch": 2.4, "learning_rate": 0.0009758064516129033, "loss": 0.6867, "step": 150 }, { "epoch": 2.56, "learning_rate": 0.0009741935483870968, "loss": 0.6767, "step": 160 }, { "epoch": 2.72, "learning_rate": 0.0009725806451612903, "loss": 0.6604, "step": 170 }, { "epoch": 2.88, "learning_rate": 0.0009709677419354839, "loss": 0.675, "step": 180 }, { "epoch": 2.99, "eval_accuracy": 0.7998872604284104, "eval_loss": 0.5776907801628113, "eval_runtime": 36.9833, "eval_samples_per_second": 47.968, "eval_steps_per_second": 0.757, "step": 187 }, { "epoch": 3.04, "learning_rate": 0.0009693548387096774, "loss": 0.6548, "step": 190 }, { "epoch": 3.2, "learning_rate": 0.000967741935483871, "loss": 0.6551, "step": 200 }, { "epoch": 3.36, "learning_rate": 0.0009661290322580646, "loss": 0.6212, "step": 210 }, { "epoch": 3.52, "learning_rate": 0.0009645161290322581, "loss": 0.6351, "step": 220 }, { "epoch": 3.68, "learning_rate": 0.0009629032258064516, "loss": 0.6244, "step": 230 }, { "epoch": 3.84, "learning_rate": 0.0009612903225806452, "loss": 0.6073, "step": 240 }, { "epoch": 4.0, "learning_rate": 0.0009596774193548388, "loss": 0.6309, "step": 250 }, { "epoch": 4.0, "eval_accuracy": 0.7874859075535513, "eval_loss": 0.570148229598999, "eval_runtime": 29.3907, "eval_samples_per_second": 60.359, "eval_steps_per_second": 0.953, "step": 250 }, { "epoch": 4.16, "learning_rate": 0.0009580645161290322, "loss": 0.5976, "step": 260 }, { "epoch": 4.32, "learning_rate": 0.0009564516129032258, "loss": 0.5955, "step": 270 }, { "epoch": 4.48, "learning_rate": 0.0009548387096774193, "loss": 0.5959, "step": 280 }, { "epoch": 4.64, "learning_rate": 0.000953225806451613, "loss": 0.5927, "step": 290 }, { "epoch": 4.8, "learning_rate": 0.0009516129032258065, "loss": 0.5776, "step": 300 }, { "epoch": 4.96, "learning_rate": 0.00095, "loss": 0.5734, "step": 310 }, { "epoch": 4.99, "eval_accuracy": 0.8015783540022547, "eval_loss": 0.5293604135513306, "eval_runtime": 29.4268, "eval_samples_per_second": 60.285, "eval_steps_per_second": 0.952, "step": 312 }, { "epoch": 5.12, "learning_rate": 0.0009483870967741936, "loss": 0.5632, "step": 320 }, { "epoch": 5.28, "learning_rate": 0.0009467741935483871, "loss": 0.5503, "step": 330 }, { "epoch": 5.44, "learning_rate": 0.0009451612903225807, "loss": 0.5658, "step": 340 }, { "epoch": 5.6, "learning_rate": 0.0009435483870967742, "loss": 0.545, "step": 350 }, { "epoch": 5.76, "learning_rate": 0.0009419354838709677, "loss": 0.5667, "step": 360 }, { "epoch": 5.92, "learning_rate": 0.0009403225806451613, "loss": 0.5338, "step": 370 }, { "epoch": 6.0, "eval_accuracy": 0.8010146561443067, "eval_loss": 0.5417840480804443, "eval_runtime": 29.4769, "eval_samples_per_second": 60.183, "eval_steps_per_second": 0.95, "step": 375 }, { "epoch": 6.08, "learning_rate": 0.0009387096774193549, "loss": 0.5827, "step": 380 }, { "epoch": 6.24, "learning_rate": 0.0009370967741935485, "loss": 0.5337, "step": 390 }, { "epoch": 6.4, "learning_rate": 0.0009354838709677419, "loss": 0.5306, "step": 400 }, { "epoch": 6.56, "learning_rate": 0.0009338709677419355, "loss": 0.5738, "step": 410 }, { "epoch": 6.72, "learning_rate": 0.000932258064516129, "loss": 0.5418, "step": 420 }, { "epoch": 6.88, "learning_rate": 0.0009308064516129033, "loss": 0.5104, "step": 430 }, { "epoch": 6.99, "eval_accuracy": 0.8179255918827508, "eval_loss": 0.5056995749473572, "eval_runtime": 29.8376, "eval_samples_per_second": 59.455, "eval_steps_per_second": 0.938, "step": 437 }, { "epoch": 7.04, "learning_rate": 0.0009291935483870968, "loss": 0.5137, "step": 440 }, { "epoch": 7.2, "learning_rate": 0.0009275806451612904, "loss": 0.5276, "step": 450 }, { "epoch": 7.36, "learning_rate": 0.000925967741935484, "loss": 0.5192, "step": 460 }, { "epoch": 7.52, "learning_rate": 0.0009243548387096774, "loss": 0.5012, "step": 470 }, { "epoch": 7.68, "learning_rate": 0.000922741935483871, "loss": 0.519, "step": 480 }, { "epoch": 7.84, "learning_rate": 0.0009211290322580645, "loss": 0.5067, "step": 490 }, { "epoch": 8.0, "learning_rate": 0.0009195161290322581, "loss": 0.5091, "step": 500 }, { "epoch": 8.0, "eval_accuracy": 0.8207440811724915, "eval_loss": 0.5009720325469971, "eval_runtime": 29.8933, "eval_samples_per_second": 59.344, "eval_steps_per_second": 0.937, "step": 500 }, { "epoch": 8.16, "learning_rate": 0.0009179032258064516, "loss": 0.4937, "step": 510 }, { "epoch": 8.32, "learning_rate": 0.0009162903225806452, "loss": 0.4894, "step": 520 }, { "epoch": 8.48, "learning_rate": 0.0009146774193548387, "loss": 0.4887, "step": 530 }, { "epoch": 8.64, "learning_rate": 0.0009130645161290323, "loss": 0.5252, "step": 540 }, { "epoch": 8.8, "learning_rate": 0.0009114516129032259, "loss": 0.5007, "step": 550 }, { "epoch": 8.96, "learning_rate": 0.0009098387096774193, "loss": 0.4678, "step": 560 }, { "epoch": 8.99, "eval_accuracy": 0.8246899661781285, "eval_loss": 0.4757310152053833, "eval_runtime": 29.4337, "eval_samples_per_second": 60.271, "eval_steps_per_second": 0.951, "step": 562 }, { "epoch": 9.12, "learning_rate": 0.0009082258064516129, "loss": 0.4527, "step": 570 }, { "epoch": 9.28, "learning_rate": 0.0009066129032258064, "loss": 0.4553, "step": 580 }, { "epoch": 9.44, "learning_rate": 0.0009050000000000001, "loss": 0.4927, "step": 590 }, { "epoch": 9.6, "learning_rate": 0.0009033870967741937, "loss": 0.4415, "step": 600 }, { "epoch": 9.76, "learning_rate": 0.0009017741935483871, "loss": 0.4628, "step": 610 }, { "epoch": 9.92, "learning_rate": 0.0009001612903225807, "loss": 0.467, "step": 620 }, { "epoch": 10.0, "eval_accuracy": 0.8151071025930101, "eval_loss": 0.4579251706600189, "eval_runtime": 29.9049, "eval_samples_per_second": 59.321, "eval_steps_per_second": 0.936, "step": 625 }, { "epoch": 10.08, "learning_rate": 0.0008985483870967742, "loss": 0.4398, "step": 630 }, { "epoch": 10.24, "learning_rate": 0.0008969354838709678, "loss": 0.4604, "step": 640 }, { "epoch": 10.4, "learning_rate": 0.0008953225806451612, "loss": 0.4375, "step": 650 }, { "epoch": 10.56, "learning_rate": 0.0008937096774193548, "loss": 0.4526, "step": 660 }, { "epoch": 10.72, "learning_rate": 0.0008920967741935483, "loss": 0.4249, "step": 670 }, { "epoch": 10.88, "learning_rate": 0.000890483870967742, "loss": 0.4416, "step": 680 }, { "epoch": 10.99, "eval_accuracy": 0.8314543404735062, "eval_loss": 0.4649556279182434, "eval_runtime": 29.2149, "eval_samples_per_second": 60.722, "eval_steps_per_second": 0.958, "step": 687 }, { "epoch": 11.04, "learning_rate": 0.0008888709677419356, "loss": 0.4438, "step": 690 }, { "epoch": 11.2, "learning_rate": 0.000887258064516129, "loss": 0.404, "step": 700 }, { "epoch": 11.36, "learning_rate": 0.0008856451612903226, "loss": 0.4364, "step": 710 }, { "epoch": 11.52, "learning_rate": 0.0008840322580645161, "loss": 0.4046, "step": 720 }, { "epoch": 11.68, "learning_rate": 0.0008824193548387097, "loss": 0.4116, "step": 730 }, { "epoch": 11.84, "learning_rate": 0.0008808064516129033, "loss": 0.4447, "step": 740 }, { "epoch": 12.0, "learning_rate": 0.0008791935483870967, "loss": 0.4277, "step": 750 }, { "epoch": 12.0, "eval_accuracy": 0.8404735062006764, "eval_loss": 0.440464586019516, "eval_runtime": 29.0063, "eval_samples_per_second": 61.159, "eval_steps_per_second": 0.965, "step": 750 }, { "epoch": 12.16, "learning_rate": 0.0008775806451612904, "loss": 0.4078, "step": 760 }, { "epoch": 12.32, "learning_rate": 0.0008759677419354839, "loss": 0.4216, "step": 770 }, { "epoch": 12.48, "learning_rate": 0.0008743548387096775, "loss": 0.4011, "step": 780 }, { "epoch": 12.64, "learning_rate": 0.000872741935483871, "loss": 0.4047, "step": 790 }, { "epoch": 12.8, "learning_rate": 0.0008711290322580645, "loss": 0.4297, "step": 800 }, { "epoch": 12.96, "learning_rate": 0.000869516129032258, "loss": 0.4261, "step": 810 }, { "epoch": 12.99, "eval_accuracy": 0.8387824126268321, "eval_loss": 0.44135671854019165, "eval_runtime": 29.101, "eval_samples_per_second": 60.96, "eval_steps_per_second": 0.962, "step": 812 }, { "epoch": 13.12, "learning_rate": 0.0008679032258064516, "loss": 0.4045, "step": 820 }, { "epoch": 13.28, "learning_rate": 0.0008662903225806452, "loss": 0.3814, "step": 830 }, { "epoch": 13.44, "learning_rate": 0.0008646774193548387, "loss": 0.3865, "step": 840 }, { "epoch": 13.6, "learning_rate": 0.0008630645161290323, "loss": 0.3665, "step": 850 }, { "epoch": 13.76, "learning_rate": 0.0008614516129032258, "loss": 0.3792, "step": 860 }, { "epoch": 13.92, "learning_rate": 0.0008598387096774194, "loss": 0.4016, "step": 870 }, { "epoch": 14.0, "eval_accuracy": 0.8286358511837655, "eval_loss": 0.43920814990997314, "eval_runtime": 28.9441, "eval_samples_per_second": 61.291, "eval_steps_per_second": 0.967, "step": 875 }, { "epoch": 14.08, "learning_rate": 0.000858225806451613, "loss": 0.3718, "step": 880 }, { "epoch": 14.24, "learning_rate": 0.0008566129032258064, "loss": 0.38, "step": 890 }, { "epoch": 14.4, "learning_rate": 0.000855, "loss": 0.3489, "step": 900 }, { "epoch": 14.56, "learning_rate": 0.0008533870967741935, "loss": 0.3882, "step": 910 }, { "epoch": 14.72, "learning_rate": 0.0008517741935483872, "loss": 0.3825, "step": 920 }, { "epoch": 14.88, "learning_rate": 0.0008501612903225807, "loss": 0.3729, "step": 930 }, { "epoch": 14.99, "eval_accuracy": 0.8280721533258174, "eval_loss": 0.4471096396446228, "eval_runtime": 28.7947, "eval_samples_per_second": 61.609, "eval_steps_per_second": 0.972, "step": 937 }, { "epoch": 15.04, "learning_rate": 0.0008485483870967742, "loss": 0.4284, "step": 940 }, { "epoch": 15.2, "learning_rate": 0.0008469354838709678, "loss": 0.395, "step": 950 }, { "epoch": 15.36, "learning_rate": 0.0008453225806451613, "loss": 0.3531, "step": 960 }, { "epoch": 15.52, "learning_rate": 0.0008437096774193549, "loss": 0.354, "step": 970 }, { "epoch": 15.68, "learning_rate": 0.0008420967741935483, "loss": 0.3698, "step": 980 }, { "epoch": 15.84, "learning_rate": 0.0008404838709677419, "loss": 0.3465, "step": 990 }, { "epoch": 16.0, "learning_rate": 0.0008388709677419355, "loss": 0.3813, "step": 1000 }, { "epoch": 16.0, "eval_accuracy": 0.8432919954904171, "eval_loss": 0.41551458835601807, "eval_runtime": 29.7739, "eval_samples_per_second": 59.582, "eval_steps_per_second": 0.94, "step": 1000 }, { "epoch": 16.16, "learning_rate": 0.0008372580645161291, "loss": 0.3449, "step": 1010 }, { "epoch": 16.32, "learning_rate": 0.0008356451612903227, "loss": 0.3651, "step": 1020 }, { "epoch": 16.48, "learning_rate": 0.0008340322580645161, "loss": 0.3519, "step": 1030 }, { "epoch": 16.64, "learning_rate": 0.0008324193548387097, "loss": 0.3669, "step": 1040 }, { "epoch": 16.8, "learning_rate": 0.0008308064516129032, "loss": 0.3629, "step": 1050 }, { "epoch": 16.96, "learning_rate": 0.0008291935483870968, "loss": 0.3454, "step": 1060 }, { "epoch": 16.99, "eval_accuracy": 0.8365276211950394, "eval_loss": 0.4322459399700165, "eval_runtime": 29.8666, "eval_samples_per_second": 59.397, "eval_steps_per_second": 0.938, "step": 1062 }, { "epoch": 17.12, "learning_rate": 0.0008275806451612903, "loss": 0.3199, "step": 1070 }, { "epoch": 17.28, "learning_rate": 0.0008259677419354839, "loss": 0.3138, "step": 1080 }, { "epoch": 17.44, "learning_rate": 0.0008243548387096775, "loss": 0.3293, "step": 1090 }, { "epoch": 17.6, "learning_rate": 0.000822741935483871, "loss": 0.3411, "step": 1100 }, { "epoch": 17.76, "learning_rate": 0.0008211290322580646, "loss": 0.3326, "step": 1110 }, { "epoch": 17.92, "learning_rate": 0.000819516129032258, "loss": 0.3639, "step": 1120 }, { "epoch": 18.0, "eval_accuracy": 0.8359639233370914, "eval_loss": 0.4332345724105835, "eval_runtime": 29.3927, "eval_samples_per_second": 60.355, "eval_steps_per_second": 0.953, "step": 1125 }, { "epoch": 18.08, "learning_rate": 0.0008179032258064516, "loss": 0.3441, "step": 1130 }, { "epoch": 18.24, "learning_rate": 0.0008162903225806451, "loss": 0.2963, "step": 1140 }, { "epoch": 18.4, "learning_rate": 0.0008146774193548387, "loss": 0.2963, "step": 1150 }, { "epoch": 18.56, "learning_rate": 0.0008130645161290324, "loss": 0.3363, "step": 1160 }, { "epoch": 18.72, "learning_rate": 0.0008114516129032258, "loss": 0.3429, "step": 1170 }, { "epoch": 18.88, "learning_rate": 0.0008098387096774194, "loss": 0.3393, "step": 1180 }, { "epoch": 18.99, "eval_accuracy": 0.8523111612175873, "eval_loss": 0.4190393388271332, "eval_runtime": 29.5495, "eval_samples_per_second": 60.035, "eval_steps_per_second": 0.948, "step": 1187 }, { "epoch": 19.04, "learning_rate": 0.0008082258064516129, "loss": 0.3017, "step": 1190 }, { "epoch": 19.2, "learning_rate": 0.0008066129032258065, "loss": 0.3258, "step": 1200 }, { "epoch": 19.36, "learning_rate": 0.0008051612903225806, "loss": 0.3028, "step": 1210 }, { "epoch": 19.52, "learning_rate": 0.0008035483870967743, "loss": 0.3226, "step": 1220 }, { "epoch": 19.68, "learning_rate": 0.0008019354838709677, "loss": 0.3502, "step": 1230 }, { "epoch": 19.84, "learning_rate": 0.0008003225806451613, "loss": 0.3132, "step": 1240 }, { "epoch": 20.0, "learning_rate": 0.0007987096774193549, "loss": 0.3135, "step": 1250 }, { "epoch": 20.0, "eval_accuracy": 0.8534385569334837, "eval_loss": 0.41664600372314453, "eval_runtime": 29.6803, "eval_samples_per_second": 59.77, "eval_steps_per_second": 0.943, "step": 1250 }, { "epoch": 20.16, "learning_rate": 0.0007970967741935484, "loss": 0.2775, "step": 1260 }, { "epoch": 20.32, "learning_rate": 0.000795483870967742, "loss": 0.284, "step": 1270 }, { "epoch": 20.48, "learning_rate": 0.0007938709677419354, "loss": 0.2786, "step": 1280 }, { "epoch": 20.64, "learning_rate": 0.000792258064516129, "loss": 0.3094, "step": 1290 }, { "epoch": 20.8, "learning_rate": 0.0007906451612903227, "loss": 0.3377, "step": 1300 }, { "epoch": 20.96, "learning_rate": 0.0007890322580645162, "loss": 0.3094, "step": 1310 }, { "epoch": 20.99, "eval_accuracy": 0.8562570462232244, "eval_loss": 0.4004518687725067, "eval_runtime": 29.6498, "eval_samples_per_second": 59.832, "eval_steps_per_second": 0.944, "step": 1312 }, { "epoch": 21.12, "learning_rate": 0.0007874193548387098, "loss": 0.2895, "step": 1320 }, { "epoch": 21.28, "learning_rate": 0.0007858064516129032, "loss": 0.296, "step": 1330 }, { "epoch": 21.44, "learning_rate": 0.0007841935483870968, "loss": 0.275, "step": 1340 }, { "epoch": 21.6, "learning_rate": 0.0007825806451612903, "loss": 0.3178, "step": 1350 }, { "epoch": 21.76, "learning_rate": 0.0007809677419354839, "loss": 0.2489, "step": 1360 }, { "epoch": 21.92, "learning_rate": 0.0007793548387096773, "loss": 0.3263, "step": 1370 }, { "epoch": 22.0, "eval_accuracy": 0.8494926719278467, "eval_loss": 0.4399039149284363, "eval_runtime": 29.8845, "eval_samples_per_second": 59.362, "eval_steps_per_second": 0.937, "step": 1375 }, { "epoch": 22.08, "learning_rate": 0.0007777419354838709, "loss": 0.2737, "step": 1380 }, { "epoch": 22.24, "learning_rate": 0.0007761290322580646, "loss": 0.2847, "step": 1390 }, { "epoch": 22.4, "learning_rate": 0.0007745161290322581, "loss": 0.2611, "step": 1400 }, { "epoch": 22.56, "learning_rate": 0.0007729032258064517, "loss": 0.2686, "step": 1410 }, { "epoch": 22.72, "learning_rate": 0.0007712903225806451, "loss": 0.2846, "step": 1420 }, { "epoch": 22.88, "learning_rate": 0.0007696774193548387, "loss": 0.3009, "step": 1430 }, { "epoch": 22.99, "eval_accuracy": 0.8523111612175873, "eval_loss": 0.4121840298175812, "eval_runtime": 29.94, "eval_samples_per_second": 59.252, "eval_steps_per_second": 0.935, "step": 1437 }, { "epoch": 23.04, "learning_rate": 0.0007680645161290323, "loss": 0.2728, "step": 1440 }, { "epoch": 23.2, "learning_rate": 0.0007664516129032258, "loss": 0.2544, "step": 1450 }, { "epoch": 23.36, "learning_rate": 0.0007648387096774194, "loss": 0.2788, "step": 1460 }, { "epoch": 23.52, "learning_rate": 0.0007632258064516129, "loss": 0.2538, "step": 1470 }, { "epoch": 23.68, "learning_rate": 0.0007616129032258065, "loss": 0.2724, "step": 1480 }, { "epoch": 23.84, "learning_rate": 0.00076, "loss": 0.2891, "step": 1490 }, { "epoch": 24.0, "learning_rate": 0.0007583870967741936, "loss": 0.2804, "step": 1500 }, { "epoch": 24.0, "eval_accuracy": 0.8562570462232244, "eval_loss": 0.429273396730423, "eval_runtime": 29.8298, "eval_samples_per_second": 59.471, "eval_steps_per_second": 0.939, "step": 1500 }, { "epoch": 24.16, "learning_rate": 0.000756774193548387, "loss": 0.2681, "step": 1510 }, { "epoch": 24.32, "learning_rate": 0.0007551612903225806, "loss": 0.2599, "step": 1520 }, { "epoch": 24.48, "learning_rate": 0.0007535483870967742, "loss": 0.2698, "step": 1530 }, { "epoch": 24.64, "learning_rate": 0.0007519354838709677, "loss": 0.2737, "step": 1540 }, { "epoch": 24.8, "learning_rate": 0.0007503225806451614, "loss": 0.264, "step": 1550 }, { "epoch": 24.96, "learning_rate": 0.0007487096774193548, "loss": 0.2516, "step": 1560 }, { "epoch": 24.99, "eval_accuracy": 0.8562570462232244, "eval_loss": 0.42893821001052856, "eval_runtime": 29.8289, "eval_samples_per_second": 59.473, "eval_steps_per_second": 0.939, "step": 1562 }, { "epoch": 25.12, "learning_rate": 0.0007470967741935484, "loss": 0.2671, "step": 1570 }, { "epoch": 25.28, "learning_rate": 0.000745483870967742, "loss": 0.2435, "step": 1580 }, { "epoch": 25.44, "learning_rate": 0.0007438709677419355, "loss": 0.2477, "step": 1590 }, { "epoch": 25.6, "learning_rate": 0.0007422580645161291, "loss": 0.2631, "step": 1600 }, { "epoch": 25.76, "learning_rate": 0.0007406451612903225, "loss": 0.2423, "step": 1610 }, { "epoch": 25.92, "learning_rate": 0.0007390322580645161, "loss": 0.2763, "step": 1620 }, { "epoch": 26.0, "eval_accuracy": 0.8647125140924464, "eval_loss": 0.41249939799308777, "eval_runtime": 29.4606, "eval_samples_per_second": 60.216, "eval_steps_per_second": 0.95, "step": 1625 }, { "epoch": 26.08, "learning_rate": 0.0007374193548387097, "loss": 0.2435, "step": 1630 }, { "epoch": 26.24, "learning_rate": 0.0007358064516129033, "loss": 0.2289, "step": 1640 }, { "epoch": 26.4, "learning_rate": 0.0007341935483870969, "loss": 0.2346, "step": 1650 }, { "epoch": 26.56, "learning_rate": 0.0007325806451612903, "loss": 0.2556, "step": 1660 }, { "epoch": 26.72, "learning_rate": 0.0007309677419354839, "loss": 0.2724, "step": 1670 }, { "epoch": 26.88, "learning_rate": 0.0007293548387096774, "loss": 0.2707, "step": 1680 }, { "epoch": 26.99, "eval_accuracy": 0.8664036076662909, "eval_loss": 0.42308202385902405, "eval_runtime": 29.606, "eval_samples_per_second": 59.92, "eval_steps_per_second": 0.946, "step": 1687 }, { "epoch": 27.04, "learning_rate": 0.000727741935483871, "loss": 0.265, "step": 1690 }, { "epoch": 27.2, "learning_rate": 0.0007261290322580644, "loss": 0.2335, "step": 1700 }, { "epoch": 27.36, "learning_rate": 0.0007245161290322581, "loss": 0.2423, "step": 1710 }, { "epoch": 27.52, "learning_rate": 0.0007229032258064517, "loss": 0.2363, "step": 1720 }, { "epoch": 27.68, "learning_rate": 0.0007212903225806452, "loss": 0.2543, "step": 1730 }, { "epoch": 27.84, "learning_rate": 0.0007196774193548388, "loss": 0.2564, "step": 1740 }, { "epoch": 28.0, "learning_rate": 0.0007180645161290322, "loss": 0.2585, "step": 1750 }, { "epoch": 28.0, "eval_accuracy": 0.8596392333709132, "eval_loss": 0.420967698097229, "eval_runtime": 29.7608, "eval_samples_per_second": 59.609, "eval_steps_per_second": 0.941, "step": 1750 }, { "epoch": 28.16, "learning_rate": 0.0007164516129032258, "loss": 0.2505, "step": 1760 }, { "epoch": 28.32, "learning_rate": 0.0007148387096774193, "loss": 0.2182, "step": 1770 }, { "epoch": 28.48, "learning_rate": 0.0007132258064516129, "loss": 0.2261, "step": 1780 }, { "epoch": 28.64, "learning_rate": 0.0007116129032258066, "loss": 0.2171, "step": 1790 }, { "epoch": 28.8, "learning_rate": 0.00071, "loss": 0.2013, "step": 1800 }, { "epoch": 28.96, "learning_rate": 0.0007083870967741936, "loss": 0.2317, "step": 1810 }, { "epoch": 28.99, "eval_accuracy": 0.8602029312288614, "eval_loss": 0.4295957386493683, "eval_runtime": 29.3564, "eval_samples_per_second": 60.43, "eval_steps_per_second": 0.954, "step": 1812 }, { "epoch": 29.12, "learning_rate": 0.0007067741935483871, "loss": 0.2085, "step": 1820 }, { "epoch": 29.28, "learning_rate": 0.0007051612903225807, "loss": 0.2212, "step": 1830 }, { "epoch": 29.44, "learning_rate": 0.0007035483870967741, "loss": 0.236, "step": 1840 }, { "epoch": 29.6, "learning_rate": 0.0007019354838709677, "loss": 0.2215, "step": 1850 }, { "epoch": 29.76, "learning_rate": 0.0007003225806451613, "loss": 0.2208, "step": 1860 }, { "epoch": 29.92, "learning_rate": 0.0006987096774193549, "loss": 0.2118, "step": 1870 }, { "epoch": 30.0, "eval_accuracy": 0.8635851183765502, "eval_loss": 0.44403260946273804, "eval_runtime": 29.7699, "eval_samples_per_second": 59.59, "eval_steps_per_second": 0.941, "step": 1875 }, { "epoch": 30.08, "learning_rate": 0.0006970967741935485, "loss": 0.218, "step": 1880 }, { "epoch": 30.24, "learning_rate": 0.0006954838709677419, "loss": 0.2068, "step": 1890 }, { "epoch": 30.4, "learning_rate": 0.0006938709677419355, "loss": 0.2266, "step": 1900 }, { "epoch": 30.56, "learning_rate": 0.000692258064516129, "loss": 0.2188, "step": 1910 }, { "epoch": 30.72, "learning_rate": 0.0006906451612903226, "loss": 0.2149, "step": 1920 }, { "epoch": 30.88, "learning_rate": 0.0006890322580645162, "loss": 0.2224, "step": 1930 }, { "epoch": 30.99, "eval_accuracy": 0.8726042841037204, "eval_loss": 0.39281362295150757, "eval_runtime": 28.941, "eval_samples_per_second": 61.297, "eval_steps_per_second": 0.967, "step": 1937 }, { "epoch": 31.04, "learning_rate": 0.0006874193548387096, "loss": 0.1924, "step": 1940 }, { "epoch": 31.2, "learning_rate": 0.0006858064516129032, "loss": 0.2095, "step": 1950 }, { "epoch": 31.36, "learning_rate": 0.0006841935483870968, "loss": 0.2074, "step": 1960 }, { "epoch": 31.52, "learning_rate": 0.0006825806451612904, "loss": 0.22, "step": 1970 }, { "epoch": 31.68, "learning_rate": 0.0006809677419354838, "loss": 0.2318, "step": 1980 }, { "epoch": 31.84, "learning_rate": 0.0006793548387096774, "loss": 0.2026, "step": 1990 }, { "epoch": 32.0, "learning_rate": 0.000677741935483871, "loss": 0.2166, "step": 2000 }, { "epoch": 32.0, "eval_accuracy": 0.8602029312288614, "eval_loss": 0.424617737531662, "eval_runtime": 28.9601, "eval_samples_per_second": 61.257, "eval_steps_per_second": 0.967, "step": 2000 }, { "epoch": 32.16, "learning_rate": 0.0006761290322580645, "loss": 0.1975, "step": 2010 }, { "epoch": 32.32, "learning_rate": 0.0006745161290322581, "loss": 0.1867, "step": 2020 }, { "epoch": 32.48, "learning_rate": 0.0006729032258064515, "loss": 0.2128, "step": 2030 }, { "epoch": 32.64, "learning_rate": 0.0006712903225806452, "loss": 0.1861, "step": 2040 }, { "epoch": 32.8, "learning_rate": 0.0006696774193548388, "loss": 0.2282, "step": 2050 }, { "epoch": 32.96, "learning_rate": 0.0006680645161290323, "loss": 0.2038, "step": 2060 }, { "epoch": 32.99, "eval_accuracy": 0.870913190529876, "eval_loss": 0.41463395953178406, "eval_runtime": 29.1556, "eval_samples_per_second": 60.846, "eval_steps_per_second": 0.96, "step": 2062 }, { "epoch": 33.12, "learning_rate": 0.0006664516129032259, "loss": 0.1901, "step": 2070 }, { "epoch": 33.28, "learning_rate": 0.0006648387096774193, "loss": 0.2058, "step": 2080 }, { "epoch": 33.44, "learning_rate": 0.0006632258064516129, "loss": 0.2051, "step": 2090 }, { "epoch": 33.6, "learning_rate": 0.0006616129032258064, "loss": 0.2176, "step": 2100 }, { "epoch": 33.76, "learning_rate": 0.00066, "loss": 0.2031, "step": 2110 }, { "epoch": 33.92, "learning_rate": 0.0006583870967741937, "loss": 0.2183, "step": 2120 }, { "epoch": 34.0, "eval_accuracy": 0.8697857948139797, "eval_loss": 0.416454941034317, "eval_runtime": 28.8978, "eval_samples_per_second": 61.389, "eval_steps_per_second": 0.969, "step": 2125 }, { "epoch": 34.08, "learning_rate": 0.0006567741935483871, "loss": 0.1896, "step": 2130 }, { "epoch": 34.24, "learning_rate": 0.0006551612903225807, "loss": 0.1851, "step": 2140 }, { "epoch": 34.4, "learning_rate": 0.0006535483870967742, "loss": 0.1863, "step": 2150 }, { "epoch": 34.56, "learning_rate": 0.0006519354838709678, "loss": 0.1937, "step": 2160 }, { "epoch": 34.72, "learning_rate": 0.0006503225806451612, "loss": 0.1886, "step": 2170 }, { "epoch": 34.88, "learning_rate": 0.0006487096774193548, "loss": 0.22, "step": 2180 }, { "epoch": 34.99, "eval_accuracy": 0.8765501691093573, "eval_loss": 0.42124196887016296, "eval_runtime": 28.9343, "eval_samples_per_second": 61.311, "eval_steps_per_second": 0.968, "step": 2187 }, { "epoch": 35.04, "learning_rate": 0.0006470967741935484, "loss": 0.2051, "step": 2190 }, { "epoch": 35.2, "learning_rate": 0.000645483870967742, "loss": 0.1762, "step": 2200 }, { "epoch": 35.36, "learning_rate": 0.0006438709677419356, "loss": 0.2054, "step": 2210 }, { "epoch": 35.52, "learning_rate": 0.000642258064516129, "loss": 0.1593, "step": 2220 }, { "epoch": 35.68, "learning_rate": 0.0006406451612903226, "loss": 0.2051, "step": 2230 }, { "epoch": 35.84, "learning_rate": 0.0006390322580645161, "loss": 0.1926, "step": 2240 }, { "epoch": 36.0, "learning_rate": 0.0006374193548387097, "loss": 0.206, "step": 2250 }, { "epoch": 36.0, "eval_accuracy": 0.8726042841037204, "eval_loss": 0.41393008828163147, "eval_runtime": 29.1092, "eval_samples_per_second": 60.943, "eval_steps_per_second": 0.962, "step": 2250 }, { "epoch": 36.16, "learning_rate": 0.0006358064516129033, "loss": 0.183, "step": 2260 }, { "epoch": 36.32, "learning_rate": 0.0006341935483870967, "loss": 0.1939, "step": 2270 }, { "epoch": 36.48, "learning_rate": 0.0006325806451612904, "loss": 0.1733, "step": 2280 }, { "epoch": 36.64, "learning_rate": 0.0006309677419354839, "loss": 0.1825, "step": 2290 }, { "epoch": 36.8, "learning_rate": 0.0006293548387096775, "loss": 0.2011, "step": 2300 }, { "epoch": 36.96, "learning_rate": 0.0006277419354838709, "loss": 0.199, "step": 2310 }, { "epoch": 36.99, "eval_accuracy": 0.883314543404735, "eval_loss": 0.3792937695980072, "eval_runtime": 29.534, "eval_samples_per_second": 60.066, "eval_steps_per_second": 0.948, "step": 2312 }, { "epoch": 37.12, "learning_rate": 0.0006261290322580645, "loss": 0.1911, "step": 2320 }, { "epoch": 37.28, "learning_rate": 0.0006245161290322581, "loss": 0.192, "step": 2330 }, { "epoch": 37.44, "learning_rate": 0.0006229032258064516, "loss": 0.1849, "step": 2340 }, { "epoch": 37.6, "learning_rate": 0.0006212903225806452, "loss": 0.1872, "step": 2350 }, { "epoch": 37.76, "learning_rate": 0.0006196774193548386, "loss": 0.1891, "step": 2360 }, { "epoch": 37.92, "learning_rate": 0.0006180645161290323, "loss": 0.1926, "step": 2370 }, { "epoch": 38.0, "eval_accuracy": 0.8838782412626832, "eval_loss": 0.41269081830978394, "eval_runtime": 35.7673, "eval_samples_per_second": 49.598, "eval_steps_per_second": 0.783, "step": 2375 }, { "epoch": 38.08, "learning_rate": 0.0006164516129032258, "loss": 0.1762, "step": 2380 }, { "epoch": 38.24, "learning_rate": 0.0006148387096774194, "loss": 0.183, "step": 2390 }, { "epoch": 38.4, "learning_rate": 0.000613225806451613, "loss": 0.1815, "step": 2400 }, { "epoch": 38.56, "learning_rate": 0.0006116129032258064, "loss": 0.1943, "step": 2410 }, { "epoch": 38.72, "learning_rate": 0.00061, "loss": 0.1557, "step": 2420 }, { "epoch": 38.88, "learning_rate": 0.0006083870967741935, "loss": 0.1648, "step": 2430 }, { "epoch": 38.99, "eval_accuracy": 0.8821871476888388, "eval_loss": 0.42961573600769043, "eval_runtime": 29.689, "eval_samples_per_second": 59.753, "eval_steps_per_second": 0.943, "step": 2437 }, { "epoch": 39.04, "learning_rate": 0.0006067741935483871, "loss": 0.1698, "step": 2440 }, { "epoch": 39.2, "learning_rate": 0.0006051612903225806, "loss": 0.1508, "step": 2450 }, { "epoch": 39.36, "learning_rate": 0.0006035483870967742, "loss": 0.1622, "step": 2460 }, { "epoch": 39.52, "learning_rate": 0.0006019354838709678, "loss": 0.1719, "step": 2470 }, { "epoch": 39.68, "learning_rate": 0.0006003225806451613, "loss": 0.1916, "step": 2480 }, { "epoch": 39.84, "learning_rate": 0.0005987096774193549, "loss": 0.1853, "step": 2490 }, { "epoch": 40.0, "learning_rate": 0.0005970967741935483, "loss": 0.1578, "step": 2500 }, { "epoch": 40.0, "eval_accuracy": 0.883314543404735, "eval_loss": 0.4131587743759155, "eval_runtime": 29.7946, "eval_samples_per_second": 59.541, "eval_steps_per_second": 0.94, "step": 2500 }, { "epoch": 40.16, "learning_rate": 0.0005954838709677419, "loss": 0.1591, "step": 2510 }, { "epoch": 40.32, "learning_rate": 0.0005938709677419354, "loss": 0.1674, "step": 2520 }, { "epoch": 40.48, "learning_rate": 0.0005922580645161291, "loss": 0.1614, "step": 2530 }, { "epoch": 40.64, "learning_rate": 0.0005906451612903227, "loss": 0.1501, "step": 2540 }, { "epoch": 40.8, "learning_rate": 0.0005890322580645161, "loss": 0.1819, "step": 2550 }, { "epoch": 40.96, "learning_rate": 0.0005874193548387097, "loss": 0.181, "step": 2560 }, { "epoch": 40.99, "eval_accuracy": 0.8776775648252536, "eval_loss": 0.4216878414154053, "eval_runtime": 29.999, "eval_samples_per_second": 59.135, "eval_steps_per_second": 0.933, "step": 2562 }, { "epoch": 41.12, "learning_rate": 0.0005858064516129032, "loss": 0.179, "step": 2570 }, { "epoch": 41.28, "learning_rate": 0.0005841935483870968, "loss": 0.1754, "step": 2580 }, { "epoch": 41.44, "learning_rate": 0.0005825806451612904, "loss": 0.1836, "step": 2590 }, { "epoch": 41.6, "learning_rate": 0.0005809677419354838, "loss": 0.1896, "step": 2600 }, { "epoch": 41.76, "learning_rate": 0.0005793548387096775, "loss": 0.1587, "step": 2610 }, { "epoch": 41.92, "learning_rate": 0.000577741935483871, "loss": 0.1735, "step": 2620 }, { "epoch": 42.0, "eval_accuracy": 0.8714768883878241, "eval_loss": 0.41855669021606445, "eval_runtime": 30.1587, "eval_samples_per_second": 58.822, "eval_steps_per_second": 0.928, "step": 2625 }, { "epoch": 42.08, "learning_rate": 0.0005761290322580646, "loss": 0.1581, "step": 2630 }, { "epoch": 42.24, "learning_rate": 0.000574516129032258, "loss": 0.1453, "step": 2640 }, { "epoch": 42.4, "learning_rate": 0.0005729032258064516, "loss": 0.166, "step": 2650 }, { "epoch": 42.56, "learning_rate": 0.0005712903225806452, "loss": 0.1674, "step": 2660 }, { "epoch": 42.72, "learning_rate": 0.0005696774193548387, "loss": 0.1543, "step": 2670 }, { "epoch": 42.88, "learning_rate": 0.0005680645161290323, "loss": 0.1603, "step": 2680 }, { "epoch": 42.99, "eval_accuracy": 0.8804960541149943, "eval_loss": 0.411676824092865, "eval_runtime": 29.9513, "eval_samples_per_second": 59.229, "eval_steps_per_second": 0.935, "step": 2687 }, { "epoch": 43.04, "learning_rate": 0.0005664516129032258, "loss": 0.1608, "step": 2690 }, { "epoch": 43.2, "learning_rate": 0.0005648387096774194, "loss": 0.1442, "step": 2700 }, { "epoch": 43.36, "learning_rate": 0.0005632258064516129, "loss": 0.1625, "step": 2710 }, { "epoch": 43.52, "learning_rate": 0.0005616129032258065, "loss": 0.1844, "step": 2720 }, { "epoch": 43.68, "learning_rate": 0.0005600000000000001, "loss": 0.1512, "step": 2730 }, { "epoch": 43.84, "learning_rate": 0.0005583870967741935, "loss": 0.1777, "step": 2740 }, { "epoch": 44.0, "learning_rate": 0.0005567741935483871, "loss": 0.1516, "step": 2750 }, { "epoch": 44.0, "eval_accuracy": 0.8816234498308907, "eval_loss": 0.424953430891037, "eval_runtime": 29.9383, "eval_samples_per_second": 59.255, "eval_steps_per_second": 0.935, "step": 2750 }, { "epoch": 44.16, "learning_rate": 0.0005551612903225806, "loss": 0.1486, "step": 2760 }, { "epoch": 44.32, "learning_rate": 0.0005535483870967743, "loss": 0.1565, "step": 2770 }, { "epoch": 44.48, "learning_rate": 0.0005519354838709677, "loss": 0.1871, "step": 2780 }, { "epoch": 44.64, "learning_rate": 0.0005503225806451613, "loss": 0.1471, "step": 2790 }, { "epoch": 44.8, "learning_rate": 0.0005487096774193549, "loss": 0.1569, "step": 2800 }, { "epoch": 44.96, "learning_rate": 0.0005470967741935484, "loss": 0.1733, "step": 2810 }, { "epoch": 44.99, "eval_accuracy": 0.8844419391206313, "eval_loss": 0.3913884162902832, "eval_runtime": 30.3051, "eval_samples_per_second": 58.538, "eval_steps_per_second": 0.924, "step": 2812 }, { "epoch": 45.12, "learning_rate": 0.000545483870967742, "loss": 0.1656, "step": 2820 }, { "epoch": 45.28, "learning_rate": 0.0005438709677419354, "loss": 0.1761, "step": 2830 }, { "epoch": 45.44, "learning_rate": 0.000542258064516129, "loss": 0.156, "step": 2840 }, { "epoch": 45.6, "learning_rate": 0.0005406451612903226, "loss": 0.1559, "step": 2850 }, { "epoch": 45.76, "learning_rate": 0.0005390322580645162, "loss": 0.1577, "step": 2860 }, { "epoch": 45.92, "learning_rate": 0.0005374193548387098, "loss": 0.164, "step": 2870 }, { "epoch": 46.0, "eval_accuracy": 0.882750845546787, "eval_loss": 0.4368877112865448, "eval_runtime": 29.8462, "eval_samples_per_second": 59.438, "eval_steps_per_second": 0.938, "step": 2875 }, { "epoch": 46.08, "learning_rate": 0.0005358064516129032, "loss": 0.1496, "step": 2880 }, { "epoch": 46.24, "learning_rate": 0.0005341935483870968, "loss": 0.146, "step": 2890 }, { "epoch": 46.4, "learning_rate": 0.0005325806451612903, "loss": 0.154, "step": 2900 }, { "epoch": 46.56, "learning_rate": 0.0005309677419354839, "loss": 0.1374, "step": 2910 }, { "epoch": 46.72, "learning_rate": 0.0005293548387096773, "loss": 0.1429, "step": 2920 }, { "epoch": 46.88, "learning_rate": 0.0005277419354838709, "loss": 0.1519, "step": 2930 }, { "epoch": 46.99, "eval_accuracy": 0.8771138669673055, "eval_loss": 0.4275626540184021, "eval_runtime": 28.9785, "eval_samples_per_second": 61.218, "eval_steps_per_second": 0.966, "step": 2937 }, { "epoch": 47.04, "learning_rate": 0.0005261290322580646, "loss": 0.1551, "step": 2940 }, { "epoch": 47.2, "learning_rate": 0.0005245161290322581, "loss": 0.1462, "step": 2950 }, { "epoch": 47.36, "learning_rate": 0.0005229032258064517, "loss": 0.156, "step": 2960 }, { "epoch": 47.52, "learning_rate": 0.0005212903225806451, "loss": 0.1605, "step": 2970 }, { "epoch": 47.68, "learning_rate": 0.0005196774193548387, "loss": 0.1502, "step": 2980 }, { "epoch": 47.84, "learning_rate": 0.0005180645161290322, "loss": 0.1584, "step": 2990 }, { "epoch": 48.0, "learning_rate": 0.0005164516129032258, "loss": 0.1534, "step": 3000 }, { "epoch": 48.0, "eval_accuracy": 0.8821871476888388, "eval_loss": 0.4420623481273651, "eval_runtime": 29.0293, "eval_samples_per_second": 61.111, "eval_steps_per_second": 0.965, "step": 3000 }, { "epoch": 48.16, "learning_rate": 0.0005148387096774194, "loss": 0.1391, "step": 3010 }, { "epoch": 48.32, "learning_rate": 0.0005132258064516129, "loss": 0.1403, "step": 3020 }, { "epoch": 48.48, "learning_rate": 0.0005116129032258065, "loss": 0.1518, "step": 3030 }, { "epoch": 48.64, "learning_rate": 0.00051, "loss": 0.1225, "step": 3040 }, { "epoch": 48.8, "learning_rate": 0.0005083870967741936, "loss": 0.1386, "step": 3050 }, { "epoch": 48.96, "learning_rate": 0.000506774193548387, "loss": 0.158, "step": 3060 }, { "epoch": 48.99, "eval_accuracy": 0.887260428410372, "eval_loss": 0.4240320920944214, "eval_runtime": 29.0851, "eval_samples_per_second": 60.993, "eval_steps_per_second": 0.963, "step": 3062 }, { "epoch": 49.12, "learning_rate": 0.0005051612903225806, "loss": 0.1512, "step": 3070 }, { "epoch": 49.28, "learning_rate": 0.0005035483870967742, "loss": 0.1547, "step": 3080 }, { "epoch": 49.44, "learning_rate": 0.0005019354838709677, "loss": 0.1441, "step": 3090 }, { "epoch": 49.6, "learning_rate": 0.0005003225806451614, "loss": 0.1515, "step": 3100 }, { "epoch": 49.76, "learning_rate": 0.0004987096774193548, "loss": 0.1486, "step": 3110 }, { "epoch": 49.92, "learning_rate": 0.0004970967741935484, "loss": 0.1531, "step": 3120 }, { "epoch": 50.0, "eval_accuracy": 0.8793686583990981, "eval_loss": 0.42499276995658875, "eval_runtime": 28.8635, "eval_samples_per_second": 61.462, "eval_steps_per_second": 0.97, "step": 3125 }, { "epoch": 50.08, "learning_rate": 0.000495483870967742, "loss": 0.1479, "step": 3130 }, { "epoch": 50.24, "learning_rate": 0.0004938709677419355, "loss": 0.1335, "step": 3140 }, { "epoch": 50.4, "learning_rate": 0.0004922580645161291, "loss": 0.1371, "step": 3150 }, { "epoch": 50.56, "learning_rate": 0.0004906451612903226, "loss": 0.1207, "step": 3160 }, { "epoch": 50.72, "learning_rate": 0.0004890322580645161, "loss": 0.1377, "step": 3170 }, { "epoch": 50.88, "learning_rate": 0.0004874193548387097, "loss": 0.1286, "step": 3180 }, { "epoch": 50.99, "eval_accuracy": 0.8731679819616686, "eval_loss": 0.4228157103061676, "eval_runtime": 29.0583, "eval_samples_per_second": 61.05, "eval_steps_per_second": 0.964, "step": 3187 }, { "epoch": 51.04, "learning_rate": 0.0004858064516129032, "loss": 0.129, "step": 3190 }, { "epoch": 51.2, "learning_rate": 0.0004841935483870968, "loss": 0.1243, "step": 3200 }, { "epoch": 51.36, "learning_rate": 0.00048258064516129036, "loss": 0.1601, "step": 3210 }, { "epoch": 51.52, "learning_rate": 0.00048096774193548387, "loss": 0.1425, "step": 3220 }, { "epoch": 51.68, "learning_rate": 0.0004793548387096774, "loss": 0.165, "step": 3230 }, { "epoch": 51.84, "learning_rate": 0.000477741935483871, "loss": 0.1281, "step": 3240 }, { "epoch": 52.0, "learning_rate": 0.00047612903225806454, "loss": 0.1396, "step": 3250 }, { "epoch": 52.0, "eval_accuracy": 0.8782412626832018, "eval_loss": 0.43168801069259644, "eval_runtime": 28.9514, "eval_samples_per_second": 61.275, "eval_steps_per_second": 0.967, "step": 3250 }, { "epoch": 52.16, "learning_rate": 0.00047451612903225804, "loss": 0.1302, "step": 3260 }, { "epoch": 52.32, "learning_rate": 0.0004729032258064516, "loss": 0.1402, "step": 3270 }, { "epoch": 52.48, "learning_rate": 0.0004712903225806452, "loss": 0.1431, "step": 3280 }, { "epoch": 52.64, "learning_rate": 0.0004696774193548387, "loss": 0.1347, "step": 3290 }, { "epoch": 52.8, "learning_rate": 0.0004680645161290323, "loss": 0.1198, "step": 3300 }, { "epoch": 52.96, "learning_rate": 0.0004664516129032258, "loss": 0.1436, "step": 3310 }, { "epoch": 52.99, "eval_accuracy": 0.8855693348365277, "eval_loss": 0.4360513389110565, "eval_runtime": 28.9788, "eval_samples_per_second": 61.217, "eval_steps_per_second": 0.966, "step": 3312 }, { "epoch": 53.12, "learning_rate": 0.0004648387096774194, "loss": 0.1304, "step": 3320 }, { "epoch": 53.28, "learning_rate": 0.0004632258064516129, "loss": 0.1353, "step": 3330 }, { "epoch": 53.44, "learning_rate": 0.00046161290322580646, "loss": 0.1354, "step": 3340 }, { "epoch": 53.6, "learning_rate": 0.00046, "loss": 0.1217, "step": 3350 }, { "epoch": 53.76, "learning_rate": 0.00045838709677419357, "loss": 0.1266, "step": 3360 }, { "epoch": 53.92, "learning_rate": 0.00045677419354838713, "loss": 0.1411, "step": 3370 }, { "epoch": 54.0, "eval_accuracy": 0.8850056369785795, "eval_loss": 0.44017305970191956, "eval_runtime": 28.9504, "eval_samples_per_second": 61.277, "eval_steps_per_second": 0.967, "step": 3375 }, { "epoch": 54.08, "learning_rate": 0.00045516129032258063, "loss": 0.1574, "step": 3380 }, { "epoch": 54.24, "learning_rate": 0.0004535483870967742, "loss": 0.1372, "step": 3390 }, { "epoch": 54.4, "learning_rate": 0.0004519354838709678, "loss": 0.1302, "step": 3400 }, { "epoch": 54.56, "learning_rate": 0.0004503225806451613, "loss": 0.132, "step": 3410 }, { "epoch": 54.72, "learning_rate": 0.00044870967741935487, "loss": 0.122, "step": 3420 }, { "epoch": 54.88, "learning_rate": 0.00044709677419354837, "loss": 0.1312, "step": 3430 }, { "epoch": 54.99, "eval_accuracy": 0.8883878241262683, "eval_loss": 0.43266087770462036, "eval_runtime": 28.9781, "eval_samples_per_second": 61.219, "eval_steps_per_second": 0.966, "step": 3437 }, { "epoch": 55.04, "learning_rate": 0.00044548387096774193, "loss": 0.1421, "step": 3440 }, { "epoch": 55.2, "learning_rate": 0.0004438709677419355, "loss": 0.1223, "step": 3450 }, { "epoch": 55.36, "learning_rate": 0.00044225806451612905, "loss": 0.1147, "step": 3460 }, { "epoch": 55.52, "learning_rate": 0.0004406451612903226, "loss": 0.1318, "step": 3470 }, { "epoch": 55.68, "learning_rate": 0.0004390322580645161, "loss": 0.1324, "step": 3480 }, { "epoch": 55.84, "learning_rate": 0.0004374193548387097, "loss": 0.1436, "step": 3490 }, { "epoch": 56.0, "learning_rate": 0.0004358064516129032, "loss": 0.1359, "step": 3500 }, { "epoch": 56.0, "eval_accuracy": 0.8855693348365277, "eval_loss": 0.41437384486198425, "eval_runtime": 29.1061, "eval_samples_per_second": 60.949, "eval_steps_per_second": 0.962, "step": 3500 }, { "epoch": 56.16, "learning_rate": 0.0004341935483870968, "loss": 0.1191, "step": 3510 }, { "epoch": 56.32, "learning_rate": 0.0004325806451612903, "loss": 0.1179, "step": 3520 }, { "epoch": 56.48, "learning_rate": 0.0004309677419354839, "loss": 0.1278, "step": 3530 }, { "epoch": 56.64, "learning_rate": 0.00042935483870967746, "loss": 0.1383, "step": 3540 }, { "epoch": 56.8, "learning_rate": 0.00042774193548387096, "loss": 0.1281, "step": 3550 }, { "epoch": 56.96, "learning_rate": 0.0004261290322580645, "loss": 0.1361, "step": 3560 }, { "epoch": 56.99, "eval_accuracy": 0.8866967305524239, "eval_loss": 0.4180738031864166, "eval_runtime": 29.0289, "eval_samples_per_second": 61.112, "eval_steps_per_second": 0.965, "step": 3562 }, { "epoch": 57.12, "learning_rate": 0.0004245161290322581, "loss": 0.1415, "step": 3570 }, { "epoch": 57.28, "learning_rate": 0.00042290322580645163, "loss": 0.1205, "step": 3580 }, { "epoch": 57.44, "learning_rate": 0.00042129032258064514, "loss": 0.1225, "step": 3590 }, { "epoch": 57.6, "learning_rate": 0.0004196774193548387, "loss": 0.1222, "step": 3600 }, { "epoch": 57.76, "learning_rate": 0.0004180645161290323, "loss": 0.1174, "step": 3610 }, { "epoch": 57.92, "learning_rate": 0.0004164516129032258, "loss": 0.1272, "step": 3620 }, { "epoch": 58.0, "eval_accuracy": 0.8878241262683202, "eval_loss": 0.4203573763370514, "eval_runtime": 28.9973, "eval_samples_per_second": 61.178, "eval_steps_per_second": 0.966, "step": 3625 }, { "epoch": 58.08, "learning_rate": 0.00041483870967741937, "loss": 0.1359, "step": 3630 }, { "epoch": 58.24, "learning_rate": 0.0004132258064516129, "loss": 0.1267, "step": 3640 }, { "epoch": 58.4, "learning_rate": 0.0004116129032258065, "loss": 0.1251, "step": 3650 }, { "epoch": 58.56, "learning_rate": 0.00041, "loss": 0.1107, "step": 3660 }, { "epoch": 58.72, "learning_rate": 0.00040838709677419355, "loss": 0.1147, "step": 3670 }, { "epoch": 58.88, "learning_rate": 0.0004067741935483871, "loss": 0.1222, "step": 3680 }, { "epoch": 58.99, "eval_accuracy": 0.8883878241262683, "eval_loss": 0.4137117862701416, "eval_runtime": 28.9138, "eval_samples_per_second": 61.355, "eval_steps_per_second": 0.968, "step": 3687 }, { "epoch": 59.04, "learning_rate": 0.00040516129032258067, "loss": 0.1276, "step": 3690 }, { "epoch": 59.2, "learning_rate": 0.0004035483870967742, "loss": 0.1144, "step": 3700 }, { "epoch": 59.36, "learning_rate": 0.00040193548387096773, "loss": 0.1372, "step": 3710 }, { "epoch": 59.52, "learning_rate": 0.0004003225806451613, "loss": 0.1258, "step": 3720 }, { "epoch": 59.68, "learning_rate": 0.00039870967741935484, "loss": 0.1208, "step": 3730 }, { "epoch": 59.84, "learning_rate": 0.0003970967741935484, "loss": 0.1129, "step": 3740 }, { "epoch": 60.0, "learning_rate": 0.00039548387096774196, "loss": 0.1272, "step": 3750 }, { "epoch": 60.0, "eval_accuracy": 0.8889515219842165, "eval_loss": 0.4316939115524292, "eval_runtime": 29.1565, "eval_samples_per_second": 60.844, "eval_steps_per_second": 0.96, "step": 3750 }, { "epoch": 60.16, "learning_rate": 0.00039387096774193546, "loss": 0.1011, "step": 3760 }, { "epoch": 60.32, "learning_rate": 0.0003922580645161291, "loss": 0.1191, "step": 3770 }, { "epoch": 60.48, "learning_rate": 0.0003906451612903226, "loss": 0.1279, "step": 3780 }, { "epoch": 60.64, "learning_rate": 0.00038903225806451614, "loss": 0.1138, "step": 3790 }, { "epoch": 60.8, "learning_rate": 0.00038741935483870964, "loss": 0.1091, "step": 3800 }, { "epoch": 60.96, "learning_rate": 0.00038580645161290325, "loss": 0.1132, "step": 3810 }, { "epoch": 60.99, "eval_accuracy": 0.8917700112739572, "eval_loss": 0.43509015440940857, "eval_runtime": 29.022, "eval_samples_per_second": 61.126, "eval_steps_per_second": 0.965, "step": 3812 }, { "epoch": 61.12, "learning_rate": 0.0003841935483870968, "loss": 0.1279, "step": 3820 }, { "epoch": 61.28, "learning_rate": 0.0003825806451612903, "loss": 0.1127, "step": 3830 }, { "epoch": 61.44, "learning_rate": 0.0003809677419354839, "loss": 0.1205, "step": 3840 }, { "epoch": 61.6, "learning_rate": 0.00037935483870967743, "loss": 0.1224, "step": 3850 }, { "epoch": 61.76, "learning_rate": 0.000377741935483871, "loss": 0.1191, "step": 3860 }, { "epoch": 61.92, "learning_rate": 0.0003761290322580645, "loss": 0.1239, "step": 3870 }, { "epoch": 62.0, "eval_accuracy": 0.882750845546787, "eval_loss": 0.43482401967048645, "eval_runtime": 29.0447, "eval_samples_per_second": 61.078, "eval_steps_per_second": 0.964, "step": 3875 }, { "epoch": 62.08, "learning_rate": 0.00037451612903225805, "loss": 0.1195, "step": 3880 }, { "epoch": 62.24, "learning_rate": 0.00037290322580645167, "loss": 0.1266, "step": 3890 }, { "epoch": 62.4, "learning_rate": 0.00037129032258064517, "loss": 0.1219, "step": 3900 }, { "epoch": 62.56, "learning_rate": 0.00036967741935483873, "loss": 0.1191, "step": 3910 }, { "epoch": 62.72, "learning_rate": 0.00036806451612903223, "loss": 0.116, "step": 3920 }, { "epoch": 62.88, "learning_rate": 0.00036645161290322584, "loss": 0.1188, "step": 3930 }, { "epoch": 62.99, "eval_accuracy": 0.8861330326944757, "eval_loss": 0.42578133940696716, "eval_runtime": 29.0436, "eval_samples_per_second": 61.081, "eval_steps_per_second": 0.964, "step": 3937 }, { "epoch": 63.04, "learning_rate": 0.0003648387096774194, "loss": 0.1066, "step": 3940 }, { "epoch": 63.2, "learning_rate": 0.0003632258064516129, "loss": 0.1082, "step": 3950 }, { "epoch": 63.36, "learning_rate": 0.00036161290322580646, "loss": 0.0991, "step": 3960 }, { "epoch": 63.52, "learning_rate": 0.00035999999999999997, "loss": 0.1223, "step": 3970 }, { "epoch": 63.68, "learning_rate": 0.0003583870967741936, "loss": 0.1219, "step": 3980 }, { "epoch": 63.84, "learning_rate": 0.0003567741935483871, "loss": 0.1172, "step": 3990 }, { "epoch": 64.0, "learning_rate": 0.00035516129032258064, "loss": 0.1203, "step": 4000 }, { "epoch": 64.0, "eval_accuracy": 0.891206313416009, "eval_loss": 0.43181148171424866, "eval_runtime": 28.9224, "eval_samples_per_second": 61.337, "eval_steps_per_second": 0.968, "step": 4000 }, { "epoch": 64.16, "learning_rate": 0.0003535483870967742, "loss": 0.1237, "step": 4010 }, { "epoch": 64.32, "learning_rate": 0.00035193548387096776, "loss": 0.1162, "step": 4020 }, { "epoch": 64.48, "learning_rate": 0.0003503225806451613, "loss": 0.1224, "step": 4030 }, { "epoch": 64.64, "learning_rate": 0.0003487096774193548, "loss": 0.1086, "step": 4040 }, { "epoch": 64.8, "learning_rate": 0.0003470967741935484, "loss": 0.1034, "step": 4050 }, { "epoch": 64.96, "learning_rate": 0.00034548387096774194, "loss": 0.1204, "step": 4060 }, { "epoch": 64.99, "eval_accuracy": 0.895152198421646, "eval_loss": 0.4054950177669525, "eval_runtime": 29.0688, "eval_samples_per_second": 61.028, "eval_steps_per_second": 0.963, "step": 4062 }, { "epoch": 65.12, "learning_rate": 0.0003438709677419355, "loss": 0.107, "step": 4070 }, { "epoch": 65.28, "learning_rate": 0.00034225806451612905, "loss": 0.1162, "step": 4080 }, { "epoch": 65.44, "learning_rate": 0.00034064516129032256, "loss": 0.1244, "step": 4090 }, { "epoch": 65.6, "learning_rate": 0.00033903225806451617, "loss": 0.0922, "step": 4100 }, { "epoch": 65.76, "learning_rate": 0.0003374193548387097, "loss": 0.1148, "step": 4110 }, { "epoch": 65.92, "learning_rate": 0.00033580645161290323, "loss": 0.1053, "step": 4120 }, { "epoch": 66.0, "eval_accuracy": 0.8917700112739572, "eval_loss": 0.4222296476364136, "eval_runtime": 29.1218, "eval_samples_per_second": 60.917, "eval_steps_per_second": 0.961, "step": 4125 }, { "epoch": 66.08, "learning_rate": 0.00033419354838709674, "loss": 0.1089, "step": 4130 }, { "epoch": 66.24, "learning_rate": 0.00033258064516129035, "loss": 0.1069, "step": 4140 }, { "epoch": 66.4, "learning_rate": 0.0003309677419354839, "loss": 0.1049, "step": 4150 }, { "epoch": 66.56, "learning_rate": 0.0003293548387096774, "loss": 0.1042, "step": 4160 }, { "epoch": 66.72, "learning_rate": 0.00032774193548387097, "loss": 0.1095, "step": 4170 }, { "epoch": 66.88, "learning_rate": 0.0003261290322580645, "loss": 0.1187, "step": 4180 }, { "epoch": 66.99, "eval_accuracy": 0.8945885005636979, "eval_loss": 0.4248427152633667, "eval_runtime": 29.0376, "eval_samples_per_second": 61.093, "eval_steps_per_second": 0.964, "step": 4187 }, { "epoch": 67.04, "learning_rate": 0.0003245161290322581, "loss": 0.1181, "step": 4190 }, { "epoch": 67.2, "learning_rate": 0.0003229032258064516, "loss": 0.1154, "step": 4200 }, { "epoch": 67.36, "learning_rate": 0.00032129032258064515, "loss": 0.1186, "step": 4210 }, { "epoch": 67.52, "learning_rate": 0.00031967741935483876, "loss": 0.122, "step": 4220 }, { "epoch": 67.68, "learning_rate": 0.00031806451612903226, "loss": 0.107, "step": 4230 }, { "epoch": 67.84, "learning_rate": 0.0003164516129032258, "loss": 0.0939, "step": 4240 }, { "epoch": 68.0, "learning_rate": 0.0003148387096774193, "loss": 0.1129, "step": 4250 }, { "epoch": 68.0, "eval_accuracy": 0.8923337091319054, "eval_loss": 0.4301997125148773, "eval_runtime": 28.9069, "eval_samples_per_second": 61.369, "eval_steps_per_second": 0.969, "step": 4250 }, { "epoch": 68.16, "learning_rate": 0.00031322580645161294, "loss": 0.097, "step": 4260 }, { "epoch": 68.32, "learning_rate": 0.00031161290322580644, "loss": 0.0899, "step": 4270 }, { "epoch": 68.48, "learning_rate": 0.00031, "loss": 0.1098, "step": 4280 }, { "epoch": 68.64, "learning_rate": 0.00030838709677419356, "loss": 0.1109, "step": 4290 }, { "epoch": 68.8, "learning_rate": 0.0003067741935483871, "loss": 0.1023, "step": 4300 }, { "epoch": 68.96, "learning_rate": 0.0003051612903225807, "loss": 0.1117, "step": 4310 }, { "epoch": 68.99, "eval_accuracy": 0.8968432919954904, "eval_loss": 0.414861261844635, "eval_runtime": 29.0625, "eval_samples_per_second": 61.041, "eval_steps_per_second": 0.963, "step": 4312 }, { "epoch": 69.12, "learning_rate": 0.0003035483870967742, "loss": 0.1158, "step": 4320 }, { "epoch": 69.28, "learning_rate": 0.00030193548387096774, "loss": 0.1051, "step": 4330 }, { "epoch": 69.44, "learning_rate": 0.0003003225806451613, "loss": 0.1004, "step": 4340 }, { "epoch": 69.6, "learning_rate": 0.00029870967741935485, "loss": 0.1205, "step": 4350 }, { "epoch": 69.76, "learning_rate": 0.0002970967741935484, "loss": 0.1114, "step": 4360 }, { "epoch": 69.92, "learning_rate": 0.0002954838709677419, "loss": 0.1194, "step": 4370 }, { "epoch": 70.0, "eval_accuracy": 0.8895152198421646, "eval_loss": 0.41601237654685974, "eval_runtime": 29.1078, "eval_samples_per_second": 60.946, "eval_steps_per_second": 0.962, "step": 4375 }, { "epoch": 70.08, "learning_rate": 0.00029387096774193553, "loss": 0.0931, "step": 4380 }, { "epoch": 70.24, "learning_rate": 0.00029225806451612903, "loss": 0.107, "step": 4390 }, { "epoch": 70.4, "learning_rate": 0.0002906451612903226, "loss": 0.1052, "step": 4400 }, { "epoch": 70.56, "learning_rate": 0.00028903225806451615, "loss": 0.1057, "step": 4410 }, { "epoch": 70.72, "learning_rate": 0.0002874193548387097, "loss": 0.1104, "step": 4420 }, { "epoch": 70.88, "learning_rate": 0.00028580645161290326, "loss": 0.1003, "step": 4430 }, { "epoch": 70.99, "eval_accuracy": 0.8945885005636979, "eval_loss": 0.425587922334671, "eval_runtime": 29.0303, "eval_samples_per_second": 61.109, "eval_steps_per_second": 0.965, "step": 4437 }, { "epoch": 71.04, "learning_rate": 0.00028419354838709677, "loss": 0.1053, "step": 4440 }, { "epoch": 71.2, "learning_rate": 0.0002825806451612903, "loss": 0.0978, "step": 4450 }, { "epoch": 71.36, "learning_rate": 0.00028096774193548383, "loss": 0.0927, "step": 4460 }, { "epoch": 71.52, "learning_rate": 0.00027935483870967744, "loss": 0.1161, "step": 4470 }, { "epoch": 71.68, "learning_rate": 0.000277741935483871, "loss": 0.1061, "step": 4480 }, { "epoch": 71.84, "learning_rate": 0.0002761290322580645, "loss": 0.1018, "step": 4490 }, { "epoch": 72.0, "learning_rate": 0.00027451612903225806, "loss": 0.1088, "step": 4500 }, { "epoch": 72.0, "eval_accuracy": 0.8917700112739572, "eval_loss": 0.4356169104576111, "eval_runtime": 29.136, "eval_samples_per_second": 60.887, "eval_steps_per_second": 0.961, "step": 4500 }, { "epoch": 72.16, "learning_rate": 0.0002729032258064516, "loss": 0.0891, "step": 4510 }, { "epoch": 72.32, "learning_rate": 0.0002712903225806452, "loss": 0.1021, "step": 4520 }, { "epoch": 72.48, "learning_rate": 0.0002696774193548387, "loss": 0.0923, "step": 4530 }, { "epoch": 72.64, "learning_rate": 0.00026806451612903224, "loss": 0.0987, "step": 4540 }, { "epoch": 72.8, "learning_rate": 0.00026645161290322585, "loss": 0.0909, "step": 4550 }, { "epoch": 72.96, "learning_rate": 0.00026483870967741936, "loss": 0.11, "step": 4560 }, { "epoch": 72.99, "eval_accuracy": 0.8934611048478016, "eval_loss": 0.42773857712745667, "eval_runtime": 29.2049, "eval_samples_per_second": 60.743, "eval_steps_per_second": 0.959, "step": 4562 }, { "epoch": 73.12, "learning_rate": 0.0002632258064516129, "loss": 0.0995, "step": 4570 }, { "epoch": 73.28, "learning_rate": 0.0002616129032258064, "loss": 0.1036, "step": 4580 }, { "epoch": 73.44, "learning_rate": 0.00026000000000000003, "loss": 0.1089, "step": 4590 }, { "epoch": 73.6, "learning_rate": 0.00025838709677419354, "loss": 0.093, "step": 4600 }, { "epoch": 73.76, "learning_rate": 0.0002567741935483871, "loss": 0.1, "step": 4610 }, { "epoch": 73.92, "learning_rate": 0.00025516129032258065, "loss": 0.1016, "step": 4620 }, { "epoch": 74.0, "eval_accuracy": 0.895152198421646, "eval_loss": 0.4094755947589874, "eval_runtime": 28.9726, "eval_samples_per_second": 61.23, "eval_steps_per_second": 0.966, "step": 4625 }, { "epoch": 74.08, "learning_rate": 0.0002535483870967742, "loss": 0.1106, "step": 4630 }, { "epoch": 74.24, "learning_rate": 0.00025193548387096777, "loss": 0.1058, "step": 4640 }, { "epoch": 74.4, "learning_rate": 0.00025032258064516127, "loss": 0.1011, "step": 4650 }, { "epoch": 74.56, "learning_rate": 0.00024870967741935483, "loss": 0.0881, "step": 4660 }, { "epoch": 74.72, "learning_rate": 0.0002470967741935484, "loss": 0.0912, "step": 4670 }, { "epoch": 74.88, "learning_rate": 0.00024548387096774195, "loss": 0.0906, "step": 4680 }, { "epoch": 74.99, "eval_accuracy": 0.8934611048478016, "eval_loss": 0.4261699616909027, "eval_runtime": 29.3184, "eval_samples_per_second": 60.508, "eval_steps_per_second": 0.955, "step": 4687 }, { "epoch": 75.04, "learning_rate": 0.00024387096774193548, "loss": 0.0994, "step": 4690 }, { "epoch": 75.2, "learning_rate": 0.00024225806451612904, "loss": 0.0971, "step": 4700 }, { "epoch": 75.36, "learning_rate": 0.00024064516129032257, "loss": 0.1087, "step": 4710 }, { "epoch": 75.52, "learning_rate": 0.00023903225806451615, "loss": 0.105, "step": 4720 }, { "epoch": 75.68, "learning_rate": 0.00023741935483870968, "loss": 0.0975, "step": 4730 }, { "epoch": 75.84, "learning_rate": 0.00023580645161290324, "loss": 0.1142, "step": 4740 }, { "epoch": 76.0, "learning_rate": 0.00023419354838709677, "loss": 0.0969, "step": 4750 }, { "epoch": 76.0, "eval_accuracy": 0.8940248027057497, "eval_loss": 0.4057069718837738, "eval_runtime": 28.9742, "eval_samples_per_second": 61.227, "eval_steps_per_second": 0.966, "step": 4750 }, { "epoch": 76.16, "learning_rate": 0.00023258064516129033, "loss": 0.1035, "step": 4760 }, { "epoch": 76.32, "learning_rate": 0.00023096774193548386, "loss": 0.0803, "step": 4770 }, { "epoch": 76.48, "learning_rate": 0.00022935483870967742, "loss": 0.0898, "step": 4780 }, { "epoch": 76.64, "learning_rate": 0.00022774193548387098, "loss": 0.1007, "step": 4790 }, { "epoch": 76.8, "learning_rate": 0.00022612903225806454, "loss": 0.1113, "step": 4800 }, { "epoch": 76.96, "learning_rate": 0.00022451612903225807, "loss": 0.111, "step": 4810 }, { "epoch": 76.99, "eval_accuracy": 0.8996617812852311, "eval_loss": 0.40985479950904846, "eval_runtime": 29.09, "eval_samples_per_second": 60.983, "eval_steps_per_second": 0.963, "step": 4812 }, { "epoch": 77.12, "learning_rate": 0.00022290322580645162, "loss": 0.118, "step": 4820 }, { "epoch": 77.28, "learning_rate": 0.00022129032258064516, "loss": 0.0928, "step": 4830 }, { "epoch": 77.44, "learning_rate": 0.00021967741935483871, "loss": 0.0967, "step": 4840 }, { "epoch": 77.6, "learning_rate": 0.00021806451612903227, "loss": 0.0979, "step": 4850 }, { "epoch": 77.76, "learning_rate": 0.00021645161290322583, "loss": 0.0879, "step": 4860 }, { "epoch": 77.92, "learning_rate": 0.00021483870967741936, "loss": 0.091, "step": 4870 }, { "epoch": 78.0, "eval_accuracy": 0.8962795941375423, "eval_loss": 0.42323029041290283, "eval_runtime": 29.0776, "eval_samples_per_second": 61.009, "eval_steps_per_second": 0.963, "step": 4875 }, { "epoch": 78.08, "learning_rate": 0.00021322580645161292, "loss": 0.0962, "step": 4880 }, { "epoch": 78.24, "learning_rate": 0.00021161290322580645, "loss": 0.105, "step": 4890 }, { "epoch": 78.4, "learning_rate": 0.00021, "loss": 0.099, "step": 4900 }, { "epoch": 78.56, "learning_rate": 0.00020838709677419354, "loss": 0.1132, "step": 4910 }, { "epoch": 78.72, "learning_rate": 0.00020677419354838713, "loss": 0.0909, "step": 4920 }, { "epoch": 78.88, "learning_rate": 0.00020516129032258066, "loss": 0.1013, "step": 4930 }, { "epoch": 78.99, "eval_accuracy": 0.8883878241262683, "eval_loss": 0.4311448037624359, "eval_runtime": 28.999, "eval_samples_per_second": 61.174, "eval_steps_per_second": 0.966, "step": 4937 }, { "epoch": 79.04, "learning_rate": 0.00020354838709677421, "loss": 0.0946, "step": 4940 }, { "epoch": 79.2, "learning_rate": 0.00020193548387096775, "loss": 0.0842, "step": 4950 }, { "epoch": 79.36, "learning_rate": 0.00020032258064516128, "loss": 0.1172, "step": 4960 }, { "epoch": 79.52, "learning_rate": 0.00019870967741935483, "loss": 0.0936, "step": 4970 }, { "epoch": 79.68, "learning_rate": 0.00019709677419354837, "loss": 0.0993, "step": 4980 }, { "epoch": 79.84, "learning_rate": 0.00019548387096774195, "loss": 0.1049, "step": 4990 }, { "epoch": 80.0, "learning_rate": 0.00019387096774193548, "loss": 0.119, "step": 5000 }, { "epoch": 80.0, "eval_accuracy": 0.8928974069898534, "eval_loss": 0.43020305037498474, "eval_runtime": 29.1182, "eval_samples_per_second": 60.924, "eval_steps_per_second": 0.962, "step": 5000 }, { "epoch": 80.16, "learning_rate": 0.00019225806451612904, "loss": 0.1009, "step": 5010 }, { "epoch": 80.32, "learning_rate": 0.00019064516129032257, "loss": 0.099, "step": 5020 }, { "epoch": 80.48, "learning_rate": 0.00018903225806451613, "loss": 0.0966, "step": 5030 }, { "epoch": 80.64, "learning_rate": 0.00018741935483870966, "loss": 0.1195, "step": 5040 }, { "epoch": 80.8, "learning_rate": 0.00018580645161290325, "loss": 0.0997, "step": 5050 }, { "epoch": 80.96, "learning_rate": 0.00018419354838709678, "loss": 0.0877, "step": 5060 }, { "epoch": 80.99, "eval_accuracy": 0.8923337091319054, "eval_loss": 0.4369201958179474, "eval_runtime": 29.0539, "eval_samples_per_second": 61.059, "eval_steps_per_second": 0.964, "step": 5062 }, { "epoch": 81.12, "learning_rate": 0.00018258064516129033, "loss": 0.1046, "step": 5070 }, { "epoch": 81.28, "learning_rate": 0.00018096774193548387, "loss": 0.1, "step": 5080 }, { "epoch": 81.44, "learning_rate": 0.00017935483870967742, "loss": 0.0967, "step": 5090 }, { "epoch": 81.6, "learning_rate": 0.00017774193548387095, "loss": 0.0919, "step": 5100 }, { "epoch": 81.76, "learning_rate": 0.0001761290322580645, "loss": 0.1022, "step": 5110 }, { "epoch": 81.92, "learning_rate": 0.00017451612903225807, "loss": 0.0926, "step": 5120 }, { "epoch": 82.0, "eval_accuracy": 0.8968432919954904, "eval_loss": 0.4353014826774597, "eval_runtime": 28.9657, "eval_samples_per_second": 61.245, "eval_steps_per_second": 0.967, "step": 5125 }, { "epoch": 82.08, "learning_rate": 0.00017290322580645163, "loss": 0.0967, "step": 5130 }, { "epoch": 82.24, "learning_rate": 0.00017129032258064516, "loss": 0.0901, "step": 5140 }, { "epoch": 82.4, "learning_rate": 0.00016967741935483872, "loss": 0.1059, "step": 5150 }, { "epoch": 82.56, "learning_rate": 0.00016806451612903225, "loss": 0.0999, "step": 5160 }, { "epoch": 82.72, "learning_rate": 0.0001664516129032258, "loss": 0.0993, "step": 5170 }, { "epoch": 82.88, "learning_rate": 0.00016483870967741934, "loss": 0.0969, "step": 5180 }, { "epoch": 82.99, "eval_accuracy": 0.895152198421646, "eval_loss": 0.4335944652557373, "eval_runtime": 29.177, "eval_samples_per_second": 60.801, "eval_steps_per_second": 0.96, "step": 5187 }, { "epoch": 83.04, "learning_rate": 0.00016322580645161292, "loss": 0.088, "step": 5190 }, { "epoch": 83.2, "learning_rate": 0.00016161290322580645, "loss": 0.08, "step": 5200 }, { "epoch": 83.36, "learning_rate": 0.00016, "loss": 0.0899, "step": 5210 }, { "epoch": 83.52, "learning_rate": 0.00015838709677419354, "loss": 0.0938, "step": 5220 }, { "epoch": 83.68, "learning_rate": 0.0001567741935483871, "loss": 0.0986, "step": 5230 }, { "epoch": 83.84, "learning_rate": 0.00015516129032258063, "loss": 0.0947, "step": 5240 }, { "epoch": 84.0, "learning_rate": 0.0001535483870967742, "loss": 0.092, "step": 5250 }, { "epoch": 84.0, "eval_accuracy": 0.8934611048478016, "eval_loss": 0.4213511347770691, "eval_runtime": 28.9325, "eval_samples_per_second": 61.315, "eval_steps_per_second": 0.968, "step": 5250 }, { "epoch": 84.16, "learning_rate": 0.00015193548387096775, "loss": 0.0881, "step": 5260 }, { "epoch": 84.32, "learning_rate": 0.0001503225806451613, "loss": 0.0864, "step": 5270 }, { "epoch": 84.48, "learning_rate": 0.00014870967741935484, "loss": 0.0917, "step": 5280 }, { "epoch": 84.64, "learning_rate": 0.0001470967741935484, "loss": 0.0817, "step": 5290 }, { "epoch": 84.8, "learning_rate": 0.00014548387096774193, "loss": 0.1022, "step": 5300 }, { "epoch": 84.96, "learning_rate": 0.00014387096774193549, "loss": 0.0914, "step": 5310 }, { "epoch": 84.99, "eval_accuracy": 0.8889515219842165, "eval_loss": 0.4403364360332489, "eval_runtime": 28.9736, "eval_samples_per_second": 61.228, "eval_steps_per_second": 0.966, "step": 5312 }, { "epoch": 85.12, "learning_rate": 0.00014225806451612904, "loss": 0.0802, "step": 5320 }, { "epoch": 85.28, "learning_rate": 0.0001406451612903226, "loss": 0.0943, "step": 5330 }, { "epoch": 85.44, "learning_rate": 0.00013903225806451613, "loss": 0.1016, "step": 5340 }, { "epoch": 85.6, "learning_rate": 0.0001374193548387097, "loss": 0.1008, "step": 5350 }, { "epoch": 85.76, "learning_rate": 0.00013596774193548386, "loss": 0.0812, "step": 5360 }, { "epoch": 85.92, "learning_rate": 0.00013435483870967744, "loss": 0.0924, "step": 5370 }, { "epoch": 86.0, "eval_accuracy": 0.8928974069898534, "eval_loss": 0.4285109043121338, "eval_runtime": 28.8946, "eval_samples_per_second": 61.395, "eval_steps_per_second": 0.969, "step": 5375 }, { "epoch": 86.08, "learning_rate": 0.00013274193548387097, "loss": 0.0871, "step": 5380 }, { "epoch": 86.24, "learning_rate": 0.00013112903225806453, "loss": 0.0741, "step": 5390 }, { "epoch": 86.4, "learning_rate": 0.00012951612903225806, "loss": 0.0924, "step": 5400 }, { "epoch": 86.56, "learning_rate": 0.00012790322580645162, "loss": 0.0943, "step": 5410 }, { "epoch": 86.72, "learning_rate": 0.00012629032258064515, "loss": 0.0796, "step": 5420 }, { "epoch": 86.88, "learning_rate": 0.0001246774193548387, "loss": 0.0964, "step": 5430 }, { "epoch": 86.99, "eval_accuracy": 0.8968432919954904, "eval_loss": 0.4207240641117096, "eval_runtime": 28.9818, "eval_samples_per_second": 61.211, "eval_steps_per_second": 0.966, "step": 5437 }, { "epoch": 87.04, "learning_rate": 0.00012306451612903227, "loss": 0.0912, "step": 5440 }, { "epoch": 87.2, "learning_rate": 0.0001214516129032258, "loss": 0.0889, "step": 5450 }, { "epoch": 87.36, "learning_rate": 0.00011983870967741936, "loss": 0.1015, "step": 5460 }, { "epoch": 87.52, "learning_rate": 0.0001182258064516129, "loss": 0.0959, "step": 5470 }, { "epoch": 87.68, "learning_rate": 0.00011661290322580645, "loss": 0.0663, "step": 5480 }, { "epoch": 87.84, "learning_rate": 0.000115, "loss": 0.0792, "step": 5490 }, { "epoch": 88.0, "learning_rate": 0.00011338709677419355, "loss": 0.0916, "step": 5500 }, { "epoch": 88.0, "eval_accuracy": 0.8945885005636979, "eval_loss": 0.42535075545310974, "eval_runtime": 29.0954, "eval_samples_per_second": 60.972, "eval_steps_per_second": 0.962, "step": 5500 }, { "epoch": 88.16, "learning_rate": 0.00011177419354838709, "loss": 0.0945, "step": 5510 }, { "epoch": 88.32, "learning_rate": 0.00011016129032258065, "loss": 0.0906, "step": 5520 }, { "epoch": 88.48, "learning_rate": 0.0001085483870967742, "loss": 0.0795, "step": 5530 }, { "epoch": 88.64, "learning_rate": 0.00010693548387096774, "loss": 0.0868, "step": 5540 }, { "epoch": 88.8, "learning_rate": 0.00010532258064516128, "loss": 0.1157, "step": 5550 }, { "epoch": 88.96, "learning_rate": 0.00010370967741935484, "loss": 0.0962, "step": 5560 }, { "epoch": 88.99, "eval_accuracy": 0.8979706877113867, "eval_loss": 0.42487961053848267, "eval_runtime": 28.9603, "eval_samples_per_second": 61.256, "eval_steps_per_second": 0.967, "step": 5562 }, { "epoch": 89.12, "learning_rate": 0.00010209677419354839, "loss": 0.0886, "step": 5570 }, { "epoch": 89.28, "learning_rate": 0.00010048387096774193, "loss": 0.0987, "step": 5580 }, { "epoch": 89.44, "learning_rate": 9.887096774193549e-05, "loss": 0.0893, "step": 5590 }, { "epoch": 89.6, "learning_rate": 9.725806451612903e-05, "loss": 0.0829, "step": 5600 }, { "epoch": 89.76, "learning_rate": 9.564516129032258e-05, "loss": 0.0913, "step": 5610 }, { "epoch": 89.92, "learning_rate": 9.403225806451612e-05, "loss": 0.0927, "step": 5620 }, { "epoch": 90.0, "eval_accuracy": 0.8934611048478016, "eval_loss": 0.42423465847969055, "eval_runtime": 29.0096, "eval_samples_per_second": 61.152, "eval_steps_per_second": 0.965, "step": 5625 }, { "epoch": 90.08, "learning_rate": 9.241935483870968e-05, "loss": 0.0781, "step": 5630 }, { "epoch": 90.24, "learning_rate": 9.080645161290323e-05, "loss": 0.0949, "step": 5640 }, { "epoch": 90.4, "learning_rate": 8.919354838709677e-05, "loss": 0.0794, "step": 5650 }, { "epoch": 90.56, "learning_rate": 8.758064516129033e-05, "loss": 0.077, "step": 5660 }, { "epoch": 90.72, "learning_rate": 8.596774193548387e-05, "loss": 0.0918, "step": 5670 }, { "epoch": 90.88, "learning_rate": 8.435483870967742e-05, "loss": 0.0993, "step": 5680 }, { "epoch": 90.99, "eval_accuracy": 0.8985343855693348, "eval_loss": 0.423031747341156, "eval_runtime": 29.0485, "eval_samples_per_second": 61.07, "eval_steps_per_second": 0.964, "step": 5687 }, { "epoch": 91.04, "learning_rate": 8.274193548387098e-05, "loss": 0.0744, "step": 5690 }, { "epoch": 91.2, "learning_rate": 8.112903225806452e-05, "loss": 0.0964, "step": 5700 }, { "epoch": 91.36, "learning_rate": 7.951612903225807e-05, "loss": 0.0918, "step": 5710 }, { "epoch": 91.52, "learning_rate": 7.790322580645161e-05, "loss": 0.0899, "step": 5720 }, { "epoch": 91.68, "learning_rate": 7.629032258064517e-05, "loss": 0.091, "step": 5730 }, { "epoch": 91.84, "learning_rate": 7.467741935483871e-05, "loss": 0.0965, "step": 5740 }, { "epoch": 92.0, "learning_rate": 7.306451612903226e-05, "loss": 0.0893, "step": 5750 }, { "epoch": 92.0, "eval_accuracy": 0.8979706877113867, "eval_loss": 0.4228670597076416, "eval_runtime": 28.916, "eval_samples_per_second": 61.35, "eval_steps_per_second": 0.968, "step": 5750 }, { "epoch": 92.16, "learning_rate": 7.145161290322582e-05, "loss": 0.0952, "step": 5760 }, { "epoch": 92.32, "learning_rate": 6.983870967741936e-05, "loss": 0.0878, "step": 5770 }, { "epoch": 92.48, "learning_rate": 6.82258064516129e-05, "loss": 0.0769, "step": 5780 }, { "epoch": 92.64, "learning_rate": 6.661290322580646e-05, "loss": 0.0854, "step": 5790 }, { "epoch": 92.8, "learning_rate": 6.500000000000001e-05, "loss": 0.0816, "step": 5800 }, { "epoch": 92.96, "learning_rate": 6.338709677419355e-05, "loss": 0.0878, "step": 5810 }, { "epoch": 92.99, "eval_accuracy": 0.8985343855693348, "eval_loss": 0.4215339124202728, "eval_runtime": 29.07, "eval_samples_per_second": 61.025, "eval_steps_per_second": 0.963, "step": 5812 }, { "epoch": 93.12, "learning_rate": 6.17741935483871e-05, "loss": 0.0916, "step": 5820 }, { "epoch": 93.28, "learning_rate": 6.016129032258064e-05, "loss": 0.0768, "step": 5830 }, { "epoch": 93.44, "learning_rate": 5.854838709677419e-05, "loss": 0.088, "step": 5840 }, { "epoch": 93.6, "learning_rate": 5.6935483870967744e-05, "loss": 0.0733, "step": 5850 }, { "epoch": 93.76, "learning_rate": 5.532258064516129e-05, "loss": 0.1046, "step": 5860 }, { "epoch": 93.92, "learning_rate": 5.370967741935484e-05, "loss": 0.0882, "step": 5870 }, { "epoch": 94.0, "eval_accuracy": 0.8979706877113867, "eval_loss": 0.42621222138404846, "eval_runtime": 29.148, "eval_samples_per_second": 60.862, "eval_steps_per_second": 0.961, "step": 5875 }, { "epoch": 94.08, "learning_rate": 5.2096774193548385e-05, "loss": 0.0759, "step": 5880 }, { "epoch": 94.24, "learning_rate": 5.0483870967741936e-05, "loss": 0.105, "step": 5890 }, { "epoch": 94.4, "learning_rate": 4.887096774193549e-05, "loss": 0.0882, "step": 5900 }, { "epoch": 94.56, "learning_rate": 4.725806451612903e-05, "loss": 0.0826, "step": 5910 }, { "epoch": 94.72, "learning_rate": 4.5645161290322584e-05, "loss": 0.0725, "step": 5920 }, { "epoch": 94.88, "learning_rate": 4.403225806451613e-05, "loss": 0.0854, "step": 5930 }, { "epoch": 94.99, "eval_accuracy": 0.8974069898534386, "eval_loss": 0.4256262481212616, "eval_runtime": 28.9502, "eval_samples_per_second": 61.278, "eval_steps_per_second": 0.967, "step": 5937 }, { "epoch": 95.04, "learning_rate": 4.241935483870968e-05, "loss": 0.0928, "step": 5940 }, { "epoch": 95.2, "learning_rate": 4.080645161290323e-05, "loss": 0.0837, "step": 5950 }, { "epoch": 95.36, "learning_rate": 3.9193548387096776e-05, "loss": 0.0878, "step": 5960 }, { "epoch": 95.52, "learning_rate": 3.758064516129033e-05, "loss": 0.0809, "step": 5970 }, { "epoch": 95.68, "learning_rate": 3.596774193548387e-05, "loss": 0.0779, "step": 5980 }, { "epoch": 95.84, "learning_rate": 3.435483870967742e-05, "loss": 0.0817, "step": 5990 }, { "epoch": 96.0, "learning_rate": 3.2741935483870974e-05, "loss": 0.0795, "step": 6000 }, { "epoch": 96.0, "eval_accuracy": 0.9007891770011274, "eval_loss": 0.42286553978919983, "eval_runtime": 28.8836, "eval_samples_per_second": 61.419, "eval_steps_per_second": 0.969, "step": 6000 }, { "epoch": 96.16, "learning_rate": 3.112903225806452e-05, "loss": 0.0956, "step": 6010 }, { "epoch": 96.32, "learning_rate": 2.9516129032258063e-05, "loss": 0.0795, "step": 6020 }, { "epoch": 96.48, "learning_rate": 2.7903225806451615e-05, "loss": 0.0884, "step": 6030 }, { "epoch": 96.64, "learning_rate": 2.6290322580645163e-05, "loss": 0.0963, "step": 6040 }, { "epoch": 96.8, "learning_rate": 2.467741935483871e-05, "loss": 0.0844, "step": 6050 }, { "epoch": 96.96, "learning_rate": 2.306451612903226e-05, "loss": 0.0931, "step": 6060 }, { "epoch": 96.99, "eval_accuracy": 0.8990980834272829, "eval_loss": 0.42177480459213257, "eval_runtime": 28.9325, "eval_samples_per_second": 61.315, "eval_steps_per_second": 0.968, "step": 6062 }, { "epoch": 97.12, "learning_rate": 2.1451612903225807e-05, "loss": 0.0921, "step": 6070 }, { "epoch": 97.28, "learning_rate": 1.9838709677419358e-05, "loss": 0.0805, "step": 6080 }, { "epoch": 97.44, "learning_rate": 1.8225806451612903e-05, "loss": 0.0815, "step": 6090 }, { "epoch": 97.6, "learning_rate": 1.661290322580645e-05, "loss": 0.0887, "step": 6100 }, { "epoch": 97.76, "learning_rate": 1.5e-05, "loss": 0.0939, "step": 6110 }, { "epoch": 97.92, "learning_rate": 1.3387096774193548e-05, "loss": 0.0826, "step": 6120 }, { "epoch": 98.0, "eval_accuracy": 0.8985343855693348, "eval_loss": 0.42353904247283936, "eval_runtime": 29.0565, "eval_samples_per_second": 61.053, "eval_steps_per_second": 0.964, "step": 6125 }, { "epoch": 98.08, "learning_rate": 1.1774193548387096e-05, "loss": 0.0768, "step": 6130 }, { "epoch": 98.24, "learning_rate": 1.0161290322580644e-05, "loss": 0.0682, "step": 6140 }, { "epoch": 98.4, "learning_rate": 8.548387096774194e-06, "loss": 0.0767, "step": 6150 }, { "epoch": 98.56, "learning_rate": 6.935483870967742e-06, "loss": 0.081, "step": 6160 }, { "epoch": 98.72, "learning_rate": 5.32258064516129e-06, "loss": 0.0798, "step": 6170 }, { "epoch": 98.88, "learning_rate": 3.709677419354839e-06, "loss": 0.0926, "step": 6180 }, { "epoch": 98.99, "eval_accuracy": 0.8985343855693348, "eval_loss": 0.42374834418296814, "eval_runtime": 28.8905, "eval_samples_per_second": 61.404, "eval_steps_per_second": 0.969, "step": 6187 }, { "epoch": 99.04, "learning_rate": 2.096774193548387e-06, "loss": 0.0832, "step": 6190 }, { "epoch": 99.2, "learning_rate": 4.838709677419355e-07, "loss": 0.0829, "step": 6200 }, { "epoch": 99.2, "eval_accuracy": 0.8985343855693348, "eval_loss": 0.4238373041152954, "eval_runtime": 29.1144, "eval_samples_per_second": 60.932, "eval_steps_per_second": 0.962, "step": 6200 }, { "epoch": 99.2, "step": 6200, "total_flos": 1.2844070249593489e+20, "train_loss": 0.21447962582111357, "train_runtime": 32541.0222, "train_samples_per_second": 49.037, "train_steps_per_second": 0.191 } ], "logging_steps": 10, "max_steps": 6200, "num_train_epochs": 100, "save_steps": 500, "total_flos": 1.2844070249593489e+20, "trial_name": null, "trial_params": null }