{
  "best_metric": 0.728110599078341,
  "best_model_checkpoint": "videomae-base-finetuned-subset-check10/checkpoint-1960",
  "epoch": 39.016216216216215,
  "eval_steps": 500,
  "global_step": 2220,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4.504504504504505e-07,
      "loss": 1.5494,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.00900900900901e-07,
      "loss": 1.5329,
      "step": 20
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.3513513513513515e-06,
      "loss": 1.5277,
      "step": 30
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.801801801801802e-06,
      "loss": 1.5543,
      "step": 40
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.2522522522522524e-06,
      "loss": 1.5175,
      "step": 50
    },
    {
      "epoch": 0.03,
      "eval_accuracy": 0.2073732718894009,
      "eval_loss": 1.6040809154510498,
      "eval_runtime": 183.7832,
      "eval_samples_per_second": 1.181,
      "eval_steps_per_second": 0.152,
      "step": 56
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.702702702702703e-06,
      "loss": 1.4668,
      "step": 60
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.1531531531531532e-06,
      "loss": 1.4494,
      "step": 70
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.603603603603604e-06,
      "loss": 1.5305,
      "step": 80
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.0540540540540545e-06,
      "loss": 1.4669,
      "step": 90
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.504504504504505e-06,
      "loss": 1.4467,
      "step": 100
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.954954954954955e-06,
      "loss": 1.4397,
      "step": 110
    },
    {
      "epoch": 1.03,
      "eval_accuracy": 0.3870967741935484,
      "eval_loss": 1.4559013843536377,
      "eval_runtime": 165.5917,
      "eval_samples_per_second": 1.31,
      "eval_steps_per_second": 0.169,
      "step": 112
    },
    {
      "epoch": 2.0,
      "learning_rate": 5.405405405405406e-06,
      "loss": 1.3919,
      "step": 120
    },
    {
      "epoch": 2.01,
      "learning_rate": 5.855855855855856e-06,
      "loss": 1.4257,
      "step": 130
    },
    {
      "epoch": 2.01,
      "learning_rate": 6.3063063063063065e-06,
      "loss": 1.402,
      "step": 140
    },
    {
      "epoch": 2.02,
      "learning_rate": 6.7567567567567575e-06,
      "loss": 1.3609,
      "step": 150
    },
    {
      "epoch": 2.02,
      "learning_rate": 7.207207207207208e-06,
      "loss": 1.464,
      "step": 160
    },
    {
      "epoch": 2.03,
      "eval_accuracy": 0.39631336405529954,
      "eval_loss": 1.3637099266052246,
      "eval_runtime": 165.2932,
      "eval_samples_per_second": 1.313,
      "eval_steps_per_second": 0.169,
      "step": 168
    },
    {
      "epoch": 3.0,
      "learning_rate": 7.657657657657658e-06,
      "loss": 1.3384,
      "step": 170
    },
    {
      "epoch": 3.01,
      "learning_rate": 8.108108108108109e-06,
      "loss": 1.4317,
      "step": 180
    },
    {
      "epoch": 3.01,
      "learning_rate": 8.55855855855856e-06,
      "loss": 1.1669,
      "step": 190
    },
    {
      "epoch": 3.01,
      "learning_rate": 9.00900900900901e-06,
      "loss": 1.4449,
      "step": 200
    },
    {
      "epoch": 3.02,
      "learning_rate": 9.45945945945946e-06,
      "loss": 1.3429,
      "step": 210
    },
    {
      "epoch": 3.02,
      "learning_rate": 9.90990990990991e-06,
      "loss": 1.3404,
      "step": 220
    },
    {
      "epoch": 3.03,
      "eval_accuracy": 0.4470046082949309,
      "eval_loss": 1.2467495203018188,
      "eval_runtime": 166.7691,
      "eval_samples_per_second": 1.301,
      "eval_steps_per_second": 0.168,
      "step": 224
    },
    {
      "epoch": 4.0,
      "learning_rate": 9.95995995995996e-06,
      "loss": 1.2682,
      "step": 230
    },
    {
      "epoch": 4.01,
      "learning_rate": 9.90990990990991e-06,
      "loss": 1.2307,
      "step": 240
    },
    {
      "epoch": 4.01,
      "learning_rate": 9.85985985985986e-06,
      "loss": 1.14,
      "step": 250
    },
    {
      "epoch": 4.02,
      "learning_rate": 9.80980980980981e-06,
      "loss": 1.091,
      "step": 260
    },
    {
      "epoch": 4.02,
      "learning_rate": 9.75975975975976e-06,
      "loss": 1.1822,
      "step": 270
    },
    {
      "epoch": 4.03,
      "learning_rate": 9.70970970970971e-06,
      "loss": 1.3284,
      "step": 280
    },
    {
      "epoch": 4.03,
      "eval_accuracy": 0.3317972350230415,
      "eval_loss": 1.3115123510360718,
      "eval_runtime": 165.1237,
      "eval_samples_per_second": 1.314,
      "eval_steps_per_second": 0.17,
      "step": 280
    },
    {
      "epoch": 5.0,
      "learning_rate": 9.65965965965966e-06,
      "loss": 1.1944,
      "step": 290
    },
    {
      "epoch": 5.01,
      "learning_rate": 9.60960960960961e-06,
      "loss": 1.3055,
      "step": 300
    },
    {
      "epoch": 5.01,
      "learning_rate": 9.55955955955956e-06,
      "loss": 1.0722,
      "step": 310
    },
    {
      "epoch": 5.02,
      "learning_rate": 9.50950950950951e-06,
      "loss": 1.0467,
      "step": 320
    },
    {
      "epoch": 5.02,
      "learning_rate": 9.45945945945946e-06,
      "loss": 1.1598,
      "step": 330
    },
    {
      "epoch": 5.03,
      "eval_accuracy": 0.4470046082949309,
      "eval_loss": 1.2488903999328613,
      "eval_runtime": 168.3906,
      "eval_samples_per_second": 1.289,
      "eval_steps_per_second": 0.166,
      "step": 336
    },
    {
      "epoch": 6.0,
      "learning_rate": 9.40940940940941e-06,
      "loss": 1.2285,
      "step": 340
    },
    {
      "epoch": 6.01,
      "learning_rate": 9.35935935935936e-06,
      "loss": 1.0002,
      "step": 350
    },
    {
      "epoch": 6.01,
      "learning_rate": 9.30930930930931e-06,
      "loss": 1.2506,
      "step": 360
    },
    {
      "epoch": 6.02,
      "learning_rate": 9.25925925925926e-06,
      "loss": 1.0963,
      "step": 370
    },
    {
      "epoch": 6.02,
      "learning_rate": 9.20920920920921e-06,
      "loss": 1.1495,
      "step": 380
    },
    {
      "epoch": 6.02,
      "learning_rate": 9.15915915915916e-06,
      "loss": 0.9615,
      "step": 390
    },
    {
      "epoch": 6.03,
      "eval_accuracy": 0.4009216589861751,
      "eval_loss": 1.3057225942611694,
      "eval_runtime": 165.4477,
      "eval_samples_per_second": 1.312,
      "eval_steps_per_second": 0.169,
      "step": 392
    },
    {
      "epoch": 7.0,
      "learning_rate": 9.10910910910911e-06,
      "loss": 1.0211,
      "step": 400
    },
    {
      "epoch": 7.01,
      "learning_rate": 9.05905905905906e-06,
      "loss": 1.0622,
      "step": 410
    },
    {
      "epoch": 7.01,
      "learning_rate": 9.00900900900901e-06,
      "loss": 1.1759,
      "step": 420
    },
    {
      "epoch": 7.02,
      "learning_rate": 8.95895895895896e-06,
      "loss": 1.0414,
      "step": 430
    },
    {
      "epoch": 7.02,
      "learning_rate": 8.90890890890891e-06,
      "loss": 0.9357,
      "step": 440
    },
    {
      "epoch": 7.03,
      "eval_accuracy": 0.6497695852534562,
      "eval_loss": 0.9200783967971802,
      "eval_runtime": 165.3979,
      "eval_samples_per_second": 1.312,
      "eval_steps_per_second": 0.169,
      "step": 448
    },
    {
      "epoch": 8.0,
      "learning_rate": 8.85885885885886e-06,
      "loss": 1.2506,
      "step": 450
    },
    {
      "epoch": 8.01,
      "learning_rate": 8.80880880880881e-06,
      "loss": 1.0342,
      "step": 460
    },
    {
      "epoch": 8.01,
      "learning_rate": 8.75875875875876e-06,
      "loss": 1.0815,
      "step": 470
    },
    {
      "epoch": 8.01,
      "learning_rate": 8.70870870870871e-06,
      "loss": 1.0373,
      "step": 480
    },
    {
      "epoch": 8.02,
      "learning_rate": 8.65865865865866e-06,
      "loss": 0.9051,
      "step": 490
    },
    {
      "epoch": 8.02,
      "learning_rate": 8.60860860860861e-06,
      "loss": 0.9785,
      "step": 500
    },
    {
      "epoch": 8.03,
      "eval_accuracy": 0.6774193548387096,
      "eval_loss": 0.8629115223884583,
      "eval_runtime": 165.2531,
      "eval_samples_per_second": 1.313,
      "eval_steps_per_second": 0.169,
      "step": 504
    },
    {
      "epoch": 9.0,
      "learning_rate": 8.55855855855856e-06,
      "loss": 0.9915,
      "step": 510
    },
    {
      "epoch": 9.01,
      "learning_rate": 8.50850850850851e-06,
      "loss": 0.9176,
      "step": 520
    },
    {
      "epoch": 9.01,
      "learning_rate": 8.45845845845846e-06,
      "loss": 1.1561,
      "step": 530
    },
    {
      "epoch": 9.02,
      "learning_rate": 8.408408408408409e-06,
      "loss": 0.8966,
      "step": 540
    },
    {
      "epoch": 9.02,
      "learning_rate": 8.358358358358359e-06,
      "loss": 0.894,
      "step": 550
    },
    {
      "epoch": 9.03,
      "learning_rate": 8.308308308308309e-06,
      "loss": 1.0862,
      "step": 560
    },
    {
      "epoch": 9.03,
      "eval_accuracy": 0.5069124423963134,
      "eval_loss": 1.0976766347885132,
      "eval_runtime": 165.1522,
      "eval_samples_per_second": 1.314,
      "eval_steps_per_second": 0.17,
      "step": 560
    },
    {
      "epoch": 10.0,
      "learning_rate": 8.258258258258259e-06,
      "loss": 0.7806,
      "step": 570
    },
    {
      "epoch": 10.01,
      "learning_rate": 8.208208208208209e-06,
      "loss": 0.9904,
      "step": 580
    },
    {
      "epoch": 10.01,
      "learning_rate": 8.158158158158159e-06,
      "loss": 0.9099,
      "step": 590
    },
    {
      "epoch": 10.02,
      "learning_rate": 8.108108108108109e-06,
      "loss": 0.9169,
      "step": 600
    },
    {
      "epoch": 10.02,
      "learning_rate": 8.058058058058059e-06,
      "loss": 0.9315,
      "step": 610
    },
    {
      "epoch": 10.03,
      "eval_accuracy": 0.7096774193548387,
      "eval_loss": 0.7867635488510132,
      "eval_runtime": 165.0158,
      "eval_samples_per_second": 1.315,
      "eval_steps_per_second": 0.17,
      "step": 616
    },
    {
      "epoch": 11.0,
      "learning_rate": 8.00800800800801e-06,
      "loss": 0.7636,
      "step": 620
    },
    {
      "epoch": 11.01,
      "learning_rate": 7.95795795795796e-06,
      "loss": 0.9198,
      "step": 630
    },
    {
      "epoch": 11.01,
      "learning_rate": 7.90790790790791e-06,
      "loss": 1.0816,
      "step": 640
    },
    {
      "epoch": 11.02,
      "learning_rate": 7.85785785785786e-06,
      "loss": 0.8409,
      "step": 650
    },
    {
      "epoch": 11.02,
      "learning_rate": 7.807807807807808e-06,
      "loss": 0.7348,
      "step": 660
    },
    {
      "epoch": 11.02,
      "learning_rate": 7.757757757757758e-06,
      "loss": 0.9404,
      "step": 670
    },
    {
      "epoch": 11.03,
      "eval_accuracy": 0.6728110599078341,
      "eval_loss": 0.8170290589332581,
      "eval_runtime": 165.2251,
      "eval_samples_per_second": 1.313,
      "eval_steps_per_second": 0.169,
      "step": 672
    },
    {
      "epoch": 12.0,
      "learning_rate": 7.707707707707708e-06,
      "loss": 0.8669,
      "step": 680
    },
    {
      "epoch": 12.01,
      "learning_rate": 7.657657657657658e-06,
      "loss": 0.7773,
      "step": 690
    },
    {
      "epoch": 12.01,
      "learning_rate": 7.607607607607608e-06,
      "loss": 0.7599,
      "step": 700
    },
    {
      "epoch": 12.02,
      "learning_rate": 7.557557557557558e-06,
      "loss": 0.9216,
      "step": 710
    },
    {
      "epoch": 12.02,
      "learning_rate": 7.507507507507507e-06,
      "loss": 0.939,
      "step": 720
    },
    {
      "epoch": 12.03,
      "eval_accuracy": 0.663594470046083,
      "eval_loss": 0.9246166348457336,
      "eval_runtime": 165.7726,
      "eval_samples_per_second": 1.309,
      "eval_steps_per_second": 0.169,
      "step": 728
    },
    {
      "epoch": 13.0,
      "learning_rate": 7.457457457457457e-06,
      "loss": 0.9703,
      "step": 730
    },
    {
      "epoch": 13.01,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 0.6688,
      "step": 740
    },
    {
      "epoch": 13.01,
      "learning_rate": 7.3573573573573575e-06,
      "loss": 0.9097,
      "step": 750
    },
    {
      "epoch": 13.01,
      "learning_rate": 7.307307307307308e-06,
      "loss": 0.9963,
      "step": 760
    },
    {
      "epoch": 13.02,
      "learning_rate": 7.257257257257258e-06,
      "loss": 0.8594,
      "step": 770
    },
    {
      "epoch": 13.02,
      "learning_rate": 7.207207207207208e-06,
      "loss": 0.8205,
      "step": 780
    },
    {
      "epoch": 13.03,
      "eval_accuracy": 0.6866359447004609,
      "eval_loss": 0.8420272469520569,
      "eval_runtime": 165.0282,
      "eval_samples_per_second": 1.315,
      "eval_steps_per_second": 0.17,
      "step": 784
    },
    {
      "epoch": 14.0,
      "learning_rate": 7.157157157157158e-06,
      "loss": 0.8206,
      "step": 790
    },
    {
      "epoch": 14.01,
      "learning_rate": 7.107107107107107e-06,
      "loss": 0.8183,
      "step": 800
    },
    {
      "epoch": 14.01,
      "learning_rate": 7.057057057057057e-06,
      "loss": 0.7596,
      "step": 810
    },
    {
      "epoch": 14.02,
      "learning_rate": 7.007007007007007e-06,
      "loss": 0.799,
      "step": 820
    },
    {
      "epoch": 14.02,
      "learning_rate": 6.956956956956957e-06,
      "loss": 0.7377,
      "step": 830
    },
    {
      "epoch": 14.03,
      "learning_rate": 6.906906906906907e-06,
      "loss": 0.6719,
      "step": 840
    },
    {
      "epoch": 14.03,
      "eval_accuracy": 0.5898617511520737,
      "eval_loss": 1.07249116897583,
      "eval_runtime": 164.1063,
      "eval_samples_per_second": 1.322,
      "eval_steps_per_second": 0.171,
      "step": 840
    },
    {
      "epoch": 15.0,
      "learning_rate": 6.856856856856857e-06,
      "loss": 0.6874,
      "step": 850
    },
    {
      "epoch": 15.01,
      "learning_rate": 6.8068068068068075e-06,
      "loss": 0.8207,
      "step": 860
    },
    {
      "epoch": 15.01,
      "learning_rate": 6.7567567567567575e-06,
      "loss": 0.7454,
      "step": 870
    },
    {
      "epoch": 15.02,
      "learning_rate": 6.706706706706707e-06,
      "loss": 0.7188,
      "step": 880
    },
    {
      "epoch": 15.02,
      "learning_rate": 6.656656656656657e-06,
      "loss": 0.8308,
      "step": 890
    },
    {
      "epoch": 15.03,
      "eval_accuracy": 0.6912442396313364,
      "eval_loss": 0.8682537078857422,
      "eval_runtime": 165.7327,
      "eval_samples_per_second": 1.309,
      "eval_steps_per_second": 0.169,
      "step": 896
    },
    {
      "epoch": 16.0,
      "learning_rate": 6.606606606606607e-06,
      "loss": 0.891,
      "step": 900
    },
    {
      "epoch": 16.01,
      "learning_rate": 6.556556556556557e-06,
      "loss": 0.7597,
      "step": 910
    },
    {
      "epoch": 16.01,
      "learning_rate": 6.506506506506507e-06,
      "loss": 0.8318,
      "step": 920
    },
    {
      "epoch": 16.02,
      "learning_rate": 6.456456456456457e-06,
      "loss": 0.7681,
      "step": 930
    },
    {
      "epoch": 16.02,
      "learning_rate": 6.406406406406407e-06,
      "loss": 1.0094,
      "step": 940
    },
    {
      "epoch": 16.02,
      "learning_rate": 6.356356356356357e-06,
      "loss": 0.7554,
      "step": 950
    },
    {
      "epoch": 16.03,
      "eval_accuracy": 0.5990783410138248,
      "eval_loss": 0.9684067368507385,
      "eval_runtime": 164.5634,
      "eval_samples_per_second": 1.319,
      "eval_steps_per_second": 0.17,
      "step": 952
    },
    {
      "epoch": 17.0,
      "learning_rate": 6.3063063063063065e-06,
      "loss": 0.9087,
      "step": 960
    },
    {
      "epoch": 17.01,
      "learning_rate": 6.2562562562562565e-06,
      "loss": 0.7401,
      "step": 970
    },
    {
      "epoch": 17.01,
      "learning_rate": 6.206206206206207e-06,
      "loss": 0.7611,
      "step": 980
    },
    {
      "epoch": 17.02,
      "learning_rate": 6.156156156156157e-06,
      "loss": 0.6347,
      "step": 990
    },
    {
      "epoch": 17.02,
      "learning_rate": 6.106106106106107e-06,
      "loss": 0.6962,
      "step": 1000
    },
    {
      "epoch": 17.03,
      "eval_accuracy": 0.5483870967741935,
      "eval_loss": 1.1106446981430054,
      "eval_runtime": 165.0022,
      "eval_samples_per_second": 1.315,
      "eval_steps_per_second": 0.17,
      "step": 1008
    },
    {
      "epoch": 18.0,
      "learning_rate": 6.056056056056057e-06,
      "loss": 0.6861,
      "step": 1010
    },
    {
      "epoch": 18.01,
      "learning_rate": 6.006006006006007e-06,
      "loss": 0.7999,
      "step": 1020
    },
    {
      "epoch": 18.01,
      "learning_rate": 5.955955955955957e-06,
      "loss": 0.7412,
      "step": 1030
    },
    {
      "epoch": 18.01,
      "learning_rate": 5.905905905905906e-06,
      "loss": 0.8145,
      "step": 1040
    },
    {
      "epoch": 18.02,
      "learning_rate": 5.855855855855856e-06,
      "loss": 0.6653,
      "step": 1050
    },
    {
      "epoch": 18.02,
      "learning_rate": 5.805805805805806e-06,
      "loss": 0.7995,
      "step": 1060
    },
    {
      "epoch": 18.03,
      "eval_accuracy": 0.6497695852534562,
      "eval_loss": 0.9750950336456299,
      "eval_runtime": 163.7137,
      "eval_samples_per_second": 1.325,
      "eval_steps_per_second": 0.171,
      "step": 1064
    },
    {
      "epoch": 19.0,
      "learning_rate": 5.755755755755756e-06,
      "loss": 0.6923,
      "step": 1070
    },
    {
      "epoch": 19.01,
      "learning_rate": 5.7057057057057065e-06,
      "loss": 0.7874,
      "step": 1080
    },
    {
      "epoch": 19.01,
      "learning_rate": 5.6556556556556565e-06,
      "loss": 0.728,
      "step": 1090
    },
    {
      "epoch": 19.02,
      "learning_rate": 5.605605605605607e-06,
      "loss": 0.6329,
      "step": 1100
    },
    {
      "epoch": 19.02,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.8939,
      "step": 1110
    },
    {
      "epoch": 19.03,
      "learning_rate": 5.505505505505506e-06,
      "loss": 0.8298,
      "step": 1120
    },
    {
      "epoch": 19.03,
      "eval_accuracy": 0.5299539170506913,
      "eval_loss": 1.0630956888198853,
      "eval_runtime": 165.0711,
      "eval_samples_per_second": 1.315,
      "eval_steps_per_second": 0.17,
      "step": 1120
    },
    {
      "epoch": 20.0,
      "learning_rate": 5.455455455455456e-06,
      "loss": 0.7025,
      "step": 1130
    },
    {
      "epoch": 20.01,
      "learning_rate": 5.405405405405406e-06,
      "loss": 0.6251,
      "step": 1140
    },
    {
      "epoch": 20.01,
      "learning_rate": 5.355355355355356e-06,
      "loss": 0.7818,
      "step": 1150
    },
    {
      "epoch": 20.02,
      "learning_rate": 5.305305305305306e-06,
      "loss": 0.8259,
      "step": 1160
    },
    {
      "epoch": 20.02,
      "learning_rate": 5.255255255255256e-06,
      "loss": 0.6607,
      "step": 1170
    },
    {
      "epoch": 20.03,
      "eval_accuracy": 0.6175115207373272,
      "eval_loss": 0.9457883834838867,
      "eval_runtime": 164.6389,
      "eval_samples_per_second": 1.318,
      "eval_steps_per_second": 0.17,
      "step": 1176
    },
    {
      "epoch": 21.0,
      "learning_rate": 5.205205205205206e-06,
      "loss": 0.6344,
      "step": 1180
    },
    {
      "epoch": 21.01,
      "learning_rate": 5.155155155155156e-06,
      "loss": 0.637,
      "step": 1190
    },
    {
      "epoch": 21.01,
      "learning_rate": 5.105105105105106e-06,
      "loss": 0.858,
      "step": 1200
    },
    {
      "epoch": 21.02,
      "learning_rate": 5.055055055055056e-06,
      "loss": 0.6934,
      "step": 1210
    },
    {
      "epoch": 21.02,
      "learning_rate": 5.005005005005006e-06,
      "loss": 0.5714,
      "step": 1220
    },
    {
      "epoch": 21.02,
      "learning_rate": 4.954954954954955e-06,
      "loss": 0.688,
      "step": 1230
    },
    {
      "epoch": 21.03,
      "eval_accuracy": 0.6036866359447005,
      "eval_loss": 1.029563546180725,
      "eval_runtime": 163.856,
      "eval_samples_per_second": 1.324,
      "eval_steps_per_second": 0.171,
      "step": 1232
    },
    {
      "epoch": 22.0,
      "learning_rate": 4.904904904904905e-06,
      "loss": 0.5114,
      "step": 1240
    },
    {
      "epoch": 22.01,
      "learning_rate": 4.854854854854855e-06,
      "loss": 0.6638,
      "step": 1250
    },
    {
      "epoch": 22.01,
      "learning_rate": 4.804804804804805e-06,
      "loss": 0.7833,
      "step": 1260
    },
    {
      "epoch": 22.02,
      "learning_rate": 4.754754754754755e-06,
      "loss": 0.7556,
      "step": 1270
    },
    {
      "epoch": 22.02,
      "learning_rate": 4.704704704704705e-06,
      "loss": 0.5835,
      "step": 1280
    },
    {
      "epoch": 22.03,
      "eval_accuracy": 0.6774193548387096,
      "eval_loss": 0.8947747349739075,
      "eval_runtime": 163.875,
      "eval_samples_per_second": 1.324,
      "eval_steps_per_second": 0.171,
      "step": 1288
    },
    {
      "epoch": 23.0,
      "learning_rate": 4.654654654654655e-06,
      "loss": 0.7209,
      "step": 1290
    },
    {
      "epoch": 23.01,
      "learning_rate": 4.604604604604605e-06,
      "loss": 0.6087,
      "step": 1300
    },
    {
      "epoch": 23.01,
      "learning_rate": 4.554554554554555e-06,
      "loss": 0.6529,
      "step": 1310
    },
    {
      "epoch": 23.01,
      "learning_rate": 4.504504504504505e-06,
      "loss": 0.7221,
      "step": 1320
    },
    {
      "epoch": 23.02,
      "learning_rate": 4.454454454454455e-06,
      "loss": 0.6167,
      "step": 1330
    },
    {
      "epoch": 23.02,
      "learning_rate": 4.404404404404405e-06,
      "loss": 0.6987,
      "step": 1340
    },
    {
      "epoch": 23.03,
      "eval_accuracy": 0.7188940092165899,
      "eval_loss": 0.7882533669471741,
      "eval_runtime": 163.4201,
      "eval_samples_per_second": 1.328,
      "eval_steps_per_second": 0.171,
      "step": 1344
    },
    {
      "epoch": 24.0,
      "learning_rate": 4.354354354354355e-06,
      "loss": 0.7234,
      "step": 1350
    },
    {
      "epoch": 24.01,
      "learning_rate": 4.304304304304305e-06,
      "loss": 0.808,
      "step": 1360
    },
    {
      "epoch": 24.01,
      "learning_rate": 4.254254254254255e-06,
      "loss": 0.6323,
      "step": 1370
    },
    {
      "epoch": 24.02,
      "learning_rate": 4.204204204204204e-06,
      "loss": 0.5463,
      "step": 1380
    },
    {
      "epoch": 24.02,
      "learning_rate": 4.154154154154154e-06,
      "loss": 0.7978,
      "step": 1390
    },
    {
      "epoch": 24.03,
      "learning_rate": 4.1041041041041045e-06,
      "loss": 0.4979,
      "step": 1400
    },
    {
      "epoch": 24.03,
      "eval_accuracy": 0.7188940092165899,
      "eval_loss": 0.7089133262634277,
      "eval_runtime": 164.3791,
      "eval_samples_per_second": 1.32,
      "eval_steps_per_second": 0.17,
      "step": 1400
    },
    {
      "epoch": 25.0,
      "learning_rate": 4.0540540540540545e-06,
      "loss": 0.4685,
      "step": 1410
    },
    {
      "epoch": 25.01,
      "learning_rate": 4.004004004004005e-06,
      "loss": 0.665,
      "step": 1420
    },
    {
      "epoch": 25.01,
      "learning_rate": 3.953953953953955e-06,
      "loss": 0.6872,
      "step": 1430
    },
    {
      "epoch": 25.02,
      "learning_rate": 3.903903903903904e-06,
      "loss": 0.5854,
      "step": 1440
    },
    {
      "epoch": 25.02,
      "learning_rate": 3.853853853853854e-06,
      "loss": 0.6163,
      "step": 1450
    },
    {
      "epoch": 25.03,
      "eval_accuracy": 0.7235023041474654,
      "eval_loss": 0.7633541226387024,
      "eval_runtime": 163.5735,
      "eval_samples_per_second": 1.327,
      "eval_steps_per_second": 0.171,
      "step": 1456
    },
    {
      "epoch": 26.0,
      "learning_rate": 3.803803803803804e-06,
      "loss": 0.7065,
      "step": 1460
    },
    {
      "epoch": 26.01,
      "learning_rate": 3.7537537537537537e-06,
      "loss": 0.6684,
      "step": 1470
    },
    {
      "epoch": 26.01,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.786,
      "step": 1480
    },
    {
      "epoch": 26.02,
      "learning_rate": 3.653653653653654e-06,
      "loss": 0.437,
      "step": 1490
    },
    {
      "epoch": 26.02,
      "learning_rate": 3.603603603603604e-06,
      "loss": 0.6708,
      "step": 1500
    },
    {
      "epoch": 26.02,
      "learning_rate": 3.5535535535535535e-06,
      "loss": 0.6754,
      "step": 1510
    },
    {
      "epoch": 26.03,
      "eval_accuracy": 0.6359447004608295,
      "eval_loss": 0.9443588852882385,
      "eval_runtime": 163.9867,
      "eval_samples_per_second": 1.323,
      "eval_steps_per_second": 0.171,
      "step": 1512
    },
    {
      "epoch": 27.0,
      "learning_rate": 3.5035035035035036e-06,
      "loss": 0.7284,
      "step": 1520
    },
    {
      "epoch": 27.01,
      "learning_rate": 3.4534534534534537e-06,
      "loss": 0.5413,
      "step": 1530
    },
    {
      "epoch": 27.01,
      "learning_rate": 3.4034034034034037e-06,
      "loss": 0.6884,
      "step": 1540
    },
    {
      "epoch": 27.02,
      "learning_rate": 3.3533533533533534e-06,
      "loss": 0.5541,
      "step": 1550
    },
    {
      "epoch": 27.02,
      "learning_rate": 3.3033033033033035e-06,
      "loss": 0.6673,
      "step": 1560
    },
    {
      "epoch": 27.03,
      "eval_accuracy": 0.6543778801843319,
      "eval_loss": 0.8390823602676392,
      "eval_runtime": 164.572,
      "eval_samples_per_second": 1.319,
      "eval_steps_per_second": 0.17,
      "step": 1568
    },
    {
      "epoch": 28.0,
      "learning_rate": 3.2532532532532535e-06,
      "loss": 0.6895,
      "step": 1570
    },
    {
      "epoch": 28.01,
      "learning_rate": 3.2032032032032036e-06,
      "loss": 0.553,
      "step": 1580
    },
    {
      "epoch": 28.01,
      "learning_rate": 3.1531531531531532e-06,
      "loss": 0.4311,
      "step": 1590
    },
    {
      "epoch": 28.01,
      "learning_rate": 3.1031031031031033e-06,
      "loss": 0.6969,
      "step": 1600
    },
    {
      "epoch": 28.02,
      "learning_rate": 3.0530530530530534e-06,
      "loss": 0.7507,
      "step": 1610
    },
    {
      "epoch": 28.02,
      "learning_rate": 3.0030030030030034e-06,
      "loss": 0.4924,
      "step": 1620
    },
    {
      "epoch": 28.03,
      "eval_accuracy": 0.6682027649769585,
      "eval_loss": 0.8288503885269165,
      "eval_runtime": 164.6437,
      "eval_samples_per_second": 1.318,
      "eval_steps_per_second": 0.17,
      "step": 1624
    },
    {
      "epoch": 29.0,
      "learning_rate": 2.952952952952953e-06,
      "loss": 0.7695,
      "step": 1630
    },
    {
      "epoch": 29.01,
      "learning_rate": 2.902902902902903e-06,
      "loss": 0.5981,
      "step": 1640
    },
    {
      "epoch": 29.01,
      "learning_rate": 2.8528528528528532e-06,
      "loss": 0.613,
      "step": 1650
    },
    {
      "epoch": 29.02,
      "learning_rate": 2.8028028028028033e-06,
      "loss": 0.5841,
      "step": 1660
    },
    {
      "epoch": 29.02,
      "learning_rate": 2.752752752752753e-06,
      "loss": 0.7704,
      "step": 1670
    },
    {
      "epoch": 29.03,
      "learning_rate": 2.702702702702703e-06,
      "loss": 0.6438,
      "step": 1680
    },
    {
      "epoch": 29.03,
      "eval_accuracy": 0.6129032258064516,
      "eval_loss": 0.9605286121368408,
      "eval_runtime": 164.5062,
      "eval_samples_per_second": 1.319,
      "eval_steps_per_second": 0.17,
      "step": 1680
    },
    {
      "epoch": 30.0,
      "learning_rate": 2.652652652652653e-06,
      "loss": 0.583,
      "step": 1690
    },
    {
      "epoch": 30.01,
      "learning_rate": 2.602602602602603e-06,
      "loss": 0.7539,
      "step": 1700
    },
    {
      "epoch": 30.01,
      "learning_rate": 2.552552552552553e-06,
      "loss": 0.5418,
      "step": 1710
    },
    {
      "epoch": 30.02,
      "learning_rate": 2.502502502502503e-06,
      "loss": 0.6689,
      "step": 1720
    },
    {
      "epoch": 30.02,
      "learning_rate": 2.4524524524524525e-06,
      "loss": 0.5714,
      "step": 1730
    },
    {
      "epoch": 30.03,
      "eval_accuracy": 0.6451612903225806,
      "eval_loss": 0.8838080763816833,
      "eval_runtime": 163.4686,
      "eval_samples_per_second": 1.327,
      "eval_steps_per_second": 0.171,
      "step": 1736
    },
    {
      "epoch": 31.0,
      "learning_rate": 2.4024024024024026e-06,
      "loss": 0.4737,
      "step": 1740
    },
    {
      "epoch": 31.01,
      "learning_rate": 2.3523523523523527e-06,
      "loss": 0.6563,
      "step": 1750
    },
    {
      "epoch": 31.01,
      "learning_rate": 2.3023023023023023e-06,
      "loss": 0.4469,
      "step": 1760
    },
    {
      "epoch": 31.02,
      "learning_rate": 2.2522522522522524e-06,
      "loss": 0.5655,
      "step": 1770
    },
    {
      "epoch": 31.02,
      "learning_rate": 2.2022022022022024e-06,
      "loss": 0.5421,
      "step": 1780
    },
    {
      "epoch": 31.02,
      "learning_rate": 2.1521521521521525e-06,
      "loss": 0.6726,
      "step": 1790
    },
    {
      "epoch": 31.03,
      "eval_accuracy": 0.6589861751152074,
      "eval_loss": 0.8412278890609741,
      "eval_runtime": 164.9282,
      "eval_samples_per_second": 1.316,
      "eval_steps_per_second": 0.17,
      "step": 1792
    },
    {
      "epoch": 32.0,
      "learning_rate": 2.102102102102102e-06,
      "loss": 0.7254,
      "step": 1800
    },
    {
      "epoch": 32.01,
      "learning_rate": 2.0520520520520522e-06,
      "loss": 0.5397,
      "step": 1810
    },
    {
      "epoch": 32.01,
      "learning_rate": 2.0020020020020023e-06,
      "loss": 0.596,
      "step": 1820
    },
    {
      "epoch": 32.02,
      "learning_rate": 1.951951951951952e-06,
      "loss": 0.5603,
      "step": 1830
    },
    {
      "epoch": 32.02,
      "learning_rate": 1.901901901901902e-06,
      "loss": 0.5027,
      "step": 1840
    },
    {
      "epoch": 32.03,
      "eval_accuracy": 0.6728110599078341,
      "eval_loss": 0.8439391255378723,
      "eval_runtime": 168.1229,
      "eval_samples_per_second": 1.291,
      "eval_steps_per_second": 0.167,
      "step": 1848
    },
    {
      "epoch": 33.0,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 0.6769,
      "step": 1850
    },
    {
      "epoch": 33.01,
      "learning_rate": 1.801801801801802e-06,
      "loss": 0.565,
      "step": 1860
    },
    {
      "epoch": 33.01,
      "learning_rate": 1.7517517517517518e-06,
      "loss": 0.556,
      "step": 1870
    },
    {
      "epoch": 33.01,
      "learning_rate": 1.7017017017017019e-06,
      "loss": 0.4187,
      "step": 1880
    },
    {
      "epoch": 33.02,
      "learning_rate": 1.6516516516516517e-06,
      "loss": 0.4718,
      "step": 1890
    },
    {
      "epoch": 33.02,
      "learning_rate": 1.6016016016016018e-06,
      "loss": 0.4649,
      "step": 1900
    },
    {
      "epoch": 33.03,
      "eval_accuracy": 0.6267281105990783,
      "eval_loss": 0.9524617791175842,
      "eval_runtime": 169.6407,
      "eval_samples_per_second": 1.279,
      "eval_steps_per_second": 0.165,
      "step": 1904
    },
    {
      "epoch": 34.0,
      "learning_rate": 1.5515515515515517e-06,
      "loss": 0.4505,
      "step": 1910
    },
    {
      "epoch": 34.01,
      "learning_rate": 1.5015015015015017e-06,
      "loss": 0.5412,
      "step": 1920
    },
    {
      "epoch": 34.01,
      "learning_rate": 1.4514514514514516e-06,
      "loss": 0.5931,
      "step": 1930
    },
    {
      "epoch": 34.02,
      "learning_rate": 1.4014014014014016e-06,
      "loss": 0.4183,
      "step": 1940
    },
    {
      "epoch": 34.02,
      "learning_rate": 1.3513513513513515e-06,
      "loss": 0.6046,
      "step": 1950
    },
    {
      "epoch": 34.03,
      "learning_rate": 1.3013013013013016e-06,
      "loss": 0.6625,
      "step": 1960
    },
    {
      "epoch": 34.03,
      "eval_accuracy": 0.728110599078341,
      "eval_loss": 0.7850246429443359,
      "eval_runtime": 164.4991,
      "eval_samples_per_second": 1.319,
      "eval_steps_per_second": 0.17,
      "step": 1960
    },
    {
      "epoch": 35.0,
      "learning_rate": 1.2512512512512514e-06,
      "loss": 0.5967,
      "step": 1970
    },
    {
      "epoch": 35.01,
      "learning_rate": 1.2012012012012013e-06,
      "loss": 0.5503,
      "step": 1980
    },
    {
      "epoch": 35.01,
      "learning_rate": 1.1511511511511512e-06,
      "loss": 0.5159,
      "step": 1990
    },
    {
      "epoch": 35.02,
      "learning_rate": 1.1011011011011012e-06,
      "loss": 0.4377,
      "step": 2000
    },
    {
      "epoch": 35.02,
      "learning_rate": 1.051051051051051e-06,
      "loss": 0.5793,
      "step": 2010
    },
    {
      "epoch": 35.03,
      "eval_accuracy": 0.6728110599078341,
      "eval_loss": 0.8481199741363525,
      "eval_runtime": 164.3359,
      "eval_samples_per_second": 1.32,
      "eval_steps_per_second": 0.17,
      "step": 2016
    },
    {
      "epoch": 36.0,
      "learning_rate": 1.0010010010010011e-06,
      "loss": 0.7009,
      "step": 2020
    },
    {
      "epoch": 36.01,
      "learning_rate": 9.50950950950951e-07,
      "loss": 0.5902,
      "step": 2030
    },
    {
      "epoch": 36.01,
      "learning_rate": 9.00900900900901e-07,
      "loss": 0.5366,
      "step": 2040
    },
    {
      "epoch": 36.02,
      "learning_rate": 8.508508508508509e-07,
      "loss": 0.5119,
      "step": 2050
    },
    {
      "epoch": 36.02,
      "learning_rate": 8.008008008008009e-07,
      "loss": 0.4946,
      "step": 2060
    },
    {
      "epoch": 36.02,
      "learning_rate": 7.507507507507509e-07,
      "loss": 0.6411,
      "step": 2070
    },
    {
      "epoch": 36.03,
      "eval_accuracy": 0.6589861751152074,
      "eval_loss": 0.8842198252677917,
      "eval_runtime": 164.6218,
      "eval_samples_per_second": 1.318,
      "eval_steps_per_second": 0.17,
      "step": 2072
    },
    {
      "epoch": 37.0,
      "learning_rate": 7.007007007007008e-07,
      "loss": 0.5172,
      "step": 2080
    },
    {
      "epoch": 37.01,
      "learning_rate": 6.506506506506508e-07,
      "loss": 0.3986,
      "step": 2090
    },
    {
      "epoch": 37.01,
      "learning_rate": 6.006006006006006e-07,
      "loss": 0.4807,
      "step": 2100
    },
    {
      "epoch": 37.02,
      "learning_rate": 5.505505505505506e-07,
      "loss": 0.4681,
      "step": 2110
    },
    {
      "epoch": 37.02,
      "learning_rate": 5.005005005005006e-07,
      "loss": 0.6592,
      "step": 2120
    },
    {
      "epoch": 37.03,
      "eval_accuracy": 0.6912442396313364,
      "eval_loss": 0.802787184715271,
      "eval_runtime": 163.9059,
      "eval_samples_per_second": 1.324,
      "eval_steps_per_second": 0.171,
      "step": 2128
    },
    {
      "epoch": 38.0,
      "learning_rate": 4.504504504504505e-07,
      "loss": 0.5769,
      "step": 2130
    },
    {
      "epoch": 38.01,
      "learning_rate": 4.0040040040040045e-07,
      "loss": 0.6323,
      "step": 2140
    },
    {
      "epoch": 38.01,
      "learning_rate": 3.503503503503504e-07,
      "loss": 0.5319,
      "step": 2150
    },
    {
      "epoch": 38.01,
      "learning_rate": 3.003003003003003e-07,
      "loss": 0.6306,
      "step": 2160
    },
    {
      "epoch": 38.02,
      "learning_rate": 2.502502502502503e-07,
      "loss": 0.6192,
      "step": 2170
    },
    {
      "epoch": 38.02,
      "learning_rate": 2.0020020020020022e-07,
      "loss": 0.5524,
      "step": 2180
    },
    {
      "epoch": 38.03,
      "eval_accuracy": 0.6866359447004609,
      "eval_loss": 0.821592390537262,
      "eval_runtime": 165.1926,
      "eval_samples_per_second": 1.314,
      "eval_steps_per_second": 0.169,
      "step": 2184
    },
    {
      "epoch": 39.0,
      "learning_rate": 1.5015015015015016e-07,
      "loss": 0.5701,
      "step": 2190
    },
    {
      "epoch": 39.01,
      "learning_rate": 1.0010010010010011e-07,
      "loss": 0.5891,
      "step": 2200
    },
    {
      "epoch": 39.01,
      "learning_rate": 5.0050050050050056e-08,
      "loss": 0.5807,
      "step": 2210
    },
    {
      "epoch": 39.02,
      "learning_rate": 0.0,
      "loss": 0.5697,
      "step": 2220
    },
    {
      "epoch": 39.02,
      "eval_accuracy": 0.6774193548387096,
      "eval_loss": 0.833946943283081,
      "eval_runtime": 164.2083,
      "eval_samples_per_second": 1.321,
      "eval_steps_per_second": 0.171,
      "step": 2220
    },
    {
      "epoch": 39.02,
      "step": 2220,
      "total_flos": 2.208334746599719e+19,
      "train_loss": 0.8170982454274152,
      "train_runtime": 23121.8467,
      "train_samples_per_second": 0.768,
      "train_steps_per_second": 0.096
    },
    {
      "epoch": 39.02,
      "eval_accuracy": 0.6342592592592593,
      "eval_loss": 0.8682467341423035,
      "eval_runtime": 165.599,
      "eval_samples_per_second": 1.304,
      "eval_steps_per_second": 0.163,
      "step": 2220
    },
    {
      "epoch": 39.02,
      "eval_accuracy": 0.6342592592592593,
      "eval_loss": 0.8682467341423035,
      "eval_runtime": 165.9338,
      "eval_samples_per_second": 1.302,
      "eval_steps_per_second": 0.163,
      "step": 2220
    }
  ],
  "logging_steps": 10,
  "max_steps": 2220,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "total_flos": 2.208334746599719e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}