|
{ |
|
"best_metric": 0.5358908772468567, |
|
"best_model_checkpoint": "./vit-brain-tumour-v2/checkpoint-200", |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 780, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05128205128205128, |
|
"grad_norm": 1.8319804668426514, |
|
"learning_rate": 0.00019743589743589744, |
|
"loss": 0.8818, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.10256410256410256, |
|
"grad_norm": 0.8907442092895508, |
|
"learning_rate": 0.00019487179487179487, |
|
"loss": 0.3732, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 0.5684014558792114, |
|
"learning_rate": 0.00019230769230769233, |
|
"loss": 0.2491, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20512820512820512, |
|
"grad_norm": 0.4306720495223999, |
|
"learning_rate": 0.00018974358974358974, |
|
"loss": 0.1864, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 5.443202018737793, |
|
"learning_rate": 0.0001871794871794872, |
|
"loss": 0.2069, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 0.45025336742401123, |
|
"learning_rate": 0.00018461538461538463, |
|
"loss": 0.2037, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.358974358974359, |
|
"grad_norm": 0.8258907198905945, |
|
"learning_rate": 0.00018205128205128207, |
|
"loss": 0.1452, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.41025641025641024, |
|
"grad_norm": 0.5631661415100098, |
|
"learning_rate": 0.0001794871794871795, |
|
"loss": 0.2959, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 0.5860559940338135, |
|
"learning_rate": 0.00017692307692307693, |
|
"loss": 0.0992, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 3.4091548919677734, |
|
"learning_rate": 0.00017435897435897436, |
|
"loss": 0.1236, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"eval_accuracy": 0.8481481481481481, |
|
"eval_loss": 0.5989510416984558, |
|
"eval_runtime": 15.9913, |
|
"eval_samples_per_second": 50.652, |
|
"eval_steps_per_second": 6.378, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5641025641025641, |
|
"grad_norm": 1.3914088010787964, |
|
"learning_rate": 0.0001717948717948718, |
|
"loss": 0.1454, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 2.7670581340789795, |
|
"learning_rate": 0.00016923076923076923, |
|
"loss": 0.1344, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.14079055190086365, |
|
"learning_rate": 0.0001666666666666667, |
|
"loss": 0.2146, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.717948717948718, |
|
"grad_norm": 0.2149924784898758, |
|
"learning_rate": 0.0001641025641025641, |
|
"loss": 0.1724, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 4.1687703132629395, |
|
"learning_rate": 0.00016153846153846155, |
|
"loss": 0.1342, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8205128205128205, |
|
"grad_norm": 0.09244555979967117, |
|
"learning_rate": 0.00015897435897435896, |
|
"loss": 0.1174, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8717948717948718, |
|
"grad_norm": 3.8858108520507812, |
|
"learning_rate": 0.00015641025641025642, |
|
"loss": 0.1445, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 0.13063883781433105, |
|
"learning_rate": 0.00015384615384615385, |
|
"loss": 0.086, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9743589743589743, |
|
"grad_norm": 0.4698440432548523, |
|
"learning_rate": 0.00015128205128205128, |
|
"loss": 0.0978, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"grad_norm": 0.7778797149658203, |
|
"learning_rate": 0.00014871794871794872, |
|
"loss": 0.1695, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"eval_accuracy": 0.8703703703703703, |
|
"eval_loss": 0.5358908772468567, |
|
"eval_runtime": 15.8447, |
|
"eval_samples_per_second": 51.121, |
|
"eval_steps_per_second": 6.437, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0769230769230769, |
|
"grad_norm": 0.09852629154920578, |
|
"learning_rate": 0.00014615384615384615, |
|
"loss": 0.0657, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.1282051282051282, |
|
"grad_norm": 3.54819655418396, |
|
"learning_rate": 0.0001435897435897436, |
|
"loss": 0.0461, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.1794871794871795, |
|
"grad_norm": 0.0645141452550888, |
|
"learning_rate": 0.00014102564102564104, |
|
"loss": 0.0754, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.2307692307692308, |
|
"grad_norm": 0.2788432240486145, |
|
"learning_rate": 0.00013846153846153847, |
|
"loss": 0.0592, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.282051282051282, |
|
"grad_norm": 0.9937513470649719, |
|
"learning_rate": 0.0001358974358974359, |
|
"loss": 0.0509, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 1.003893494606018, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.0319, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.3846153846153846, |
|
"grad_norm": 0.04893897473812103, |
|
"learning_rate": 0.00013076923076923077, |
|
"loss": 0.0348, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.435897435897436, |
|
"grad_norm": 1.5978337526321411, |
|
"learning_rate": 0.00012820512820512823, |
|
"loss": 0.0434, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.4871794871794872, |
|
"grad_norm": 0.044942498207092285, |
|
"learning_rate": 0.00012564102564102564, |
|
"loss": 0.0787, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 0.04497215896844864, |
|
"learning_rate": 0.0001230769230769231, |
|
"loss": 0.0186, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"eval_accuracy": 0.8975308641975308, |
|
"eval_loss": 0.570486843585968, |
|
"eval_runtime": 16.7695, |
|
"eval_samples_per_second": 48.302, |
|
"eval_steps_per_second": 6.082, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5897435897435899, |
|
"grad_norm": 0.03568781167268753, |
|
"learning_rate": 0.00012051282051282052, |
|
"loss": 0.0339, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.641025641025641, |
|
"grad_norm": 0.034062109887599945, |
|
"learning_rate": 0.00011794871794871796, |
|
"loss": 0.0072, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.6923076923076923, |
|
"grad_norm": 0.03320033475756645, |
|
"learning_rate": 0.00011538461538461538, |
|
"loss": 0.0062, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.7435897435897436, |
|
"grad_norm": 0.03138517960906029, |
|
"learning_rate": 0.00011282051282051283, |
|
"loss": 0.0063, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.7948717948717947, |
|
"grad_norm": 0.04579005390405655, |
|
"learning_rate": 0.00011025641025641027, |
|
"loss": 0.0064, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"grad_norm": 0.02792205847799778, |
|
"learning_rate": 0.0001076923076923077, |
|
"loss": 0.0076, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.8974358974358974, |
|
"grad_norm": 0.03118390031158924, |
|
"learning_rate": 0.00010512820512820514, |
|
"loss": 0.0068, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.9487179487179487, |
|
"grad_norm": 0.02970140054821968, |
|
"learning_rate": 0.00010256410256410256, |
|
"loss": 0.006, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.09769738465547562, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0374, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.051282051282051, |
|
"grad_norm": 0.09561528265476227, |
|
"learning_rate": 9.743589743589744e-05, |
|
"loss": 0.0368, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.051282051282051, |
|
"eval_accuracy": 0.8975308641975308, |
|
"eval_loss": 0.6135894656181335, |
|
"eval_runtime": 16.0452, |
|
"eval_samples_per_second": 50.482, |
|
"eval_steps_per_second": 6.357, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.1025641025641026, |
|
"grad_norm": 0.02449757605791092, |
|
"learning_rate": 9.487179487179487e-05, |
|
"loss": 0.0051, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.1538461538461537, |
|
"grad_norm": 0.02481868490576744, |
|
"learning_rate": 9.230769230769232e-05, |
|
"loss": 0.0044, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.2051282051282053, |
|
"grad_norm": 0.02576032653450966, |
|
"learning_rate": 8.974358974358975e-05, |
|
"loss": 0.0044, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.2564102564102564, |
|
"grad_norm": 0.026232751086354256, |
|
"learning_rate": 8.717948717948718e-05, |
|
"loss": 0.0043, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 0.02455274388194084, |
|
"learning_rate": 8.461538461538461e-05, |
|
"loss": 0.0039, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.358974358974359, |
|
"grad_norm": 0.02121666818857193, |
|
"learning_rate": 8.205128205128205e-05, |
|
"loss": 0.007, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.41025641025641, |
|
"grad_norm": 0.022693779319524765, |
|
"learning_rate": 7.948717948717948e-05, |
|
"loss": 0.0045, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.4615384615384617, |
|
"grad_norm": 0.8521128296852112, |
|
"learning_rate": 7.692307692307693e-05, |
|
"loss": 0.0044, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.5128205128205128, |
|
"grad_norm": 0.01888178288936615, |
|
"learning_rate": 7.435897435897436e-05, |
|
"loss": 0.0117, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.564102564102564, |
|
"grad_norm": 0.026412444189190865, |
|
"learning_rate": 7.17948717948718e-05, |
|
"loss": 0.0036, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.564102564102564, |
|
"eval_accuracy": 0.9012345679012346, |
|
"eval_loss": 0.6121538877487183, |
|
"eval_runtime": 16.7144, |
|
"eval_samples_per_second": 48.461, |
|
"eval_steps_per_second": 6.103, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.6153846153846154, |
|
"grad_norm": 0.0200443584471941, |
|
"learning_rate": 6.923076923076924e-05, |
|
"loss": 0.0037, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.021741095930337906, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0162, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.717948717948718, |
|
"grad_norm": 1.7510725259780884, |
|
"learning_rate": 6.410256410256412e-05, |
|
"loss": 0.0044, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.769230769230769, |
|
"grad_norm": 0.018649321049451828, |
|
"learning_rate": 6.153846153846155e-05, |
|
"loss": 0.0034, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.8205128205128203, |
|
"grad_norm": 0.018490077927708626, |
|
"learning_rate": 5.897435897435898e-05, |
|
"loss": 0.0032, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.871794871794872, |
|
"grad_norm": 0.018017606809735298, |
|
"learning_rate": 5.6410256410256414e-05, |
|
"loss": 0.003, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.9230769230769234, |
|
"grad_norm": 0.01674257032573223, |
|
"learning_rate": 5.384615384615385e-05, |
|
"loss": 0.0031, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.9743589743589745, |
|
"grad_norm": 0.017541637644171715, |
|
"learning_rate": 5.128205128205128e-05, |
|
"loss": 0.0031, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.0256410256410255, |
|
"grad_norm": 0.016599087044596672, |
|
"learning_rate": 4.871794871794872e-05, |
|
"loss": 0.0029, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 0.017430290579795837, |
|
"learning_rate": 4.615384615384616e-05, |
|
"loss": 0.0029, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"eval_accuracy": 0.9024691358024691, |
|
"eval_loss": 0.6067318320274353, |
|
"eval_runtime": 16.2775, |
|
"eval_samples_per_second": 49.762, |
|
"eval_steps_per_second": 6.266, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.128205128205128, |
|
"grad_norm": 0.016984164714813232, |
|
"learning_rate": 4.358974358974359e-05, |
|
"loss": 0.003, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.1794871794871793, |
|
"grad_norm": 0.01852291077375412, |
|
"learning_rate": 4.1025641025641023e-05, |
|
"loss": 0.0028, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.230769230769231, |
|
"grad_norm": 0.016155986115336418, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.0066, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.282051282051282, |
|
"grad_norm": 0.015048054046928883, |
|
"learning_rate": 3.58974358974359e-05, |
|
"loss": 0.0027, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.014590720646083355, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0028, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.3846153846153846, |
|
"grad_norm": 0.015942472964525223, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 0.0027, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.435897435897436, |
|
"grad_norm": 0.015167794190347195, |
|
"learning_rate": 2.8205128205128207e-05, |
|
"loss": 0.0031, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.4871794871794872, |
|
"grad_norm": 0.015961118042469025, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.0027, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.5384615384615383, |
|
"grad_norm": 0.01679452508687973, |
|
"learning_rate": 2.307692307692308e-05, |
|
"loss": 0.0029, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.58974358974359, |
|
"grad_norm": 0.014717783778905869, |
|
"learning_rate": 2.0512820512820512e-05, |
|
"loss": 0.0027, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.58974358974359, |
|
"eval_accuracy": 0.9024691358024691, |
|
"eval_loss": 0.644864022731781, |
|
"eval_runtime": 16.2975, |
|
"eval_samples_per_second": 49.701, |
|
"eval_steps_per_second": 6.259, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.641025641025641, |
|
"grad_norm": 0.01574333943426609, |
|
"learning_rate": 1.794871794871795e-05, |
|
"loss": 0.0028, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.6923076923076925, |
|
"grad_norm": 0.014611059799790382, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 0.0029, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.7435897435897436, |
|
"grad_norm": 0.014431651681661606, |
|
"learning_rate": 1.282051282051282e-05, |
|
"loss": 0.0026, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.7948717948717947, |
|
"grad_norm": 0.015641087666153908, |
|
"learning_rate": 1.0256410256410256e-05, |
|
"loss": 0.0025, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"grad_norm": 0.021887468174099922, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 0.0027, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.8974358974358974, |
|
"grad_norm": 0.015822410583496094, |
|
"learning_rate": 5.128205128205128e-06, |
|
"loss": 0.0025, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.948717948717949, |
|
"grad_norm": 0.015171283856034279, |
|
"learning_rate": 2.564102564102564e-06, |
|
"loss": 0.0026, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.0153021439909935, |
|
"learning_rate": 0.0, |
|
"loss": 0.0028, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 780, |
|
"total_flos": 9.636990200991498e+17, |
|
"train_loss": 0.06401206638472967, |
|
"train_runtime": 946.5564, |
|
"train_samples_per_second": 13.138, |
|
"train_steps_per_second": 0.824 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 780, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.636990200991498e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|