{ "best_metric": 0.5358908772468567, "best_model_checkpoint": "./vit-brain-tumour-v2/checkpoint-200", "epoch": 4.0, "eval_steps": 100, "global_step": 780, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05128205128205128, "grad_norm": 1.8319804668426514, "learning_rate": 0.00019743589743589744, "loss": 0.8818, "step": 10 }, { "epoch": 0.10256410256410256, "grad_norm": 0.8907442092895508, "learning_rate": 0.00019487179487179487, "loss": 0.3732, "step": 20 }, { "epoch": 0.15384615384615385, "grad_norm": 0.5684014558792114, "learning_rate": 0.00019230769230769233, "loss": 0.2491, "step": 30 }, { "epoch": 0.20512820512820512, "grad_norm": 0.4306720495223999, "learning_rate": 0.00018974358974358974, "loss": 0.1864, "step": 40 }, { "epoch": 0.2564102564102564, "grad_norm": 5.443202018737793, "learning_rate": 0.0001871794871794872, "loss": 0.2069, "step": 50 }, { "epoch": 0.3076923076923077, "grad_norm": 0.45025336742401123, "learning_rate": 0.00018461538461538463, "loss": 0.2037, "step": 60 }, { "epoch": 0.358974358974359, "grad_norm": 0.8258907198905945, "learning_rate": 0.00018205128205128207, "loss": 0.1452, "step": 70 }, { "epoch": 0.41025641025641024, "grad_norm": 0.5631661415100098, "learning_rate": 0.0001794871794871795, "loss": 0.2959, "step": 80 }, { "epoch": 0.46153846153846156, "grad_norm": 0.5860559940338135, "learning_rate": 0.00017692307692307693, "loss": 0.0992, "step": 90 }, { "epoch": 0.5128205128205128, "grad_norm": 3.4091548919677734, "learning_rate": 0.00017435897435897436, "loss": 0.1236, "step": 100 }, { "epoch": 0.5128205128205128, "eval_accuracy": 0.8481481481481481, "eval_loss": 0.5989510416984558, "eval_runtime": 15.9913, "eval_samples_per_second": 50.652, "eval_steps_per_second": 6.378, "step": 100 }, { "epoch": 0.5641025641025641, "grad_norm": 1.3914088010787964, "learning_rate": 0.0001717948717948718, "loss": 0.1454, "step": 110 }, { "epoch": 0.6153846153846154, "grad_norm": 2.7670581340789795, "learning_rate": 0.00016923076923076923, "loss": 0.1344, "step": 120 }, { "epoch": 0.6666666666666666, "grad_norm": 0.14079055190086365, "learning_rate": 0.0001666666666666667, "loss": 0.2146, "step": 130 }, { "epoch": 0.717948717948718, "grad_norm": 0.2149924784898758, "learning_rate": 0.0001641025641025641, "loss": 0.1724, "step": 140 }, { "epoch": 0.7692307692307693, "grad_norm": 4.1687703132629395, "learning_rate": 0.00016153846153846155, "loss": 0.1342, "step": 150 }, { "epoch": 0.8205128205128205, "grad_norm": 0.09244555979967117, "learning_rate": 0.00015897435897435896, "loss": 0.1174, "step": 160 }, { "epoch": 0.8717948717948718, "grad_norm": 3.8858108520507812, "learning_rate": 0.00015641025641025642, "loss": 0.1445, "step": 170 }, { "epoch": 0.9230769230769231, "grad_norm": 0.13063883781433105, "learning_rate": 0.00015384615384615385, "loss": 0.086, "step": 180 }, { "epoch": 0.9743589743589743, "grad_norm": 0.4698440432548523, "learning_rate": 0.00015128205128205128, "loss": 0.0978, "step": 190 }, { "epoch": 1.0256410256410255, "grad_norm": 0.7778797149658203, "learning_rate": 0.00014871794871794872, "loss": 0.1695, "step": 200 }, { "epoch": 1.0256410256410255, "eval_accuracy": 0.8703703703703703, "eval_loss": 0.5358908772468567, "eval_runtime": 15.8447, "eval_samples_per_second": 51.121, "eval_steps_per_second": 6.437, "step": 200 }, { "epoch": 1.0769230769230769, "grad_norm": 0.09852629154920578, "learning_rate": 0.00014615384615384615, "loss": 0.0657, "step": 210 }, { "epoch": 1.1282051282051282, "grad_norm": 3.54819655418396, "learning_rate": 0.0001435897435897436, "loss": 0.0461, "step": 220 }, { "epoch": 1.1794871794871795, "grad_norm": 0.0645141452550888, "learning_rate": 0.00014102564102564104, "loss": 0.0754, "step": 230 }, { "epoch": 1.2307692307692308, "grad_norm": 0.2788432240486145, "learning_rate": 0.00013846153846153847, "loss": 0.0592, "step": 240 }, { "epoch": 1.282051282051282, "grad_norm": 0.9937513470649719, "learning_rate": 0.0001358974358974359, "loss": 0.0509, "step": 250 }, { "epoch": 1.3333333333333333, "grad_norm": 1.003893494606018, "learning_rate": 0.00013333333333333334, "loss": 0.0319, "step": 260 }, { "epoch": 1.3846153846153846, "grad_norm": 0.04893897473812103, "learning_rate": 0.00013076923076923077, "loss": 0.0348, "step": 270 }, { "epoch": 1.435897435897436, "grad_norm": 1.5978337526321411, "learning_rate": 0.00012820512820512823, "loss": 0.0434, "step": 280 }, { "epoch": 1.4871794871794872, "grad_norm": 0.044942498207092285, "learning_rate": 0.00012564102564102564, "loss": 0.0787, "step": 290 }, { "epoch": 1.5384615384615383, "grad_norm": 0.04497215896844864, "learning_rate": 0.0001230769230769231, "loss": 0.0186, "step": 300 }, { "epoch": 1.5384615384615383, "eval_accuracy": 0.8975308641975308, "eval_loss": 0.570486843585968, "eval_runtime": 16.7695, "eval_samples_per_second": 48.302, "eval_steps_per_second": 6.082, "step": 300 }, { "epoch": 1.5897435897435899, "grad_norm": 0.03568781167268753, "learning_rate": 0.00012051282051282052, "loss": 0.0339, "step": 310 }, { "epoch": 1.641025641025641, "grad_norm": 0.034062109887599945, "learning_rate": 0.00011794871794871796, "loss": 0.0072, "step": 320 }, { "epoch": 1.6923076923076923, "grad_norm": 0.03320033475756645, "learning_rate": 0.00011538461538461538, "loss": 0.0062, "step": 330 }, { "epoch": 1.7435897435897436, "grad_norm": 0.03138517960906029, "learning_rate": 0.00011282051282051283, "loss": 0.0063, "step": 340 }, { "epoch": 1.7948717948717947, "grad_norm": 0.04579005390405655, "learning_rate": 0.00011025641025641027, "loss": 0.0064, "step": 350 }, { "epoch": 1.8461538461538463, "grad_norm": 0.02792205847799778, "learning_rate": 0.0001076923076923077, "loss": 0.0076, "step": 360 }, { "epoch": 1.8974358974358974, "grad_norm": 0.03118390031158924, "learning_rate": 0.00010512820512820514, "loss": 0.0068, "step": 370 }, { "epoch": 1.9487179487179487, "grad_norm": 0.02970140054821968, "learning_rate": 0.00010256410256410256, "loss": 0.006, "step": 380 }, { "epoch": 2.0, "grad_norm": 0.09769738465547562, "learning_rate": 0.0001, "loss": 0.0374, "step": 390 }, { "epoch": 2.051282051282051, "grad_norm": 0.09561528265476227, "learning_rate": 9.743589743589744e-05, "loss": 0.0368, "step": 400 }, { "epoch": 2.051282051282051, "eval_accuracy": 0.8975308641975308, "eval_loss": 0.6135894656181335, "eval_runtime": 16.0452, "eval_samples_per_second": 50.482, "eval_steps_per_second": 6.357, "step": 400 }, { "epoch": 2.1025641025641026, "grad_norm": 0.02449757605791092, "learning_rate": 9.487179487179487e-05, "loss": 0.0051, "step": 410 }, { "epoch": 2.1538461538461537, "grad_norm": 0.02481868490576744, "learning_rate": 9.230769230769232e-05, "loss": 0.0044, "step": 420 }, { "epoch": 2.2051282051282053, "grad_norm": 0.02576032653450966, "learning_rate": 8.974358974358975e-05, "loss": 0.0044, "step": 430 }, { "epoch": 2.2564102564102564, "grad_norm": 0.026232751086354256, "learning_rate": 8.717948717948718e-05, "loss": 0.0043, "step": 440 }, { "epoch": 2.3076923076923075, "grad_norm": 0.02455274388194084, "learning_rate": 8.461538461538461e-05, "loss": 0.0039, "step": 450 }, { "epoch": 2.358974358974359, "grad_norm": 0.02121666818857193, "learning_rate": 8.205128205128205e-05, "loss": 0.007, "step": 460 }, { "epoch": 2.41025641025641, "grad_norm": 0.022693779319524765, "learning_rate": 7.948717948717948e-05, "loss": 0.0045, "step": 470 }, { "epoch": 2.4615384615384617, "grad_norm": 0.8521128296852112, "learning_rate": 7.692307692307693e-05, "loss": 0.0044, "step": 480 }, { "epoch": 2.5128205128205128, "grad_norm": 0.01888178288936615, "learning_rate": 7.435897435897436e-05, "loss": 0.0117, "step": 490 }, { "epoch": 2.564102564102564, "grad_norm": 0.026412444189190865, "learning_rate": 7.17948717948718e-05, "loss": 0.0036, "step": 500 }, { "epoch": 2.564102564102564, "eval_accuracy": 0.9012345679012346, "eval_loss": 0.6121538877487183, "eval_runtime": 16.7144, "eval_samples_per_second": 48.461, "eval_steps_per_second": 6.103, "step": 500 }, { "epoch": 2.6153846153846154, "grad_norm": 0.0200443584471941, "learning_rate": 6.923076923076924e-05, "loss": 0.0037, "step": 510 }, { "epoch": 2.6666666666666665, "grad_norm": 0.021741095930337906, "learning_rate": 6.666666666666667e-05, "loss": 0.0162, "step": 520 }, { "epoch": 2.717948717948718, "grad_norm": 1.7510725259780884, "learning_rate": 6.410256410256412e-05, "loss": 0.0044, "step": 530 }, { "epoch": 2.769230769230769, "grad_norm": 0.018649321049451828, "learning_rate": 6.153846153846155e-05, "loss": 0.0034, "step": 540 }, { "epoch": 2.8205128205128203, "grad_norm": 0.018490077927708626, "learning_rate": 5.897435897435898e-05, "loss": 0.0032, "step": 550 }, { "epoch": 2.871794871794872, "grad_norm": 0.018017606809735298, "learning_rate": 5.6410256410256414e-05, "loss": 0.003, "step": 560 }, { "epoch": 2.9230769230769234, "grad_norm": 0.01674257032573223, "learning_rate": 5.384615384615385e-05, "loss": 0.0031, "step": 570 }, { "epoch": 2.9743589743589745, "grad_norm": 0.017541637644171715, "learning_rate": 5.128205128205128e-05, "loss": 0.0031, "step": 580 }, { "epoch": 3.0256410256410255, "grad_norm": 0.016599087044596672, "learning_rate": 4.871794871794872e-05, "loss": 0.0029, "step": 590 }, { "epoch": 3.076923076923077, "grad_norm": 0.017430290579795837, "learning_rate": 4.615384615384616e-05, "loss": 0.0029, "step": 600 }, { "epoch": 3.076923076923077, "eval_accuracy": 0.9024691358024691, "eval_loss": 0.6067318320274353, "eval_runtime": 16.2775, "eval_samples_per_second": 49.762, "eval_steps_per_second": 6.266, "step": 600 }, { "epoch": 3.128205128205128, "grad_norm": 0.016984164714813232, "learning_rate": 4.358974358974359e-05, "loss": 0.003, "step": 610 }, { "epoch": 3.1794871794871793, "grad_norm": 0.01852291077375412, "learning_rate": 4.1025641025641023e-05, "loss": 0.0028, "step": 620 }, { "epoch": 3.230769230769231, "grad_norm": 0.016155986115336418, "learning_rate": 3.846153846153846e-05, "loss": 0.0066, "step": 630 }, { "epoch": 3.282051282051282, "grad_norm": 0.015048054046928883, "learning_rate": 3.58974358974359e-05, "loss": 0.0027, "step": 640 }, { "epoch": 3.3333333333333335, "grad_norm": 0.014590720646083355, "learning_rate": 3.3333333333333335e-05, "loss": 0.0028, "step": 650 }, { "epoch": 3.3846153846153846, "grad_norm": 0.015942472964525223, "learning_rate": 3.0769230769230774e-05, "loss": 0.0027, "step": 660 }, { "epoch": 3.435897435897436, "grad_norm": 0.015167794190347195, "learning_rate": 2.8205128205128207e-05, "loss": 0.0031, "step": 670 }, { "epoch": 3.4871794871794872, "grad_norm": 0.015961118042469025, "learning_rate": 2.564102564102564e-05, "loss": 0.0027, "step": 680 }, { "epoch": 3.5384615384615383, "grad_norm": 0.01679452508687973, "learning_rate": 2.307692307692308e-05, "loss": 0.0029, "step": 690 }, { "epoch": 3.58974358974359, "grad_norm": 0.014717783778905869, "learning_rate": 2.0512820512820512e-05, "loss": 0.0027, "step": 700 }, { "epoch": 3.58974358974359, "eval_accuracy": 0.9024691358024691, "eval_loss": 0.644864022731781, "eval_runtime": 16.2975, "eval_samples_per_second": 49.701, "eval_steps_per_second": 6.259, "step": 700 }, { "epoch": 3.641025641025641, "grad_norm": 0.01574333943426609, "learning_rate": 1.794871794871795e-05, "loss": 0.0028, "step": 710 }, { "epoch": 3.6923076923076925, "grad_norm": 0.014611059799790382, "learning_rate": 1.5384615384615387e-05, "loss": 0.0029, "step": 720 }, { "epoch": 3.7435897435897436, "grad_norm": 0.014431651681661606, "learning_rate": 1.282051282051282e-05, "loss": 0.0026, "step": 730 }, { "epoch": 3.7948717948717947, "grad_norm": 0.015641087666153908, "learning_rate": 1.0256410256410256e-05, "loss": 0.0025, "step": 740 }, { "epoch": 3.8461538461538463, "grad_norm": 0.021887468174099922, "learning_rate": 7.692307692307694e-06, "loss": 0.0027, "step": 750 }, { "epoch": 3.8974358974358974, "grad_norm": 0.015822410583496094, "learning_rate": 5.128205128205128e-06, "loss": 0.0025, "step": 760 }, { "epoch": 3.948717948717949, "grad_norm": 0.015171283856034279, "learning_rate": 2.564102564102564e-06, "loss": 0.0026, "step": 770 }, { "epoch": 4.0, "grad_norm": 0.0153021439909935, "learning_rate": 0.0, "loss": 0.0028, "step": 780 }, { "epoch": 4.0, "step": 780, "total_flos": 9.636990200991498e+17, "train_loss": 0.06401206638472967, "train_runtime": 946.5564, "train_samples_per_second": 13.138, "train_steps_per_second": 0.824 } ], "logging_steps": 10, "max_steps": 780, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.636990200991498e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }