{ "best_metric": 1.0, "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset\\checkpoint-237", "epoch": 4.18974358974359, "eval_steps": 500, "global_step": 390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02564102564102564, "grad_norm": 3.9795846939086914, "learning_rate": 1.282051282051282e-05, "loss": 1.2941, "step": 10 }, { "epoch": 0.05128205128205128, "grad_norm": 4.371654510498047, "learning_rate": 2.564102564102564e-05, "loss": 0.9964, "step": 20 }, { "epoch": 0.07692307692307693, "grad_norm": 3.8371341228485107, "learning_rate": 3.846153846153846e-05, "loss": 0.6155, "step": 30 }, { "epoch": 0.10256410256410256, "grad_norm": 6.883916854858398, "learning_rate": 4.985754985754986e-05, "loss": 0.2122, "step": 40 }, { "epoch": 0.1282051282051282, "grad_norm": 0.4472762644290924, "learning_rate": 4.8433048433048433e-05, "loss": 0.0886, "step": 50 }, { "epoch": 0.15384615384615385, "grad_norm": 1.2847106456756592, "learning_rate": 4.700854700854701e-05, "loss": 0.152, "step": 60 }, { "epoch": 0.1794871794871795, "grad_norm": 2.8317058086395264, "learning_rate": 4.558404558404559e-05, "loss": 0.1648, "step": 70 }, { "epoch": 0.20256410256410257, "eval_accuracy": 0.965, "eval_loss": 0.08287694305181503, "eval_runtime": 46.4491, "eval_samples_per_second": 4.306, "eval_steps_per_second": 0.215, "step": 79 }, { "epoch": 1.0025641025641026, "grad_norm": 0.3571479618549347, "learning_rate": 4.415954415954416e-05, "loss": 0.0616, "step": 80 }, { "epoch": 1.028205128205128, "grad_norm": 0.1090891882777214, "learning_rate": 4.2735042735042735e-05, "loss": 0.0393, "step": 90 }, { "epoch": 1.0538461538461539, "grad_norm": 1.746414065361023, "learning_rate": 4.131054131054131e-05, "loss": 0.0735, "step": 100 }, { "epoch": 1.0794871794871794, "grad_norm": 1.1220694780349731, "learning_rate": 3.988603988603989e-05, "loss": 0.0842, "step": 110 }, { "epoch": 1.1051282051282052, "grad_norm": 0.10681042075157166, "learning_rate": 3.846153846153846e-05, "loss": 0.0238, "step": 120 }, { "epoch": 1.1307692307692307, "grad_norm": 2.7694716453552246, "learning_rate": 3.7037037037037037e-05, "loss": 0.089, "step": 130 }, { "epoch": 1.1564102564102563, "grad_norm": 1.5847387313842773, "learning_rate": 3.561253561253561e-05, "loss": 0.019, "step": 140 }, { "epoch": 1.182051282051282, "grad_norm": 5.06767463684082, "learning_rate": 3.418803418803419e-05, "loss": 0.0751, "step": 150 }, { "epoch": 1.2025641025641025, "eval_accuracy": 0.96, "eval_loss": 0.07137803733348846, "eval_runtime": 45.3454, "eval_samples_per_second": 4.411, "eval_steps_per_second": 0.221, "step": 158 }, { "epoch": 2.005128205128205, "grad_norm": 0.07452063262462616, "learning_rate": 3.2763532763532764e-05, "loss": 0.0373, "step": 160 }, { "epoch": 2.0307692307692307, "grad_norm": 0.04081596061587334, "learning_rate": 3.133903133903134e-05, "loss": 0.0629, "step": 170 }, { "epoch": 2.056410256410256, "grad_norm": 0.3062734305858612, "learning_rate": 2.9914529914529915e-05, "loss": 0.0289, "step": 180 }, { "epoch": 2.082051282051282, "grad_norm": 0.08387012779712677, "learning_rate": 2.8490028490028492e-05, "loss": 0.0287, "step": 190 }, { "epoch": 2.1076923076923078, "grad_norm": 0.26985999941825867, "learning_rate": 2.706552706552707e-05, "loss": 0.0052, "step": 200 }, { "epoch": 2.1333333333333333, "grad_norm": 0.11916792392730713, "learning_rate": 2.564102564102564e-05, "loss": 0.0571, "step": 210 }, { "epoch": 2.158974358974359, "grad_norm": 0.18876294791698456, "learning_rate": 2.4216524216524217e-05, "loss": 0.0347, "step": 220 }, { "epoch": 2.184615384615385, "grad_norm": 0.23059749603271484, "learning_rate": 2.2792022792022794e-05, "loss": 0.03, "step": 230 }, { "epoch": 2.2025641025641027, "eval_accuracy": 1.0, "eval_loss": 0.007003530394285917, "eval_runtime": 44.7765, "eval_samples_per_second": 4.467, "eval_steps_per_second": 0.223, "step": 237 }, { "epoch": 3.0076923076923077, "grad_norm": 0.06344188004732132, "learning_rate": 2.1367521367521368e-05, "loss": 0.005, "step": 240 }, { "epoch": 3.033333333333333, "grad_norm": 0.05598565936088562, "learning_rate": 1.9943019943019945e-05, "loss": 0.0032, "step": 250 }, { "epoch": 3.0589743589743588, "grad_norm": 0.05869239196181297, "learning_rate": 1.8518518518518518e-05, "loss": 0.0211, "step": 260 }, { "epoch": 3.0846153846153848, "grad_norm": 0.013849779032170773, "learning_rate": 1.7094017094017095e-05, "loss": 0.0016, "step": 270 }, { "epoch": 3.1102564102564103, "grad_norm": 0.019467826932668686, "learning_rate": 1.566951566951567e-05, "loss": 0.0018, "step": 280 }, { "epoch": 3.135897435897436, "grad_norm": 0.012761811725795269, "learning_rate": 1.4245014245014246e-05, "loss": 0.0015, "step": 290 }, { "epoch": 3.1615384615384614, "grad_norm": 0.026674192398786545, "learning_rate": 1.282051282051282e-05, "loss": 0.0098, "step": 300 }, { "epoch": 3.1871794871794874, "grad_norm": 0.06651380658149719, "learning_rate": 1.1396011396011397e-05, "loss": 0.0029, "step": 310 }, { "epoch": 3.2025641025641027, "eval_accuracy": 1.0, "eval_loss": 0.0017288248054683208, "eval_runtime": 45.3086, "eval_samples_per_second": 4.414, "eval_steps_per_second": 0.221, "step": 316 }, { "epoch": 4.01025641025641, "grad_norm": 0.013002458028495312, "learning_rate": 9.971509971509972e-06, "loss": 0.002, "step": 320 }, { "epoch": 4.035897435897436, "grad_norm": 0.014323906041681767, "learning_rate": 8.547008547008548e-06, "loss": 0.0014, "step": 330 }, { "epoch": 4.061538461538461, "grad_norm": 0.05807347595691681, "learning_rate": 7.122507122507123e-06, "loss": 0.0014, "step": 340 }, { "epoch": 4.087179487179487, "grad_norm": 0.013779590837657452, "learning_rate": 5.6980056980056985e-06, "loss": 0.0021, "step": 350 }, { "epoch": 4.112820512820512, "grad_norm": 0.048702094703912735, "learning_rate": 4.273504273504274e-06, "loss": 0.0013, "step": 360 }, { "epoch": 4.138461538461539, "grad_norm": 0.020250266417860985, "learning_rate": 2.8490028490028492e-06, "loss": 0.0013, "step": 370 }, { "epoch": 4.164102564102564, "grad_norm": 0.011229473166167736, "learning_rate": 1.4245014245014246e-06, "loss": 0.0012, "step": 380 }, { "epoch": 4.18974358974359, "grad_norm": 0.014126256108283997, "learning_rate": 0.0, "loss": 0.0012, "step": 390 }, { "epoch": 4.18974358974359, "eval_accuracy": 1.0, "eval_loss": 0.0015335038769990206, "eval_runtime": 45.755, "eval_samples_per_second": 4.371, "eval_steps_per_second": 0.219, "step": 390 }, { "epoch": 4.18974358974359, "step": 390, "total_flos": 9.66963368152793e+18, "train_loss": 0.11110094871180944, "train_runtime": 2152.4894, "train_samples_per_second": 3.624, "train_steps_per_second": 0.181 }, { "epoch": 4.18974358974359, "eval_accuracy": 1.0, "eval_loss": 0.005171784199774265, "eval_runtime": 49.0405, "eval_samples_per_second": 4.017, "eval_steps_per_second": 0.204, "step": 390 }, { "epoch": 4.18974358974359, "eval_accuracy": 1.0, "eval_loss": 0.005171784199774265, "eval_runtime": 47.2902, "eval_samples_per_second": 4.166, "eval_steps_per_second": 0.211, "step": 390 } ], "logging_steps": 10, "max_steps": 390, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.66963368152793e+18, "train_batch_size": 20, "trial_name": null, "trial_params": null }