{ "best_metric": 0.25752508361204013, "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-465", "epoch": 15.03125, "eval_steps": 500, "global_step": 480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020833333333333332, "grad_norm": 4.865664958953857, "learning_rate": 1.0416666666666668e-05, "loss": 4.2486, "step": 10 }, { "epoch": 0.041666666666666664, "grad_norm": 5.374879837036133, "learning_rate": 2.0833333333333336e-05, "loss": 4.2806, "step": 20 }, { "epoch": 0.0625, "grad_norm": 4.254001140594482, "learning_rate": 3.125e-05, "loss": 4.2682, "step": 30 }, { "epoch": 0.06458333333333334, "eval_accuracy": 0.0033444816053511705, "eval_loss": 4.24931526184082, "eval_runtime": 156.0528, "eval_samples_per_second": 1.916, "eval_steps_per_second": 0.064, "step": 31 }, { "epoch": 1.01875, "grad_norm": 3.580606698989868, "learning_rate": 4.166666666666667e-05, "loss": 4.249, "step": 40 }, { "epoch": 1.0395833333333333, "grad_norm": 3.6750593185424805, "learning_rate": 4.976851851851852e-05, "loss": 4.2497, "step": 50 }, { "epoch": 1.0604166666666666, "grad_norm": 3.427530527114868, "learning_rate": 4.8611111111111115e-05, "loss": 4.2584, "step": 60 }, { "epoch": 1.0645833333333334, "eval_accuracy": 0.013377926421404682, "eval_loss": 4.243381023406982, "eval_runtime": 184.9, "eval_samples_per_second": 1.617, "eval_steps_per_second": 0.054, "step": 62 }, { "epoch": 2.0166666666666666, "grad_norm": 3.5070836544036865, "learning_rate": 4.745370370370371e-05, "loss": 4.221, "step": 70 }, { "epoch": 2.0375, "grad_norm": 3.814365863800049, "learning_rate": 4.62962962962963e-05, "loss": 4.249, "step": 80 }, { "epoch": 2.058333333333333, "grad_norm": 3.279081344604492, "learning_rate": 4.5138888888888894e-05, "loss": 4.2518, "step": 90 }, { "epoch": 2.064583333333333, "eval_accuracy": 0.016722408026755852, "eval_loss": 4.224606990814209, "eval_runtime": 182.615, "eval_samples_per_second": 1.637, "eval_steps_per_second": 0.055, "step": 93 }, { "epoch": 3.0145833333333334, "grad_norm": 2.9108073711395264, "learning_rate": 4.3981481481481486e-05, "loss": 4.2168, "step": 100 }, { "epoch": 3.035416666666667, "grad_norm": 3.1853749752044678, "learning_rate": 4.282407407407408e-05, "loss": 4.2103, "step": 110 }, { "epoch": 3.05625, "grad_norm": 3.1384546756744385, "learning_rate": 4.166666666666667e-05, "loss": 4.2445, "step": 120 }, { "epoch": 3.064583333333333, "eval_accuracy": 0.006688963210702341, "eval_loss": 4.220835208892822, "eval_runtime": 172.1522, "eval_samples_per_second": 1.737, "eval_steps_per_second": 0.058, "step": 124 }, { "epoch": 4.0125, "grad_norm": 2.7196435928344727, "learning_rate": 4.0509259259259265e-05, "loss": 4.2159, "step": 130 }, { "epoch": 4.033333333333333, "grad_norm": 2.6634223461151123, "learning_rate": 3.935185185185186e-05, "loss": 4.2063, "step": 140 }, { "epoch": 4.054166666666666, "grad_norm": 2.5531411170959473, "learning_rate": 3.8194444444444444e-05, "loss": 4.2272, "step": 150 }, { "epoch": 4.064583333333333, "eval_accuracy": 0.010033444816053512, "eval_loss": 4.223015308380127, "eval_runtime": 163.7094, "eval_samples_per_second": 1.826, "eval_steps_per_second": 0.061, "step": 155 }, { "epoch": 5.010416666666667, "grad_norm": 2.824289321899414, "learning_rate": 3.7037037037037037e-05, "loss": 4.2151, "step": 160 }, { "epoch": 5.03125, "grad_norm": 2.8366966247558594, "learning_rate": 3.587962962962963e-05, "loss": 4.183, "step": 170 }, { "epoch": 5.052083333333333, "grad_norm": 3.340677499771118, "learning_rate": 3.472222222222222e-05, "loss": 4.205, "step": 180 }, { "epoch": 5.064583333333333, "eval_accuracy": 0.023411371237458192, "eval_loss": 4.211067199707031, "eval_runtime": 166.937, "eval_samples_per_second": 1.791, "eval_steps_per_second": 0.06, "step": 186 }, { "epoch": 6.008333333333334, "grad_norm": 2.784593105316162, "learning_rate": 3.3564814814814815e-05, "loss": 4.1898, "step": 190 }, { "epoch": 6.029166666666667, "grad_norm": 3.389150381088257, "learning_rate": 3.240740740740741e-05, "loss": 4.1386, "step": 200 }, { "epoch": 6.05, "grad_norm": 3.5054867267608643, "learning_rate": 3.125e-05, "loss": 4.1238, "step": 210 }, { "epoch": 6.064583333333333, "eval_accuracy": 0.03678929765886288, "eval_loss": 4.111179828643799, "eval_runtime": 161.8854, "eval_samples_per_second": 1.847, "eval_steps_per_second": 0.062, "step": 217 }, { "epoch": 7.00625, "grad_norm": 4.150498867034912, "learning_rate": 3.0092592592592593e-05, "loss": 4.0898, "step": 220 }, { "epoch": 7.027083333333334, "grad_norm": 4.683104038238525, "learning_rate": 2.8935185185185186e-05, "loss": 4.0073, "step": 230 }, { "epoch": 7.047916666666667, "grad_norm": 4.379587650299072, "learning_rate": 2.777777777777778e-05, "loss": 3.9136, "step": 240 }, { "epoch": 7.064583333333333, "eval_accuracy": 0.07357859531772576, "eval_loss": 3.8529512882232666, "eval_runtime": 164.2718, "eval_samples_per_second": 1.82, "eval_steps_per_second": 0.061, "step": 248 }, { "epoch": 8.004166666666666, "grad_norm": 5.4900078773498535, "learning_rate": 2.6620370370370372e-05, "loss": 3.8304, "step": 250 }, { "epoch": 8.025, "grad_norm": 4.974089622497559, "learning_rate": 2.5462962962962965e-05, "loss": 3.7169, "step": 260 }, { "epoch": 8.045833333333333, "grad_norm": 5.386253833770752, "learning_rate": 2.4305555555555558e-05, "loss": 3.6241, "step": 270 }, { "epoch": 8.064583333333333, "eval_accuracy": 0.11705685618729098, "eval_loss": 3.6734354496002197, "eval_runtime": 159.295, "eval_samples_per_second": 1.877, "eval_steps_per_second": 0.063, "step": 279 }, { "epoch": 9.002083333333333, "grad_norm": 5.429062843322754, "learning_rate": 2.314814814814815e-05, "loss": 3.4885, "step": 280 }, { "epoch": 9.022916666666667, "grad_norm": 5.275113105773926, "learning_rate": 2.1990740740740743e-05, "loss": 3.4022, "step": 290 }, { "epoch": 9.04375, "grad_norm": 6.2369704246521, "learning_rate": 2.0833333333333336e-05, "loss": 3.2977, "step": 300 }, { "epoch": 9.064583333333333, "grad_norm": 13.635384559631348, "learning_rate": 1.967592592592593e-05, "loss": 3.3103, "step": 310 }, { "epoch": 9.064583333333333, "eval_accuracy": 0.10702341137123746, "eval_loss": 3.5260610580444336, "eval_runtime": 170.757, "eval_samples_per_second": 1.751, "eval_steps_per_second": 0.059, "step": 310 }, { "epoch": 10.020833333333334, "grad_norm": 6.099160194396973, "learning_rate": 1.8518518518518518e-05, "loss": 3.1017, "step": 320 }, { "epoch": 10.041666666666666, "grad_norm": 8.271078109741211, "learning_rate": 1.736111111111111e-05, "loss": 3.0604, "step": 330 }, { "epoch": 10.0625, "grad_norm": 6.345729827880859, "learning_rate": 1.6203703703703704e-05, "loss": 3.0981, "step": 340 }, { "epoch": 10.064583333333333, "eval_accuracy": 0.16387959866220736, "eval_loss": 3.3859572410583496, "eval_runtime": 165.4502, "eval_samples_per_second": 1.807, "eval_steps_per_second": 0.06, "step": 341 }, { "epoch": 11.01875, "grad_norm": 6.985031604766846, "learning_rate": 1.5046296296296297e-05, "loss": 2.9706, "step": 350 }, { "epoch": 11.039583333333333, "grad_norm": 6.81059455871582, "learning_rate": 1.388888888888889e-05, "loss": 2.916, "step": 360 }, { "epoch": 11.060416666666667, "grad_norm": 6.5202436447143555, "learning_rate": 1.2731481481481482e-05, "loss": 2.8216, "step": 370 }, { "epoch": 11.064583333333333, "eval_accuracy": 0.2140468227424749, "eval_loss": 3.179076671600342, "eval_runtime": 163.8066, "eval_samples_per_second": 1.825, "eval_steps_per_second": 0.061, "step": 372 }, { "epoch": 12.016666666666667, "grad_norm": 5.686159610748291, "learning_rate": 1.1574074074074075e-05, "loss": 2.7755, "step": 380 }, { "epoch": 12.0375, "grad_norm": 6.596237659454346, "learning_rate": 1.0416666666666668e-05, "loss": 2.824, "step": 390 }, { "epoch": 12.058333333333334, "grad_norm": 7.551661968231201, "learning_rate": 9.259259259259259e-06, "loss": 2.6108, "step": 400 }, { "epoch": 12.064583333333333, "eval_accuracy": 0.24414715719063546, "eval_loss": 3.1618316173553467, "eval_runtime": 177.1262, "eval_samples_per_second": 1.688, "eval_steps_per_second": 0.056, "step": 403 }, { "epoch": 13.014583333333333, "grad_norm": 6.725275039672852, "learning_rate": 8.101851851851852e-06, "loss": 2.7309, "step": 410 }, { "epoch": 13.035416666666666, "grad_norm": 7.1535563468933105, "learning_rate": 6.944444444444445e-06, "loss": 2.6451, "step": 420 }, { "epoch": 13.05625, "grad_norm": 7.705063819885254, "learning_rate": 5.787037037037038e-06, "loss": 2.598, "step": 430 }, { "epoch": 13.064583333333333, "eval_accuracy": 0.23411371237458195, "eval_loss": 3.0792758464813232, "eval_runtime": 186.6022, "eval_samples_per_second": 1.602, "eval_steps_per_second": 0.054, "step": 434 }, { "epoch": 14.0125, "grad_norm": 5.5132927894592285, "learning_rate": 4.6296296296296296e-06, "loss": 2.5532, "step": 440 }, { "epoch": 14.033333333333333, "grad_norm": 6.853929042816162, "learning_rate": 3.4722222222222224e-06, "loss": 2.56, "step": 450 }, { "epoch": 14.054166666666667, "grad_norm": 6.731771469116211, "learning_rate": 2.3148148148148148e-06, "loss": 2.5023, "step": 460 }, { "epoch": 14.064583333333333, "eval_accuracy": 0.25752508361204013, "eval_loss": 3.019373893737793, "eval_runtime": 155.2839, "eval_samples_per_second": 1.926, "eval_steps_per_second": 0.064, "step": 465 }, { "epoch": 15.010416666666666, "grad_norm": 6.570545673370361, "learning_rate": 1.1574074074074074e-06, "loss": 2.5538, "step": 470 }, { "epoch": 15.03125, "grad_norm": 6.480160236358643, "learning_rate": 0.0, "loss": 2.513, "step": 480 }, { "epoch": 15.03125, "eval_accuracy": 0.23745819397993312, "eval_loss": 3.0668206214904785, "eval_runtime": 187.3659, "eval_samples_per_second": 1.596, "eval_steps_per_second": 0.053, "step": 480 }, { "epoch": 15.03125, "step": 480, "total_flos": 1.8664399999458017e+19, "train_loss": 3.5868410070737204, "train_runtime": 12231.8484, "train_samples_per_second": 1.256, "train_steps_per_second": 0.039 }, { "epoch": 15.03125, "eval_accuracy": 0.25752508361204013, "eval_loss": 3.0240976810455322, "eval_runtime": 158.7962, "eval_samples_per_second": 1.883, "eval_steps_per_second": 0.063, "step": 480 }, { "epoch": 15.03125, "eval_accuracy": 0.25752508361204013, "eval_loss": 3.0235204696655273, "eval_runtime": 157.8403, "eval_samples_per_second": 1.894, "eval_steps_per_second": 0.063, "step": 480 } ], "logging_steps": 10, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8664399999458017e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }