|
{ |
|
"best_metric": 0.9916765755053508, |
|
"best_model_checkpoint": "/home/user/Desktop/ViT/MalImg/vit_finetuned/checkpoint-2271", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 3785, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06605019815059446, |
|
"grad_norm": 5.9336066246032715, |
|
"learning_rate": 1.9735799207397622e-05, |
|
"loss": 1.3733, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13210039630118892, |
|
"grad_norm": 4.252811908721924, |
|
"learning_rate": 1.9471598414795246e-05, |
|
"loss": 0.4773, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19815059445178335, |
|
"grad_norm": 1.3301128149032593, |
|
"learning_rate": 1.9207397622192866e-05, |
|
"loss": 0.2271, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.26420079260237783, |
|
"grad_norm": 0.1664501279592514, |
|
"learning_rate": 1.894319682959049e-05, |
|
"loss": 0.133, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33025099075297226, |
|
"grad_norm": 13.456730842590332, |
|
"learning_rate": 1.8678996036988114e-05, |
|
"loss": 0.1293, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3963011889035667, |
|
"grad_norm": 0.6937832832336426, |
|
"learning_rate": 1.8414795244385734e-05, |
|
"loss": 0.0888, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4623513870541612, |
|
"grad_norm": 0.14598596096038818, |
|
"learning_rate": 1.8150594451783358e-05, |
|
"loss": 0.0692, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5284015852047557, |
|
"grad_norm": 0.15269458293914795, |
|
"learning_rate": 1.7886393659180978e-05, |
|
"loss": 0.0578, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5944517833553501, |
|
"grad_norm": 0.07667677849531174, |
|
"learning_rate": 1.76221928665786e-05, |
|
"loss": 0.0573, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6605019815059445, |
|
"grad_norm": 0.3432880640029907, |
|
"learning_rate": 1.7357992073976226e-05, |
|
"loss": 0.0464, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.726552179656539, |
|
"grad_norm": 6.246374130249023, |
|
"learning_rate": 1.7093791281373846e-05, |
|
"loss": 0.0523, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7926023778071334, |
|
"grad_norm": 0.09200052171945572, |
|
"learning_rate": 1.6829590488771467e-05, |
|
"loss": 0.071, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8586525759577279, |
|
"grad_norm": 0.8642477989196777, |
|
"learning_rate": 1.656538969616909e-05, |
|
"loss": 0.0382, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9247027741083224, |
|
"grad_norm": 0.06948922574520111, |
|
"learning_rate": 1.630118890356671e-05, |
|
"loss": 0.0275, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9907529722589168, |
|
"grad_norm": 0.15016400814056396, |
|
"learning_rate": 1.6036988110964335e-05, |
|
"loss": 0.0471, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9797859690844233, |
|
"eval_loss": 0.052647557109594345, |
|
"eval_runtime": 49.7302, |
|
"eval_samples_per_second": 16.911, |
|
"eval_steps_per_second": 4.243, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.0568031704095113, |
|
"grad_norm": 0.7207925319671631, |
|
"learning_rate": 1.5772787318361958e-05, |
|
"loss": 0.0358, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.1228533685601056, |
|
"grad_norm": 0.022695371881127357, |
|
"learning_rate": 1.550858652575958e-05, |
|
"loss": 0.0142, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.1889035667107002, |
|
"grad_norm": 0.01373555138707161, |
|
"learning_rate": 1.52443857331572e-05, |
|
"loss": 0.0678, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2549537648612945, |
|
"grad_norm": 0.02284625917673111, |
|
"learning_rate": 1.4980184940554823e-05, |
|
"loss": 0.0174, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.321003963011889, |
|
"grad_norm": 0.03625660389661789, |
|
"learning_rate": 1.4715984147952445e-05, |
|
"loss": 0.0198, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3870541611624834, |
|
"grad_norm": 8.791983604431152, |
|
"learning_rate": 1.4451783355350067e-05, |
|
"loss": 0.0181, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.453104359313078, |
|
"grad_norm": 10.266106605529785, |
|
"learning_rate": 1.418758256274769e-05, |
|
"loss": 0.0166, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.5191545574636725, |
|
"grad_norm": 0.08618652075529099, |
|
"learning_rate": 1.3923381770145313e-05, |
|
"loss": 0.0216, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.5852047556142668, |
|
"grad_norm": 0.020607857033610344, |
|
"learning_rate": 1.3659180977542933e-05, |
|
"loss": 0.0166, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6512549537648613, |
|
"grad_norm": 0.008363209664821625, |
|
"learning_rate": 1.3394980184940555e-05, |
|
"loss": 0.011, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.7173051519154559, |
|
"grad_norm": 8.230094909667969, |
|
"learning_rate": 1.3130779392338177e-05, |
|
"loss": 0.0391, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.7833553500660502, |
|
"grad_norm": 0.2879900634288788, |
|
"learning_rate": 1.28665785997358e-05, |
|
"loss": 0.0281, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.8494055482166445, |
|
"grad_norm": 0.6117168068885803, |
|
"learning_rate": 1.2602377807133423e-05, |
|
"loss": 0.0275, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.9154557463672393, |
|
"grad_norm": 0.04467739537358284, |
|
"learning_rate": 1.2338177014531045e-05, |
|
"loss": 0.0301, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.9815059445178336, |
|
"grad_norm": 0.017908189445734024, |
|
"learning_rate": 1.2073976221928667e-05, |
|
"loss": 0.0178, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9881093935790726, |
|
"eval_loss": 0.03636582940816879, |
|
"eval_runtime": 47.4743, |
|
"eval_samples_per_second": 17.715, |
|
"eval_steps_per_second": 4.445, |
|
"step": 1514 |
|
}, |
|
{ |
|
"epoch": 2.047556142668428, |
|
"grad_norm": 0.036849573254585266, |
|
"learning_rate": 1.180977542932629e-05, |
|
"loss": 0.0187, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.1136063408190227, |
|
"grad_norm": 0.04460464045405388, |
|
"learning_rate": 1.1545574636723912e-05, |
|
"loss": 0.0067, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.179656538969617, |
|
"grad_norm": 0.009667308069765568, |
|
"learning_rate": 1.1281373844121532e-05, |
|
"loss": 0.0238, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.2457067371202113, |
|
"grad_norm": 0.03686352074146271, |
|
"learning_rate": 1.1017173051519154e-05, |
|
"loss": 0.0022, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.3117569352708056, |
|
"grad_norm": 0.031481340527534485, |
|
"learning_rate": 1.0752972258916778e-05, |
|
"loss": 0.0026, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.3778071334214004, |
|
"grad_norm": 0.011794793419539928, |
|
"learning_rate": 1.04887714663144e-05, |
|
"loss": 0.0116, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.4438573315719947, |
|
"grad_norm": 0.010463342070579529, |
|
"learning_rate": 1.0224570673712022e-05, |
|
"loss": 0.001, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.509907529722589, |
|
"grad_norm": 0.015383273363113403, |
|
"learning_rate": 9.960369881109644e-06, |
|
"loss": 0.0228, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.5759577278731838, |
|
"grad_norm": 0.013397900387644768, |
|
"learning_rate": 9.696169088507266e-06, |
|
"loss": 0.0089, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.642007926023778, |
|
"grad_norm": 0.00911177322268486, |
|
"learning_rate": 9.431968295904888e-06, |
|
"loss": 0.02, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.7080581241743724, |
|
"grad_norm": 0.02440851554274559, |
|
"learning_rate": 9.16776750330251e-06, |
|
"loss": 0.0013, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.7741083223249667, |
|
"grad_norm": 0.009484563954174519, |
|
"learning_rate": 8.903566710700134e-06, |
|
"loss": 0.0194, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.8401585204755615, |
|
"grad_norm": 0.006213477812707424, |
|
"learning_rate": 8.639365918097754e-06, |
|
"loss": 0.0029, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.906208718626156, |
|
"grad_norm": 0.0294838547706604, |
|
"learning_rate": 8.375165125495377e-06, |
|
"loss": 0.0155, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.9722589167767506, |
|
"grad_norm": 0.004683547653257847, |
|
"learning_rate": 8.110964332893e-06, |
|
"loss": 0.0012, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9916765755053508, |
|
"eval_loss": 0.03940876945853233, |
|
"eval_runtime": 46.7804, |
|
"eval_samples_per_second": 17.978, |
|
"eval_steps_per_second": 4.51, |
|
"step": 2271 |
|
}, |
|
{ |
|
"epoch": 3.038309114927345, |
|
"grad_norm": 0.012384450994431973, |
|
"learning_rate": 7.846763540290622e-06, |
|
"loss": 0.0023, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.104359313077939, |
|
"grad_norm": 0.01143250335007906, |
|
"learning_rate": 7.582562747688244e-06, |
|
"loss": 0.0016, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.1704095112285335, |
|
"grad_norm": 0.011226821690797806, |
|
"learning_rate": 7.318361955085866e-06, |
|
"loss": 0.0015, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.2364597093791283, |
|
"grad_norm": 0.1874089390039444, |
|
"learning_rate": 7.054161162483489e-06, |
|
"loss": 0.0048, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.3025099075297226, |
|
"grad_norm": 0.005090535152703524, |
|
"learning_rate": 6.78996036988111e-06, |
|
"loss": 0.0006, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.368560105680317, |
|
"grad_norm": 5.946714401245117, |
|
"learning_rate": 6.525759577278732e-06, |
|
"loss": 0.0039, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.4346103038309117, |
|
"grad_norm": 0.015056404285132885, |
|
"learning_rate": 6.261558784676355e-06, |
|
"loss": 0.0039, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.500660501981506, |
|
"grad_norm": 0.09337496757507324, |
|
"learning_rate": 5.997357992073977e-06, |
|
"loss": 0.0008, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 3.5667107001321003, |
|
"grad_norm": 0.02832830883562565, |
|
"learning_rate": 5.733157199471598e-06, |
|
"loss": 0.0017, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.6327608982826947, |
|
"grad_norm": 0.003939814865589142, |
|
"learning_rate": 5.468956406869221e-06, |
|
"loss": 0.0006, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.6988110964332894, |
|
"grad_norm": 0.009257273748517036, |
|
"learning_rate": 5.204755614266843e-06, |
|
"loss": 0.01, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.7648612945838837, |
|
"grad_norm": 0.03735257685184479, |
|
"learning_rate": 4.940554821664465e-06, |
|
"loss": 0.0007, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.830911492734478, |
|
"grad_norm": 0.013488363474607468, |
|
"learning_rate": 4.676354029062087e-06, |
|
"loss": 0.0064, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.896961690885073, |
|
"grad_norm": 0.03222118690609932, |
|
"learning_rate": 4.412153236459709e-06, |
|
"loss": 0.0024, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 3.963011889035667, |
|
"grad_norm": 0.007759902160614729, |
|
"learning_rate": 4.147952443857332e-06, |
|
"loss": 0.0007, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9916765755053508, |
|
"eval_loss": 0.030070677399635315, |
|
"eval_runtime": 47.0985, |
|
"eval_samples_per_second": 17.856, |
|
"eval_steps_per_second": 4.48, |
|
"step": 3028 |
|
}, |
|
{ |
|
"epoch": 4.0290620871862615, |
|
"grad_norm": 0.005042864475399256, |
|
"learning_rate": 3.8837516512549536e-06, |
|
"loss": 0.0004, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 4.095112285336856, |
|
"grad_norm": 0.020858343690633774, |
|
"learning_rate": 3.619550858652576e-06, |
|
"loss": 0.0004, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.16116248348745, |
|
"grad_norm": 0.006172764115035534, |
|
"learning_rate": 3.3553500660501986e-06, |
|
"loss": 0.0007, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 4.227212681638045, |
|
"grad_norm": 0.0033390983007848263, |
|
"learning_rate": 3.0911492734478207e-06, |
|
"loss": 0.0009, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.29326287978864, |
|
"grad_norm": 0.015372613444924355, |
|
"learning_rate": 2.8269484808454427e-06, |
|
"loss": 0.0006, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.359313077939234, |
|
"grad_norm": 0.006469820160418749, |
|
"learning_rate": 2.5627476882430652e-06, |
|
"loss": 0.0004, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.425363276089828, |
|
"grad_norm": 0.011778703890740871, |
|
"learning_rate": 2.298546895640687e-06, |
|
"loss": 0.0004, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 4.491413474240423, |
|
"grad_norm": 0.006380158942192793, |
|
"learning_rate": 2.0343461030383094e-06, |
|
"loss": 0.0004, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.557463672391017, |
|
"grad_norm": 0.0013477399479597807, |
|
"learning_rate": 1.7701453104359315e-06, |
|
"loss": 0.0004, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 4.623513870541611, |
|
"grad_norm": 0.006167972926050425, |
|
"learning_rate": 1.5059445178335536e-06, |
|
"loss": 0.0042, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.689564068692206, |
|
"grad_norm": 0.013963188044726849, |
|
"learning_rate": 1.2417437252311758e-06, |
|
"loss": 0.0008, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 4.755614266842801, |
|
"grad_norm": 0.027660081163048744, |
|
"learning_rate": 9.77542932628798e-07, |
|
"loss": 0.0004, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.821664464993395, |
|
"grad_norm": 0.004752615932375193, |
|
"learning_rate": 7.133421400264201e-07, |
|
"loss": 0.0004, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 4.887714663143989, |
|
"grad_norm": 0.009864550083875656, |
|
"learning_rate": 4.4914134742404235e-07, |
|
"loss": 0.0004, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 4.953764861294584, |
|
"grad_norm": 0.020459244027733803, |
|
"learning_rate": 1.849405548216645e-07, |
|
"loss": 0.0003, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9892984542211652, |
|
"eval_loss": 0.035547275096178055, |
|
"eval_runtime": 47.2255, |
|
"eval_samples_per_second": 17.808, |
|
"eval_steps_per_second": 4.468, |
|
"step": 3785 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 3785, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.9313511964758426e+18, |
|
"train_batch_size": 10, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|