{
  "best_metric": 0.9751908396946565,
  "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-lora-medmnistv2/checkpoint-221",
  "epoch": 9.898305084745763,
  "eval_steps": 500,
  "global_step": 730,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13559322033898305,
      "grad_norm": 0.5722031593322754,
      "learning_rate": 0.004931506849315068,
      "loss": 0.4736,
      "step": 10
    },
    {
      "epoch": 0.2711864406779661,
      "grad_norm": 2.089559316635132,
      "learning_rate": 0.0048630136986301375,
      "loss": 0.4462,
      "step": 20
    },
    {
      "epoch": 0.4067796610169492,
      "grad_norm": 3.0388331413269043,
      "learning_rate": 0.004794520547945206,
      "loss": 0.2969,
      "step": 30
    },
    {
      "epoch": 0.5423728813559322,
      "grad_norm": 0.4622490406036377,
      "learning_rate": 0.004726027397260274,
      "loss": 0.309,
      "step": 40
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 1.2164361476898193,
      "learning_rate": 0.004657534246575342,
      "loss": 0.2817,
      "step": 50
    },
    {
      "epoch": 0.8135593220338984,
      "grad_norm": 0.30212166905403137,
      "learning_rate": 0.004589041095890411,
      "loss": 0.24,
      "step": 60
    },
    {
      "epoch": 0.9491525423728814,
      "grad_norm": 0.9486951231956482,
      "learning_rate": 0.00452054794520548,
      "loss": 0.2447,
      "step": 70
    },
    {
      "epoch": 0.9898305084745763,
      "eval_accuracy": 0.9561068702290076,
      "eval_f1": 0.9445770389275943,
      "eval_loss": 0.11798465996980667,
      "eval_precision": 0.9313190730837789,
      "eval_recall": 0.9607635913548509,
      "eval_runtime": 2.3492,
      "eval_samples_per_second": 223.055,
      "eval_steps_per_second": 14.047,
      "step": 73
    },
    {
      "epoch": 1.0847457627118644,
      "grad_norm": 1.1987316608428955,
      "learning_rate": 0.004452054794520548,
      "loss": 0.2017,
      "step": 80
    },
    {
      "epoch": 1.2203389830508475,
      "grad_norm": 0.6783656477928162,
      "learning_rate": 0.004383561643835616,
      "loss": 0.2214,
      "step": 90
    },
    {
      "epoch": 1.3559322033898304,
      "grad_norm": 1.073630452156067,
      "learning_rate": 0.004315068493150685,
      "loss": 0.1989,
      "step": 100
    },
    {
      "epoch": 1.4915254237288136,
      "grad_norm": 0.3175281584262848,
      "learning_rate": 0.0042465753424657535,
      "loss": 0.1857,
      "step": 110
    },
    {
      "epoch": 1.6271186440677967,
      "grad_norm": 0.6710169911384583,
      "learning_rate": 0.004178082191780822,
      "loss": 0.2305,
      "step": 120
    },
    {
      "epoch": 1.7627118644067796,
      "grad_norm": 0.8543218374252319,
      "learning_rate": 0.00410958904109589,
      "loss": 0.2111,
      "step": 130
    },
    {
      "epoch": 1.8983050847457628,
      "grad_norm": 0.6326661109924316,
      "learning_rate": 0.004041095890410959,
      "loss": 0.2136,
      "step": 140
    },
    {
      "epoch": 1.993220338983051,
      "eval_accuracy": 0.9637404580152672,
      "eval_f1": 0.9529434006853362,
      "eval_loss": 0.10151813924312592,
      "eval_precision": 0.9497822332357138,
      "eval_recall": 0.9562315528896506,
      "eval_runtime": 2.4266,
      "eval_samples_per_second": 215.944,
      "eval_steps_per_second": 13.6,
      "step": 147
    },
    {
      "epoch": 2.0338983050847457,
      "grad_norm": 0.40849238634109497,
      "learning_rate": 0.003972602739726027,
      "loss": 0.2148,
      "step": 150
    },
    {
      "epoch": 2.169491525423729,
      "grad_norm": 1.0935662984848022,
      "learning_rate": 0.003904109589041096,
      "loss": 0.2106,
      "step": 160
    },
    {
      "epoch": 2.305084745762712,
      "grad_norm": 0.6485463380813599,
      "learning_rate": 0.0038356164383561643,
      "loss": 0.2205,
      "step": 170
    },
    {
      "epoch": 2.440677966101695,
      "grad_norm": 0.5151948928833008,
      "learning_rate": 0.003767123287671233,
      "loss": 0.2018,
      "step": 180
    },
    {
      "epoch": 2.576271186440678,
      "grad_norm": 0.5166621804237366,
      "learning_rate": 0.0036986301369863013,
      "loss": 0.1827,
      "step": 190
    },
    {
      "epoch": 2.711864406779661,
      "grad_norm": 0.8355435729026794,
      "learning_rate": 0.00363013698630137,
      "loss": 0.2171,
      "step": 200
    },
    {
      "epoch": 2.847457627118644,
      "grad_norm": 0.558962881565094,
      "learning_rate": 0.003561643835616438,
      "loss": 0.1845,
      "step": 210
    },
    {
      "epoch": 2.983050847457627,
      "grad_norm": 0.6180588603019714,
      "learning_rate": 0.003493150684931507,
      "loss": 0.1431,
      "step": 220
    },
    {
      "epoch": 2.9966101694915253,
      "eval_accuracy": 0.9751908396946565,
      "eval_f1": 0.9671702932600785,
      "eval_loss": 0.07291658967733383,
      "eval_precision": 0.9731940648184303,
      "eval_recall": 0.9615252784918595,
      "eval_runtime": 2.3109,
      "eval_samples_per_second": 226.748,
      "eval_steps_per_second": 14.28,
      "step": 221
    },
    {
      "epoch": 3.1186440677966103,
      "grad_norm": 0.4949386715888977,
      "learning_rate": 0.003424657534246575,
      "loss": 0.2148,
      "step": 230
    },
    {
      "epoch": 3.2542372881355934,
      "grad_norm": 0.5671024918556213,
      "learning_rate": 0.003356164383561644,
      "loss": 0.172,
      "step": 240
    },
    {
      "epoch": 3.389830508474576,
      "grad_norm": 0.515737771987915,
      "learning_rate": 0.003287671232876712,
      "loss": 0.1962,
      "step": 250
    },
    {
      "epoch": 3.5254237288135593,
      "grad_norm": 0.42013198137283325,
      "learning_rate": 0.0032191780821917808,
      "loss": 0.1571,
      "step": 260
    },
    {
      "epoch": 3.6610169491525424,
      "grad_norm": 0.41014379262924194,
      "learning_rate": 0.003150684931506849,
      "loss": 0.1905,
      "step": 270
    },
    {
      "epoch": 3.7966101694915255,
      "grad_norm": 0.48967182636260986,
      "learning_rate": 0.003082191780821918,
      "loss": 0.1344,
      "step": 280
    },
    {
      "epoch": 3.9322033898305087,
      "grad_norm": 0.3962344229221344,
      "learning_rate": 0.0030136986301369864,
      "loss": 0.1576,
      "step": 290
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9637404580152672,
      "eval_f1": 0.9531647653769258,
      "eval_loss": 0.08732008934020996,
      "eval_precision": 0.9480282738095238,
      "eval_recall": 0.9586499095496526,
      "eval_runtime": 2.3116,
      "eval_samples_per_second": 226.687,
      "eval_steps_per_second": 14.276,
      "step": 295
    },
    {
      "epoch": 4.067796610169491,
      "grad_norm": 0.28995221853256226,
      "learning_rate": 0.002945205479452055,
      "loss": 0.1319,
      "step": 300
    },
    {
      "epoch": 4.203389830508475,
      "grad_norm": 0.5315499901771545,
      "learning_rate": 0.0028767123287671234,
      "loss": 0.2781,
      "step": 310
    },
    {
      "epoch": 4.338983050847458,
      "grad_norm": 0.24718379974365234,
      "learning_rate": 0.002808219178082192,
      "loss": 0.1896,
      "step": 320
    },
    {
      "epoch": 4.47457627118644,
      "grad_norm": 0.404805988073349,
      "learning_rate": 0.0027397260273972603,
      "loss": 0.1829,
      "step": 330
    },
    {
      "epoch": 4.610169491525424,
      "grad_norm": 0.5308801531791687,
      "learning_rate": 0.002671232876712329,
      "loss": 0.1665,
      "step": 340
    },
    {
      "epoch": 4.745762711864407,
      "grad_norm": 0.7429054975509644,
      "learning_rate": 0.0026027397260273972,
      "loss": 0.1646,
      "step": 350
    },
    {
      "epoch": 4.88135593220339,
      "grad_norm": 0.6361825466156006,
      "learning_rate": 0.002534246575342466,
      "loss": 0.2072,
      "step": 360
    },
    {
      "epoch": 4.989830508474577,
      "eval_accuracy": 0.9713740458015268,
      "eval_f1": 0.9626722135947228,
      "eval_loss": 0.076077401638031,
      "eval_precision": 0.9615676167374165,
      "eval_recall": 0.9637912977244597,
      "eval_runtime": 2.3751,
      "eval_samples_per_second": 220.624,
      "eval_steps_per_second": 13.894,
      "step": 368
    },
    {
      "epoch": 5.016949152542373,
      "grad_norm": 0.6114002466201782,
      "learning_rate": 0.002465753424657534,
      "loss": 0.1943,
      "step": 370
    },
    {
      "epoch": 5.1525423728813555,
      "grad_norm": 0.32443058490753174,
      "learning_rate": 0.002397260273972603,
      "loss": 0.1541,
      "step": 380
    },
    {
      "epoch": 5.288135593220339,
      "grad_norm": 0.44808831810951233,
      "learning_rate": 0.002328767123287671,
      "loss": 0.1397,
      "step": 390
    },
    {
      "epoch": 5.423728813559322,
      "grad_norm": 0.478524386882782,
      "learning_rate": 0.00226027397260274,
      "loss": 0.1647,
      "step": 400
    },
    {
      "epoch": 5.559322033898305,
      "grad_norm": 0.8286917805671692,
      "learning_rate": 0.002191780821917808,
      "loss": 0.1532,
      "step": 410
    },
    {
      "epoch": 5.694915254237288,
      "grad_norm": 0.7862728238105774,
      "learning_rate": 0.0021232876712328768,
      "loss": 0.1474,
      "step": 420
    },
    {
      "epoch": 5.830508474576272,
      "grad_norm": 1.1601946353912354,
      "learning_rate": 0.002054794520547945,
      "loss": 0.2347,
      "step": 430
    },
    {
      "epoch": 5.966101694915254,
      "grad_norm": 1.3851842880249023,
      "learning_rate": 0.0019863013698630137,
      "loss": 0.1908,
      "step": 440
    },
    {
      "epoch": 5.9932203389830505,
      "eval_accuracy": 0.9599236641221374,
      "eval_f1": 0.9496169994551458,
      "eval_loss": 0.10439441353082657,
      "eval_precision": 0.9348118279569892,
      "eval_recall": 0.9681709987622584,
      "eval_runtime": 2.3199,
      "eval_samples_per_second": 225.87,
      "eval_steps_per_second": 14.225,
      "step": 442
    },
    {
      "epoch": 6.101694915254237,
      "grad_norm": 0.6037909388542175,
      "learning_rate": 0.0019178082191780822,
      "loss": 0.1326,
      "step": 450
    },
    {
      "epoch": 6.237288135593221,
      "grad_norm": 0.6797343492507935,
      "learning_rate": 0.0018493150684931506,
      "loss": 0.1909,
      "step": 460
    },
    {
      "epoch": 6.372881355932203,
      "grad_norm": 0.5503271222114563,
      "learning_rate": 0.0017876712328767123,
      "loss": 0.1328,
      "step": 470
    },
    {
      "epoch": 6.508474576271187,
      "grad_norm": 0.46364396810531616,
      "learning_rate": 0.0017191780821917808,
      "loss": 0.1586,
      "step": 480
    },
    {
      "epoch": 6.6440677966101696,
      "grad_norm": 0.7383654713630676,
      "learning_rate": 0.0016506849315068492,
      "loss": 0.1393,
      "step": 490
    },
    {
      "epoch": 6.779661016949152,
      "grad_norm": 0.7454331517219543,
      "learning_rate": 0.0015821917808219177,
      "loss": 0.163,
      "step": 500
    },
    {
      "epoch": 6.915254237288136,
      "grad_norm": 0.7943591475486755,
      "learning_rate": 0.0015136986301369862,
      "loss": 0.1637,
      "step": 510
    },
    {
      "epoch": 6.996610169491525,
      "eval_accuracy": 0.9675572519083969,
      "eval_f1": 0.9582894361020196,
      "eval_loss": 0.07422558218240738,
      "eval_precision": 0.9512019762554385,
      "eval_recall": 0.9660573169570599,
      "eval_runtime": 2.67,
      "eval_samples_per_second": 196.256,
      "eval_steps_per_second": 12.36,
      "step": 516
    },
    {
      "epoch": 7.0508474576271185,
      "grad_norm": 0.5350046753883362,
      "learning_rate": 0.0014452054794520546,
      "loss": 0.1415,
      "step": 520
    },
    {
      "epoch": 7.186440677966102,
      "grad_norm": 0.9918954372406006,
      "learning_rate": 0.001376712328767123,
      "loss": 0.1548,
      "step": 530
    },
    {
      "epoch": 7.322033898305085,
      "grad_norm": 2.7371268272399902,
      "learning_rate": 0.0013082191780821918,
      "loss": 0.1583,
      "step": 540
    },
    {
      "epoch": 7.4576271186440675,
      "grad_norm": 1.9455125331878662,
      "learning_rate": 0.0012397260273972603,
      "loss": 0.1399,
      "step": 550
    },
    {
      "epoch": 7.593220338983051,
      "grad_norm": 1.1810232400894165,
      "learning_rate": 0.0011712328767123287,
      "loss": 0.1535,
      "step": 560
    },
    {
      "epoch": 7.728813559322034,
      "grad_norm": 0.5082597136497498,
      "learning_rate": 0.0011027397260273974,
      "loss": 0.1395,
      "step": 570
    },
    {
      "epoch": 7.864406779661017,
      "grad_norm": 0.435004860162735,
      "learning_rate": 0.001034246575342466,
      "loss": 0.1443,
      "step": 580
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.4963846504688263,
      "learning_rate": 0.0009657534246575344,
      "loss": 0.1385,
      "step": 590
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9312977099236641,
      "eval_f1": 0.9169177104804185,
      "eval_loss": 0.18427740037441254,
      "eval_precision": 0.8947368421052632,
      "eval_recall": 0.9537275064267352,
      "eval_runtime": 2.338,
      "eval_samples_per_second": 224.121,
      "eval_steps_per_second": 14.115,
      "step": 590
    },
    {
      "epoch": 8.135593220338983,
      "grad_norm": 0.5249541997909546,
      "learning_rate": 0.0008972602739726028,
      "loss": 0.1574,
      "step": 600
    },
    {
      "epoch": 8.271186440677965,
      "grad_norm": 0.4605633020401001,
      "learning_rate": 0.0008287671232876713,
      "loss": 0.1543,
      "step": 610
    },
    {
      "epoch": 8.40677966101695,
      "grad_norm": 0.3483382761478424,
      "learning_rate": 0.0007602739726027398,
      "loss": 0.131,
      "step": 620
    },
    {
      "epoch": 8.542372881355933,
      "grad_norm": 0.21878492832183838,
      "learning_rate": 0.0006917808219178081,
      "loss": 0.1158,
      "step": 630
    },
    {
      "epoch": 8.677966101694915,
      "grad_norm": 0.6566260457038879,
      "learning_rate": 0.0006232876712328767,
      "loss": 0.1018,
      "step": 640
    },
    {
      "epoch": 8.813559322033898,
      "grad_norm": 0.3624425232410431,
      "learning_rate": 0.0005547945205479452,
      "loss": 0.1137,
      "step": 650
    },
    {
      "epoch": 8.94915254237288,
      "grad_norm": 0.8725977540016174,
      "learning_rate": 0.0004863013698630137,
      "loss": 0.1335,
      "step": 660
    },
    {
      "epoch": 8.989830508474576,
      "eval_accuracy": 0.9751908396946565,
      "eval_f1": 0.9679548394684229,
      "eval_loss": 0.0676569938659668,
      "eval_precision": 0.9626488095238095,
      "eval_recall": 0.9736170617918689,
      "eval_runtime": 2.3252,
      "eval_samples_per_second": 225.358,
      "eval_steps_per_second": 14.192,
      "step": 663
    },
    {
      "epoch": 9.084745762711865,
      "grad_norm": 0.46945634484291077,
      "learning_rate": 0.00041780821917808224,
      "loss": 0.1348,
      "step": 670
    },
    {
      "epoch": 9.220338983050848,
      "grad_norm": 0.33705276250839233,
      "learning_rate": 0.0003493150684931507,
      "loss": 0.0968,
      "step": 680
    },
    {
      "epoch": 9.35593220338983,
      "grad_norm": 0.554704487323761,
      "learning_rate": 0.0002808219178082192,
      "loss": 0.1379,
      "step": 690
    },
    {
      "epoch": 9.491525423728813,
      "grad_norm": 0.5419002175331116,
      "learning_rate": 0.00021232876712328768,
      "loss": 0.128,
      "step": 700
    },
    {
      "epoch": 9.627118644067796,
      "grad_norm": 0.32422158122062683,
      "learning_rate": 0.00014383561643835618,
      "loss": 0.1065,
      "step": 710
    },
    {
      "epoch": 9.76271186440678,
      "grad_norm": 0.3378284275531769,
      "learning_rate": 7.534246575342466e-05,
      "loss": 0.1354,
      "step": 720
    },
    {
      "epoch": 9.898305084745763,
      "grad_norm": 0.2171776294708252,
      "learning_rate": 6.849315068493151e-06,
      "loss": 0.1186,
      "step": 730
    },
    {
      "epoch": 9.898305084745763,
      "eval_accuracy": 0.9751908396946565,
      "eval_f1": 0.9679548394684229,
      "eval_loss": 0.0765310674905777,
      "eval_precision": 0.9626488095238095,
      "eval_recall": 0.9736170617918689,
      "eval_runtime": 2.3291,
      "eval_samples_per_second": 224.984,
      "eval_steps_per_second": 14.169,
      "step": 730
    },
    {
      "epoch": 9.898305084745763,
      "step": 730,
      "total_flos": 3.6369520534486057e+18,
      "train_loss": 0.18177251358554788,
      "train_runtime": 459.6522,
      "train_samples_per_second": 102.425,
      "train_steps_per_second": 1.588
    }
  ],
  "logging_steps": 10,
  "max_steps": 730,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.6369520534486057e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
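The file above is the trainer_state.json that the Hugging Face Trainer writes next to each checkpoint: "log_history" interleaves training-loss rows with the per-epoch "eval_*" rows, and "best_model_checkpoint" points at the checkpoint whose score matches "best_metric" (here the eval_accuracy of checkpoint-221). As a minimal sketch for pulling the evaluation rows back out, assuming the file sits under the final checkpoint directory (the path below is hypothetical; point it at wherever this file actually lives):

import json

# Hypothetical path to the state file shown above; adjust as needed.
with open("vit-base-patch16-224-in21k-finetuned-lora-medmnistv2/checkpoint-730/trainer_state.json") as f:
    state = json.load(f)

print("best metric:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# Evaluation rows are the log_history entries that carry eval_* keys.
for entry in state["log_history"]:
    if "eval_accuracy" in entry:
        print(f"step {entry['step']:>4}  epoch {entry['epoch']:5.2f}  "
              f"acc {entry['eval_accuracy']:.4f}  f1 {entry['eval_f1']:.4f}")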
|