|
{ |
|
"best_metric": 0.3047935709180844, |
|
"best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_3/checkpoint-584", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 584, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.017123287671232876, |
|
"grad_norm": 11.778327941894531, |
|
"learning_rate": 9.4088732392351e-06, |
|
"loss": 2.2193, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03424657534246575, |
|
"grad_norm": 17.048717498779297, |
|
"learning_rate": 1.88177464784702e-05, |
|
"loss": 2.1721, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05136986301369863, |
|
"grad_norm": 9.865235328674316, |
|
"learning_rate": 2.1552795460563643e-05, |
|
"loss": 2.0945, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0684931506849315, |
|
"grad_norm": 8.911628723144531, |
|
"learning_rate": 2.142763404673923e-05, |
|
"loss": 2.1, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08561643835616438, |
|
"grad_norm": 8.172422409057617, |
|
"learning_rate": 2.130247263291482e-05, |
|
"loss": 2.1, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10273972602739725, |
|
"grad_norm": 8.123610496520996, |
|
"learning_rate": 2.117731121909041e-05, |
|
"loss": 2.0782, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11986301369863013, |
|
"grad_norm": 7.698946475982666, |
|
"learning_rate": 2.1052149805266e-05, |
|
"loss": 1.9786, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.136986301369863, |
|
"grad_norm": 8.443121910095215, |
|
"learning_rate": 2.0926988391441588e-05, |
|
"loss": 2.0303, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1541095890410959, |
|
"grad_norm": 8.679265975952148, |
|
"learning_rate": 2.0801826977617177e-05, |
|
"loss": 2.0897, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.17123287671232876, |
|
"grad_norm": 10.261368751525879, |
|
"learning_rate": 2.0676665563792766e-05, |
|
"loss": 2.0079, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18835616438356165, |
|
"grad_norm": 8.91393756866455, |
|
"learning_rate": 2.055150414996835e-05, |
|
"loss": 2.0663, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2054794520547945, |
|
"grad_norm": 10.901965141296387, |
|
"learning_rate": 2.042634273614394e-05, |
|
"loss": 1.9684, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2226027397260274, |
|
"grad_norm": 8.871338844299316, |
|
"learning_rate": 2.030118132231953e-05, |
|
"loss": 2.1153, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.23972602739726026, |
|
"grad_norm": 8.738993644714355, |
|
"learning_rate": 2.0176019908495118e-05, |
|
"loss": 2.0973, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2568493150684932, |
|
"grad_norm": 8.016680717468262, |
|
"learning_rate": 2.0050858494670707e-05, |
|
"loss": 2.0179, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.273972602739726, |
|
"grad_norm": 7.566287994384766, |
|
"learning_rate": 1.9925697080846293e-05, |
|
"loss": 2.0837, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2910958904109589, |
|
"grad_norm": 8.198686599731445, |
|
"learning_rate": 1.9800535667021882e-05, |
|
"loss": 2.0696, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3082191780821918, |
|
"grad_norm": 7.653988838195801, |
|
"learning_rate": 1.967537425319747e-05, |
|
"loss": 1.9815, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3253424657534247, |
|
"grad_norm": 7.869149684906006, |
|
"learning_rate": 1.955021283937306e-05, |
|
"loss": 2.0139, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3424657534246575, |
|
"grad_norm": 11.699186325073242, |
|
"learning_rate": 1.942505142554865e-05, |
|
"loss": 2.0046, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3595890410958904, |
|
"grad_norm": 7.273196220397949, |
|
"learning_rate": 1.9299890011724238e-05, |
|
"loss": 1.9229, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3767123287671233, |
|
"grad_norm": 12.080012321472168, |
|
"learning_rate": 1.9174728597899827e-05, |
|
"loss": 2.0691, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3938356164383562, |
|
"grad_norm": 10.047798156738281, |
|
"learning_rate": 1.9049567184075416e-05, |
|
"loss": 2.0008, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 12.511098861694336, |
|
"learning_rate": 1.8924405770251005e-05, |
|
"loss": 2.0886, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4280821917808219, |
|
"grad_norm": 10.744943618774414, |
|
"learning_rate": 1.8799244356426594e-05, |
|
"loss": 1.9832, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4452054794520548, |
|
"grad_norm": 10.715825080871582, |
|
"learning_rate": 1.8674082942602183e-05, |
|
"loss": 1.9835, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4623287671232877, |
|
"grad_norm": 8.178912162780762, |
|
"learning_rate": 1.8548921528777768e-05, |
|
"loss": 2.0427, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4794520547945205, |
|
"grad_norm": 14.410847663879395, |
|
"learning_rate": 1.8423760114953357e-05, |
|
"loss": 1.8612, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4965753424657534, |
|
"grad_norm": 11.317428588867188, |
|
"learning_rate": 1.8298598701128946e-05, |
|
"loss": 1.9943, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5136986301369864, |
|
"grad_norm": 8.62607192993164, |
|
"learning_rate": 1.8173437287304535e-05, |
|
"loss": 1.896, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5308219178082192, |
|
"grad_norm": 8.02371883392334, |
|
"learning_rate": 1.8048275873480124e-05, |
|
"loss": 1.9874, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"grad_norm": 9.2183198928833, |
|
"learning_rate": 1.7923114459655713e-05, |
|
"loss": 1.9973, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.565068493150685, |
|
"grad_norm": 10.167363166809082, |
|
"learning_rate": 1.7797953045831302e-05, |
|
"loss": 2.008, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5821917808219178, |
|
"grad_norm": 8.94150161743164, |
|
"learning_rate": 1.767279163200689e-05, |
|
"loss": 1.9215, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5993150684931506, |
|
"grad_norm": 12.496065139770508, |
|
"learning_rate": 1.754763021818248e-05, |
|
"loss": 1.9297, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6164383561643836, |
|
"grad_norm": 8.680890083312988, |
|
"learning_rate": 1.742246880435807e-05, |
|
"loss": 1.8756, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6335616438356164, |
|
"grad_norm": 12.159443855285645, |
|
"learning_rate": 1.7297307390533658e-05, |
|
"loss": 1.9632, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6506849315068494, |
|
"grad_norm": 10.144058227539062, |
|
"learning_rate": 1.7172145976709244e-05, |
|
"loss": 1.8928, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6678082191780822, |
|
"grad_norm": 9.923543930053711, |
|
"learning_rate": 1.7046984562884833e-05, |
|
"loss": 1.9076, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.684931506849315, |
|
"grad_norm": 11.46466064453125, |
|
"learning_rate": 1.6921823149060422e-05, |
|
"loss": 1.92, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.702054794520548, |
|
"grad_norm": 9.840792655944824, |
|
"learning_rate": 1.679666173523601e-05, |
|
"loss": 1.8128, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7191780821917808, |
|
"grad_norm": 10.992412567138672, |
|
"learning_rate": 1.66715003214116e-05, |
|
"loss": 1.8548, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7363013698630136, |
|
"grad_norm": 10.46108627319336, |
|
"learning_rate": 1.6546338907587185e-05, |
|
"loss": 1.8455, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7534246575342466, |
|
"grad_norm": 13.512310981750488, |
|
"learning_rate": 1.6421177493762774e-05, |
|
"loss": 1.9271, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7705479452054794, |
|
"grad_norm": 10.795140266418457, |
|
"learning_rate": 1.6296016079938363e-05, |
|
"loss": 1.9033, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7876712328767124, |
|
"grad_norm": 11.524979591369629, |
|
"learning_rate": 1.6170854666113952e-05, |
|
"loss": 1.9248, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8047945205479452, |
|
"grad_norm": 8.86741828918457, |
|
"learning_rate": 1.604569325228954e-05, |
|
"loss": 1.9549, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 12.26812744140625, |
|
"learning_rate": 1.592053183846513e-05, |
|
"loss": 1.9197, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.839041095890411, |
|
"grad_norm": 17.214059829711914, |
|
"learning_rate": 1.579537042464072e-05, |
|
"loss": 1.9561, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8561643835616438, |
|
"grad_norm": 13.144837379455566, |
|
"learning_rate": 1.5670209010816308e-05, |
|
"loss": 1.9064, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8732876712328768, |
|
"grad_norm": 10.606916427612305, |
|
"learning_rate": 1.5545047596991897e-05, |
|
"loss": 1.8398, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8904109589041096, |
|
"grad_norm": 11.539923667907715, |
|
"learning_rate": 1.5419886183167483e-05, |
|
"loss": 1.9345, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9075342465753424, |
|
"grad_norm": 14.816058158874512, |
|
"learning_rate": 1.5294724769343072e-05, |
|
"loss": 1.8451, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9246575342465754, |
|
"grad_norm": 12.361188888549805, |
|
"learning_rate": 1.5169563355518661e-05, |
|
"loss": 1.7723, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9417808219178082, |
|
"grad_norm": 13.830178260803223, |
|
"learning_rate": 1.504440194169425e-05, |
|
"loss": 1.9174, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.958904109589041, |
|
"grad_norm": 11.410951614379883, |
|
"learning_rate": 1.4919240527869839e-05, |
|
"loss": 1.8714, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.976027397260274, |
|
"grad_norm": 11.380172729492188, |
|
"learning_rate": 1.4794079114045428e-05, |
|
"loss": 1.8259, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9931506849315068, |
|
"grad_norm": 17.25227165222168, |
|
"learning_rate": 1.4668917700221017e-05, |
|
"loss": 1.8291, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_classification_report": { |
|
"accuracy": 0.3175, |
|
"ar": { |
|
"f1-score": 0.2611111111111111, |
|
"precision": 0.3051948051948052, |
|
"recall": 0.22815533980582525, |
|
"support": 206.0 |
|
}, |
|
"cl": { |
|
"f1-score": 0.2444113263785395, |
|
"precision": 0.2152230971128609, |
|
"recall": 0.2827586206896552, |
|
"support": 290.0 |
|
}, |
|
"co": { |
|
"f1-score": 0.35724331926863573, |
|
"precision": 0.30238095238095236, |
|
"recall": 0.436426116838488, |
|
"support": 291.0 |
|
}, |
|
"es": { |
|
"f1-score": 0.32696390658174096, |
|
"precision": 0.4010416666666667, |
|
"recall": 0.27598566308243727, |
|
"support": 279.0 |
|
}, |
|
"macro avg": { |
|
"f1-score": 0.3047935709180844, |
|
"precision": 0.3345601053365995, |
|
"recall": 0.29767349498848117, |
|
"support": 2000.0 |
|
}, |
|
"mx": { |
|
"f1-score": 0.3294663573085847, |
|
"precision": 0.5071428571428571, |
|
"recall": 0.24398625429553264, |
|
"support": 291.0 |
|
}, |
|
"pe": { |
|
"f1-score": 0.325434439178515, |
|
"precision": 0.30116959064327486, |
|
"recall": 0.3539518900343643, |
|
"support": 291.0 |
|
}, |
|
"pr": { |
|
"f1-score": 0.6171428571428571, |
|
"precision": 0.7297297297297297, |
|
"recall": 0.5346534653465347, |
|
"support": 101.0 |
|
}, |
|
"uy": { |
|
"f1-score": 0.2813688212927757, |
|
"precision": 0.24915824915824916, |
|
"recall": 0.3231441048034934, |
|
"support": 229.0 |
|
}, |
|
"ve": { |
|
"f1-score": 0.0, |
|
"precision": 0.0, |
|
"recall": 0.0, |
|
"support": 22.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.3185949649036821, |
|
"precision": 0.3455735871207114, |
|
"recall": 0.3175, |
|
"support": 2000.0 |
|
} |
|
}, |
|
"eval_f1": 0.3047935709180844, |
|
"eval_loss": 1.8013501167297363, |
|
"eval_runtime": 4.4012, |
|
"eval_samples_per_second": 454.419, |
|
"eval_steps_per_second": 56.802, |
|
"step": 584 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1752, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 306938335993344.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|