{
  "best_metric": 0.33959150175639824,
  "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_6/checkpoint-1168",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1168,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017123287671232876,
      "grad_norm": 10.533243179321289,
      "learning_rate": 2.2368737828601498e-06,
      "loss": 2.2235,
      "step": 10
    },
    {
      "epoch": 0.03424657534246575,
      "grad_norm": 13.400142669677734,
      "learning_rate": 4.4737475657202996e-06,
      "loss": 2.203,
      "step": 20
    },
    {
      "epoch": 0.05136986301369863,
      "grad_norm": 11.283329010009766,
      "learning_rate": 6.71062134858045e-06,
      "loss": 2.202,
      "step": 30
    },
    {
      "epoch": 0.0684931506849315,
      "grad_norm": 9.742809295654297,
      "learning_rate": 8.947495131440599e-06,
      "loss": 2.1295,
      "step": 40
    },
    {
      "epoch": 0.08561643835616438,
      "grad_norm": 9.277532577514648,
      "learning_rate": 1.1184368914300748e-05,
      "loss": 2.1058,
      "step": 50
    },
    {
      "epoch": 0.10273972602739725,
      "grad_norm": 9.417570114135742,
      "learning_rate": 1.34212426971609e-05,
      "loss": 2.0999,
      "step": 60
    },
    {
      "epoch": 0.11986301369863013,
      "grad_norm": 8.50729751586914,
      "learning_rate": 1.5658116480021048e-05,
      "loss": 2.0348,
      "step": 70
    },
    {
      "epoch": 0.136986301369863,
      "grad_norm": 11.117788314819336,
      "learning_rate": 1.7894990262881198e-05,
      "loss": 2.0573,
      "step": 80
    },
    {
      "epoch": 0.1541095890410959,
      "grad_norm": 8.794454574584961,
      "learning_rate": 2.013186404574135e-05,
      "loss": 2.1162,
      "step": 90
    },
    {
      "epoch": 0.17123287671232876,
      "grad_norm": 10.189006805419922,
      "learning_rate": 2.2368737828601496e-05,
      "loss": 2.0447,
      "step": 100
    },
    {
      "epoch": 0.18835616438356165,
      "grad_norm": 8.304252624511719,
      "learning_rate": 2.4605611611461647e-05,
      "loss": 2.0826,
      "step": 110
    },
    {
      "epoch": 0.2054794520547945,
      "grad_norm": 11.60432243347168,
      "learning_rate": 2.5884293984157745e-05,
      "loss": 2.0481,
      "step": 120
    },
    {
      "epoch": 0.2226027397260274,
      "grad_norm": 11.781548500061035,
      "learning_rate": 2.5725689241607756e-05,
      "loss": 2.1976,
      "step": 130
    },
    {
      "epoch": 0.23972602739726026,
      "grad_norm": 8.521300315856934,
      "learning_rate": 2.5567084499057773e-05,
      "loss": 2.1029,
      "step": 140
    },
    {
      "epoch": 0.2568493150684932,
      "grad_norm": 9.029247283935547,
      "learning_rate": 2.5408479756507783e-05,
      "loss": 2.0236,
      "step": 150
    },
    {
      "epoch": 0.273972602739726,
      "grad_norm": 8.05414867401123,
      "learning_rate": 2.52498750139578e-05,
      "loss": 2.1098,
      "step": 160
    },
    {
      "epoch": 0.2910958904109589,
      "grad_norm": 8.391693115234375,
      "learning_rate": 2.509127027140781e-05,
      "loss": 2.0854,
      "step": 170
    },
    {
      "epoch": 0.3082191780821918,
      "grad_norm": 8.5321044921875,
      "learning_rate": 2.4932665528857828e-05,
      "loss": 2.0164,
      "step": 180
    },
    {
      "epoch": 0.3253424657534247,
      "grad_norm": 8.022624969482422,
      "learning_rate": 2.477406078630784e-05,
      "loss": 2.0136,
      "step": 190
    },
    {
      "epoch": 0.3424657534246575,
      "grad_norm": 16.765403747558594,
      "learning_rate": 2.4615456043757856e-05,
      "loss": 2.0038,
      "step": 200
    },
    {
      "epoch": 0.3595890410958904,
      "grad_norm": 8.68701171875,
      "learning_rate": 2.4456851301207866e-05,
      "loss": 1.9602,
      "step": 210
    },
    {
      "epoch": 0.3767123287671233,
      "grad_norm": 8.106729507446289,
      "learning_rate": 2.4298246558657883e-05,
      "loss": 2.0734,
      "step": 220
    },
    {
      "epoch": 0.3938356164383562,
      "grad_norm": 9.488205909729004,
      "learning_rate": 2.4139641816107894e-05,
      "loss": 2.0728,
      "step": 230
    },
    {
      "epoch": 0.410958904109589,
      "grad_norm": 10.983114242553711,
      "learning_rate": 2.398103707355791e-05,
      "loss": 2.0433,
      "step": 240
    },
    {
      "epoch": 0.4280821917808219,
      "grad_norm": 10.720239639282227,
      "learning_rate": 2.382243233100792e-05,
      "loss": 1.9801,
      "step": 250
    },
    {
      "epoch": 0.4452054794520548,
      "grad_norm": 9.009136199951172,
      "learning_rate": 2.3663827588457938e-05,
      "loss": 2.0108,
      "step": 260
    },
    {
      "epoch": 0.4623287671232877,
      "grad_norm": 8.843775749206543,
      "learning_rate": 2.350522284590795e-05,
      "loss": 2.072,
      "step": 270
    },
    {
      "epoch": 0.4794520547945205,
      "grad_norm": 10.325736999511719,
      "learning_rate": 2.3346618103357966e-05,
      "loss": 1.8827,
      "step": 280
    },
    {
      "epoch": 0.4965753424657534,
      "grad_norm": 12.433212280273438,
      "learning_rate": 2.318801336080798e-05,
      "loss": 2.0127,
      "step": 290
    },
    {
      "epoch": 0.5136986301369864,
      "grad_norm": 8.865859985351562,
      "learning_rate": 2.3029408618257993e-05,
      "loss": 1.92,
      "step": 300
    },
    {
      "epoch": 0.5308219178082192,
      "grad_norm": 8.225539207458496,
      "learning_rate": 2.2870803875708007e-05,
      "loss": 2.0065,
      "step": 310
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 9.395380973815918,
      "learning_rate": 2.271219913315802e-05,
      "loss": 2.018,
      "step": 320
    },
    {
      "epoch": 0.565068493150685,
      "grad_norm": 11.355223655700684,
      "learning_rate": 2.2553594390608035e-05,
      "loss": 2.0069,
      "step": 330
    },
    {
      "epoch": 0.5821917808219178,
      "grad_norm": 9.485352516174316,
      "learning_rate": 2.239498964805805e-05,
      "loss": 1.8434,
      "step": 340
    },
    {
      "epoch": 0.5993150684931506,
      "grad_norm": 9.75179386138916,
      "learning_rate": 2.2236384905508062e-05,
      "loss": 1.9366,
      "step": 350
    },
    {
      "epoch": 0.6164383561643836,
      "grad_norm": 8.961932182312012,
      "learning_rate": 2.2077780162958076e-05,
      "loss": 1.8823,
      "step": 360
    },
    {
      "epoch": 0.6335616438356164,
      "grad_norm": 11.1987886428833,
      "learning_rate": 2.191917542040809e-05,
      "loss": 2.0209,
      "step": 370
    },
    {
      "epoch": 0.6506849315068494,
      "grad_norm": 9.715045928955078,
      "learning_rate": 2.1760570677858104e-05,
      "loss": 2.001,
      "step": 380
    },
    {
      "epoch": 0.6678082191780822,
      "grad_norm": 12.430977821350098,
      "learning_rate": 2.1601965935308117e-05,
      "loss": 1.9342,
      "step": 390
    },
    {
      "epoch": 0.684931506849315,
      "grad_norm": 13.551401138305664,
      "learning_rate": 2.144336119275813e-05,
      "loss": 1.9697,
      "step": 400
    },
    {
      "epoch": 0.702054794520548,
      "grad_norm": 10.96045970916748,
      "learning_rate": 2.1284756450208145e-05,
      "loss": 1.8875,
      "step": 410
    },
    {
      "epoch": 0.7191780821917808,
      "grad_norm": 10.306199073791504,
      "learning_rate": 2.112615170765816e-05,
      "loss": 1.9113,
      "step": 420
    },
    {
      "epoch": 0.7363013698630136,
      "grad_norm": 8.820138931274414,
      "learning_rate": 2.0967546965108172e-05,
      "loss": 1.8304,
      "step": 430
    },
    {
      "epoch": 0.7534246575342466,
      "grad_norm": 11.025798797607422,
      "learning_rate": 2.0808942222558186e-05,
      "loss": 1.9428,
      "step": 440
    },
    {
      "epoch": 0.7705479452054794,
      "grad_norm": 10.171765327453613,
      "learning_rate": 2.06503374800082e-05,
      "loss": 1.9606,
      "step": 450
    },
    {
      "epoch": 0.7876712328767124,
      "grad_norm": 12.033662796020508,
      "learning_rate": 2.0491732737458214e-05,
      "loss": 1.9102,
      "step": 460
    },
    {
      "epoch": 0.8047945205479452,
      "grad_norm": 10.383848190307617,
      "learning_rate": 2.0333127994908228e-05,
      "loss": 1.9791,
      "step": 470
    },
    {
      "epoch": 0.821917808219178,
      "grad_norm": 12.919981002807617,
      "learning_rate": 2.017452325235824e-05,
      "loss": 1.9671,
      "step": 480
    },
    {
      "epoch": 0.839041095890411,
      "grad_norm": 12.617621421813965,
      "learning_rate": 2.0015918509808255e-05,
      "loss": 1.9002,
      "step": 490
    },
    {
      "epoch": 0.8561643835616438,
      "grad_norm": 10.720662117004395,
      "learning_rate": 1.985731376725827e-05,
      "loss": 1.925,
      "step": 500
    },
    {
      "epoch": 0.8732876712328768,
      "grad_norm": 10.810115814208984,
      "learning_rate": 1.9698709024708283e-05,
      "loss": 1.8476,
      "step": 510
    },
    {
      "epoch": 0.8904109589041096,
      "grad_norm": 11.600144386291504,
      "learning_rate": 1.9540104282158296e-05,
      "loss": 2.0233,
      "step": 520
    },
    {
      "epoch": 0.9075342465753424,
      "grad_norm": 19.464778900146484,
      "learning_rate": 1.938149953960831e-05,
      "loss": 1.8731,
      "step": 530
    },
    {
      "epoch": 0.9246575342465754,
      "grad_norm": 14.501326560974121,
      "learning_rate": 1.9222894797058324e-05,
      "loss": 1.8296,
      "step": 540
    },
    {
      "epoch": 0.9417808219178082,
      "grad_norm": 12.78044605255127,
      "learning_rate": 1.9064290054508338e-05,
      "loss": 1.9029,
      "step": 550
    },
    {
      "epoch": 0.958904109589041,
      "grad_norm": 11.88862133026123,
      "learning_rate": 1.890568531195835e-05,
      "loss": 1.9337,
      "step": 560
    },
    {
      "epoch": 0.976027397260274,
      "grad_norm": 12.300003051757812,
      "learning_rate": 1.8747080569408365e-05,
      "loss": 1.8796,
      "step": 570
    },
    {
      "epoch": 0.9931506849315068,
      "grad_norm": 15.312654495239258,
      "learning_rate": 1.858847582685838e-05,
      "loss": 1.8259,
      "step": 580
    },
    {
      "epoch": 1.0,
      "eval_classification_report": {
        "accuracy": 0.3195,
        "ar": {
          "f1-score": 0.23384615384615384,
          "precision": 0.31932773109243695,
          "recall": 0.18446601941747573,
          "support": 206.0
        },
        "cl": {
          "f1-score": 0.25039123630672927,
          "precision": 0.22922636103151864,
          "recall": 0.27586206896551724,
          "support": 290.0
        },
        "co": {
          "f1-score": 0.37142857142857144,
          "precision": 0.2985386221294363,
          "recall": 0.49140893470790376,
          "support": 291.0
        },
        "es": {
          "f1-score": 0.36607142857142855,
          "precision": 0.48520710059171596,
          "recall": 0.2939068100358423,
          "support": 279.0
        },
        "macro avg": {
          "f1-score": 0.30612372170181307,
          "precision": 0.3459220621957859,
          "recall": 0.29992450825787437,
          "support": 2000.0
        },
        "mx": {
          "f1-score": 0.3076923076923077,
          "precision": 0.4782608695652174,
          "recall": 0.2268041237113402,
          "support": 291.0
        },
        "pe": {
          "f1-score": 0.3013698630136986,
          "precision": 0.3003412969283277,
          "recall": 0.3024054982817869,
          "support": 291.0
        },
        "pr": {
          "f1-score": 0.6395348837209303,
          "precision": 0.7746478873239436,
          "recall": 0.5445544554455446,
          "support": 101.0
        },
        "uy": {
          "f1-score": 0.28477905073649756,
          "precision": 0.22774869109947643,
          "recall": 0.3799126637554585,
          "support": 229.0
        },
        "ve": {
          "f1-score": 0.0,
          "precision": 0.0,
          "recall": 0.0,
          "support": 22.0
        },
        "weighted avg": {
          "f1-score": 0.3190249633141609,
          "precision": 0.3557358973700286,
          "recall": 0.3195,
          "support": 2000.0
        }
      },
      "eval_f1": 0.30612372170181307,
      "eval_loss": 1.8373411893844604,
      "eval_runtime": 2.9167,
      "eval_samples_per_second": 685.713,
      "eval_steps_per_second": 85.714,
      "step": 584
    },
    {
      "epoch": 1.0102739726027397,
      "grad_norm": 9.993717193603516,
      "learning_rate": 1.8429871084308393e-05,
      "loss": 1.7138,
      "step": 590
    },
    {
      "epoch": 1.0273972602739727,
      "grad_norm": 16.997873306274414,
      "learning_rate": 1.8271266341758407e-05,
      "loss": 1.6405,
      "step": 600
    },
    {
      "epoch": 1.0445205479452055,
      "grad_norm": 14.189482688903809,
      "learning_rate": 1.811266159920842e-05,
      "loss": 1.6928,
      "step": 610
    },
    {
      "epoch": 1.0616438356164384,
      "grad_norm": 11.937070846557617,
      "learning_rate": 1.7954056856658434e-05,
      "loss": 1.606,
      "step": 620
    },
    {
      "epoch": 1.0787671232876712,
      "grad_norm": 15.003684997558594,
      "learning_rate": 1.7795452114108448e-05,
      "loss": 1.532,
      "step": 630
    },
    {
      "epoch": 1.095890410958904,
      "grad_norm": 12.880982398986816,
      "learning_rate": 1.7636847371558462e-05,
      "loss": 1.6335,
      "step": 640
    },
    {
      "epoch": 1.1130136986301369,
      "grad_norm": 27.958925247192383,
      "learning_rate": 1.7478242629008476e-05,
      "loss": 1.7617,
      "step": 650
    },
    {
      "epoch": 1.13013698630137,
      "grad_norm": 20.256528854370117,
      "learning_rate": 1.731963788645849e-05,
      "loss": 1.6595,
      "step": 660
    },
    {
      "epoch": 1.1472602739726028,
      "grad_norm": 11.43181037902832,
      "learning_rate": 1.7161033143908503e-05,
      "loss": 1.4586,
      "step": 670
    },
    {
      "epoch": 1.1643835616438356,
      "grad_norm": 16.205936431884766,
      "learning_rate": 1.7002428401358517e-05,
      "loss": 1.6602,
      "step": 680
    },
    {
      "epoch": 1.1815068493150684,
      "grad_norm": 10.263736724853516,
      "learning_rate": 1.684382365880853e-05,
      "loss": 1.614,
      "step": 690
    },
    {
      "epoch": 1.1986301369863013,
      "grad_norm": 15.32834529876709,
      "learning_rate": 1.6685218916258544e-05,
      "loss": 1.6591,
      "step": 700
    },
    {
      "epoch": 1.2157534246575343,
      "grad_norm": 19.10625648498535,
      "learning_rate": 1.6526614173708558e-05,
      "loss": 1.5541,
      "step": 710
    },
    {
      "epoch": 1.2328767123287672,
      "grad_norm": 16.795921325683594,
      "learning_rate": 1.6368009431158572e-05,
      "loss": 1.6756,
      "step": 720
    },
    {
      "epoch": 1.25,
      "grad_norm": 15.281452178955078,
      "learning_rate": 1.6209404688608586e-05,
      "loss": 1.6431,
      "step": 730
    },
    {
      "epoch": 1.2671232876712328,
      "grad_norm": 20.04804039001465,
      "learning_rate": 1.60507999460586e-05,
      "loss": 1.6474,
      "step": 740
    },
    {
      "epoch": 1.2842465753424657,
      "grad_norm": 14.96975040435791,
      "learning_rate": 1.5892195203508613e-05,
      "loss": 1.696,
      "step": 750
    },
    {
      "epoch": 1.3013698630136985,
      "grad_norm": 24.126489639282227,
      "learning_rate": 1.5733590460958627e-05,
      "loss": 1.7574,
      "step": 760
    },
    {
      "epoch": 1.3184931506849316,
      "grad_norm": 18.405784606933594,
      "learning_rate": 1.557498571840864e-05,
      "loss": 1.4954,
      "step": 770
    },
    {
      "epoch": 1.3356164383561644,
      "grad_norm": 14.699495315551758,
      "learning_rate": 1.5416380975858658e-05,
      "loss": 1.5321,
      "step": 780
    },
    {
      "epoch": 1.3527397260273972,
      "grad_norm": 31.16180419921875,
      "learning_rate": 1.5257776233308668e-05,
      "loss": 1.6808,
      "step": 790
    },
    {
      "epoch": 1.36986301369863,
      "grad_norm": 18.2718448638916,
      "learning_rate": 1.5099171490758684e-05,
      "loss": 1.5726,
      "step": 800
    },
    {
      "epoch": 1.3869863013698631,
      "grad_norm": 17.004480361938477,
      "learning_rate": 1.4940566748208696e-05,
      "loss": 1.5544,
      "step": 810
    },
    {
      "epoch": 1.404109589041096,
      "grad_norm": 21.022958755493164,
      "learning_rate": 1.4781962005658711e-05,
      "loss": 1.674,
      "step": 820
    },
    {
      "epoch": 1.4212328767123288,
      "grad_norm": 15.981225967407227,
      "learning_rate": 1.4623357263108724e-05,
      "loss": 1.7799,
      "step": 830
    },
    {
      "epoch": 1.4383561643835616,
      "grad_norm": 21.790754318237305,
      "learning_rate": 1.4464752520558739e-05,
      "loss": 1.6517,
      "step": 840
    },
    {
      "epoch": 1.4554794520547945,
      "grad_norm": 28.04483413696289,
      "learning_rate": 1.4306147778008751e-05,
      "loss": 1.5257,
      "step": 850
    },
    {
      "epoch": 1.4726027397260273,
      "grad_norm": 11.142111778259277,
      "learning_rate": 1.4147543035458767e-05,
      "loss": 1.692,
      "step": 860
    },
    {
      "epoch": 1.4897260273972603,
      "grad_norm": 18.453630447387695,
      "learning_rate": 1.3988938292908779e-05,
      "loss": 1.7486,
      "step": 870
    },
    {
      "epoch": 1.5068493150684932,
      "grad_norm": 18.69659423828125,
      "learning_rate": 1.3830333550358794e-05,
      "loss": 1.5123,
      "step": 880
    },
    {
      "epoch": 1.523972602739726,
      "grad_norm": 17.997787475585938,
      "learning_rate": 1.3671728807808806e-05,
      "loss": 1.5542,
      "step": 890
    },
    {
      "epoch": 1.541095890410959,
      "grad_norm": 23.682682037353516,
      "learning_rate": 1.3513124065258822e-05,
      "loss": 1.5797,
      "step": 900
    },
    {
      "epoch": 1.558219178082192,
      "grad_norm": 15.725004196166992,
      "learning_rate": 1.3354519322708834e-05,
      "loss": 1.4599,
      "step": 910
    },
    {
      "epoch": 1.5753424657534247,
      "grad_norm": 27.747419357299805,
      "learning_rate": 1.319591458015885e-05,
      "loss": 1.575,
      "step": 920
    },
    {
      "epoch": 1.5924657534246576,
      "grad_norm": 24.641860961914062,
      "learning_rate": 1.3037309837608863e-05,
      "loss": 1.5311,
      "step": 930
    },
    {
      "epoch": 1.6095890410958904,
      "grad_norm": 21.082124710083008,
      "learning_rate": 1.2878705095058877e-05,
      "loss": 1.6095,
      "step": 940
    },
    {
      "epoch": 1.6267123287671232,
      "grad_norm": 20.251115798950195,
      "learning_rate": 1.272010035250889e-05,
      "loss": 1.4753,
      "step": 950
    },
    {
      "epoch": 1.643835616438356,
      "grad_norm": 28.846712112426758,
      "learning_rate": 1.2561495609958904e-05,
      "loss": 1.5826,
      "step": 960
    },
    {
      "epoch": 1.660958904109589,
      "grad_norm": 16.3935489654541,
      "learning_rate": 1.2402890867408918e-05,
      "loss": 1.5531,
      "step": 970
    },
    {
      "epoch": 1.678082191780822,
      "grad_norm": 26.24190330505371,
      "learning_rate": 1.2244286124858932e-05,
      "loss": 1.6214,
      "step": 980
    },
    {
      "epoch": 1.6952054794520548,
      "grad_norm": 25.269094467163086,
      "learning_rate": 1.2085681382308946e-05,
      "loss": 1.4609,
      "step": 990
    },
    {
      "epoch": 1.7123287671232876,
      "grad_norm": 27.003097534179688,
      "learning_rate": 1.1927076639758961e-05,
      "loss": 1.5031,
      "step": 1000
    },
    {
      "epoch": 1.7294520547945207,
      "grad_norm": 24.793109893798828,
      "learning_rate": 1.1768471897208975e-05,
      "loss": 1.5054,
      "step": 1010
    },
    {
      "epoch": 1.7465753424657535,
      "grad_norm": 19.835695266723633,
      "learning_rate": 1.1609867154658989e-05,
      "loss": 1.5893,
      "step": 1020
    },
    {
      "epoch": 1.7636986301369864,
      "grad_norm": 20.677410125732422,
      "learning_rate": 1.1451262412109003e-05,
      "loss": 1.4415,
      "step": 1030
    },
    {
      "epoch": 1.7808219178082192,
      "grad_norm": 12.999378204345703,
      "learning_rate": 1.1292657669559016e-05,
      "loss": 1.6227,
      "step": 1040
    },
    {
      "epoch": 1.797945205479452,
      "grad_norm": 24.370025634765625,
      "learning_rate": 1.113405292700903e-05,
      "loss": 1.6393,
      "step": 1050
    },
    {
      "epoch": 1.8150684931506849,
      "grad_norm": 21.418380737304688,
      "learning_rate": 1.0975448184459044e-05,
      "loss": 1.4325,
      "step": 1060
    },
    {
      "epoch": 1.8321917808219177,
      "grad_norm": 16.800621032714844,
      "learning_rate": 1.0816843441909058e-05,
      "loss": 1.5552,
      "step": 1070
    },
    {
      "epoch": 1.8493150684931505,
      "grad_norm": 25.122791290283203,
      "learning_rate": 1.0658238699359071e-05,
      "loss": 1.4473,
      "step": 1080
    },
    {
      "epoch": 1.8664383561643836,
      "grad_norm": 30.148611068725586,
      "learning_rate": 1.0499633956809085e-05,
      "loss": 1.4344,
      "step": 1090
    },
    {
      "epoch": 1.8835616438356164,
      "grad_norm": 23.485918045043945,
      "learning_rate": 1.0341029214259099e-05,
      "loss": 1.5767,
      "step": 1100
    },
    {
      "epoch": 1.9006849315068495,
      "grad_norm": 17.657108306884766,
      "learning_rate": 1.0182424471709113e-05,
      "loss": 1.5423,
      "step": 1110
    },
    {
      "epoch": 1.9178082191780823,
      "grad_norm": 22.166349411010742,
      "learning_rate": 1.0023819729159127e-05,
      "loss": 1.5175,
      "step": 1120
    },
    {
      "epoch": 1.9349315068493151,
      "grad_norm": 15.70091438293457,
      "learning_rate": 9.86521498660914e-06,
      "loss": 1.335,
      "step": 1130
    },
    {
      "epoch": 1.952054794520548,
      "grad_norm": 17.611722946166992,
      "learning_rate": 9.706610244059154e-06,
      "loss": 1.3978,
      "step": 1140
    },
    {
      "epoch": 1.9691780821917808,
      "grad_norm": 22.696762084960938,
      "learning_rate": 9.548005501509168e-06,
      "loss": 1.5925,
      "step": 1150
    },
    {
      "epoch": 1.9863013698630136,
      "grad_norm": 19.230512619018555,
      "learning_rate": 9.389400758959182e-06,
      "loss": 1.4233,
      "step": 1160
    },
    {
      "epoch": 2.0,
      "eval_classification_report": {
        "accuracy": 0.361,
        "ar": {
          "f1-score": 0.32941176470588235,
          "precision": 0.417910447761194,
          "recall": 0.27184466019417475,
          "support": 206.0
        },
        "cl": {
          "f1-score": 0.25830258302583026,
          "precision": 0.2777777777777778,
          "recall": 0.2413793103448276,
          "support": 290.0
        },
        "co": {
          "f1-score": 0.3806970509383378,
          "precision": 0.3120879120879121,
          "recall": 0.4879725085910653,
          "support": 291.0
        },
        "es": {
          "f1-score": 0.3992467043314501,
          "precision": 0.42063492063492064,
          "recall": 0.37992831541218636,
          "support": 279.0
        },
        "macro avg": {
          "f1-score": 0.33959150175639824,
          "precision": 0.34685147408513256,
          "recall": 0.34303341981862456,
          "support": 2000.0
        },
        "mx": {
          "f1-score": 0.3804143126177024,
          "precision": 0.42083333333333334,
          "recall": 0.3470790378006873,
          "support": 291.0
        },
        "pe": {
          "f1-score": 0.3188854489164087,
          "precision": 0.29014084507042254,
          "recall": 0.3539518900343643,
          "support": 291.0
        },
        "pr": {
          "f1-score": 0.6325581395348837,
          "precision": 0.5964912280701754,
          "recall": 0.6732673267326733,
          "support": 101.0
        },
        "uy": {
          "f1-score": 0.3568075117370892,
          "precision": 0.38578680203045684,
          "recall": 0.3318777292576419,
          "support": 229.0
        },
        "ve": {
          "f1-score": 0.0,
          "precision": 0.0,
          "recall": 0.0,
          "support": 22.0
        },
        "weighted avg": {
          "f1-score": 0.3570163839128382,
          "precision": 0.3651520553423211,
          "recall": 0.361,
          "support": 2000.0
        }
      },
      "eval_f1": 0.33959150175639824,
      "eval_loss": 1.7848442792892456,
      "eval_runtime": 2.9769,
      "eval_samples_per_second": 671.834,
      "eval_steps_per_second": 83.979,
      "step": 1168
    }
  ],
  "logging_steps": 10,
  "max_steps": 1752,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 306938335993344.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}