{
"best_metric": 0.3047935709180844,
"best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_3/checkpoint-584",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 584,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017123287671232876,
"grad_norm": 11.778327941894531,
"learning_rate": 9.4088732392351e-06,
"loss": 2.2193,
"step": 10
},
{
"epoch": 0.03424657534246575,
"grad_norm": 17.048717498779297,
"learning_rate": 1.88177464784702e-05,
"loss": 2.1721,
"step": 20
},
{
"epoch": 0.05136986301369863,
"grad_norm": 9.865235328674316,
"learning_rate": 2.1552795460563643e-05,
"loss": 2.0945,
"step": 30
},
{
"epoch": 0.0684931506849315,
"grad_norm": 8.911628723144531,
"learning_rate": 2.142763404673923e-05,
"loss": 2.1,
"step": 40
},
{
"epoch": 0.08561643835616438,
"grad_norm": 8.172422409057617,
"learning_rate": 2.130247263291482e-05,
"loss": 2.1,
"step": 50
},
{
"epoch": 0.10273972602739725,
"grad_norm": 8.123610496520996,
"learning_rate": 2.117731121909041e-05,
"loss": 2.0782,
"step": 60
},
{
"epoch": 0.11986301369863013,
"grad_norm": 7.698946475982666,
"learning_rate": 2.1052149805266e-05,
"loss": 1.9786,
"step": 70
},
{
"epoch": 0.136986301369863,
"grad_norm": 8.443121910095215,
"learning_rate": 2.0926988391441588e-05,
"loss": 2.0303,
"step": 80
},
{
"epoch": 0.1541095890410959,
"grad_norm": 8.679265975952148,
"learning_rate": 2.0801826977617177e-05,
"loss": 2.0897,
"step": 90
},
{
"epoch": 0.17123287671232876,
"grad_norm": 10.261368751525879,
"learning_rate": 2.0676665563792766e-05,
"loss": 2.0079,
"step": 100
},
{
"epoch": 0.18835616438356165,
"grad_norm": 8.91393756866455,
"learning_rate": 2.055150414996835e-05,
"loss": 2.0663,
"step": 110
},
{
"epoch": 0.2054794520547945,
"grad_norm": 10.901965141296387,
"learning_rate": 2.042634273614394e-05,
"loss": 1.9684,
"step": 120
},
{
"epoch": 0.2226027397260274,
"grad_norm": 8.871338844299316,
"learning_rate": 2.030118132231953e-05,
"loss": 2.1153,
"step": 130
},
{
"epoch": 0.23972602739726026,
"grad_norm": 8.738993644714355,
"learning_rate": 2.0176019908495118e-05,
"loss": 2.0973,
"step": 140
},
{
"epoch": 0.2568493150684932,
"grad_norm": 8.016680717468262,
"learning_rate": 2.0050858494670707e-05,
"loss": 2.0179,
"step": 150
},
{
"epoch": 0.273972602739726,
"grad_norm": 7.566287994384766,
"learning_rate": 1.9925697080846293e-05,
"loss": 2.0837,
"step": 160
},
{
"epoch": 0.2910958904109589,
"grad_norm": 8.198686599731445,
"learning_rate": 1.9800535667021882e-05,
"loss": 2.0696,
"step": 170
},
{
"epoch": 0.3082191780821918,
"grad_norm": 7.653988838195801,
"learning_rate": 1.967537425319747e-05,
"loss": 1.9815,
"step": 180
},
{
"epoch": 0.3253424657534247,
"grad_norm": 7.869149684906006,
"learning_rate": 1.955021283937306e-05,
"loss": 2.0139,
"step": 190
},
{
"epoch": 0.3424657534246575,
"grad_norm": 11.699186325073242,
"learning_rate": 1.942505142554865e-05,
"loss": 2.0046,
"step": 200
},
{
"epoch": 0.3595890410958904,
"grad_norm": 7.273196220397949,
"learning_rate": 1.9299890011724238e-05,
"loss": 1.9229,
"step": 210
},
{
"epoch": 0.3767123287671233,
"grad_norm": 12.080012321472168,
"learning_rate": 1.9174728597899827e-05,
"loss": 2.0691,
"step": 220
},
{
"epoch": 0.3938356164383562,
"grad_norm": 10.047798156738281,
"learning_rate": 1.9049567184075416e-05,
"loss": 2.0008,
"step": 230
},
{
"epoch": 0.410958904109589,
"grad_norm": 12.511098861694336,
"learning_rate": 1.8924405770251005e-05,
"loss": 2.0886,
"step": 240
},
{
"epoch": 0.4280821917808219,
"grad_norm": 10.744943618774414,
"learning_rate": 1.8799244356426594e-05,
"loss": 1.9832,
"step": 250
},
{
"epoch": 0.4452054794520548,
"grad_norm": 10.715825080871582,
"learning_rate": 1.8674082942602183e-05,
"loss": 1.9835,
"step": 260
},
{
"epoch": 0.4623287671232877,
"grad_norm": 8.178912162780762,
"learning_rate": 1.8548921528777768e-05,
"loss": 2.0427,
"step": 270
},
{
"epoch": 0.4794520547945205,
"grad_norm": 14.410847663879395,
"learning_rate": 1.8423760114953357e-05,
"loss": 1.8612,
"step": 280
},
{
"epoch": 0.4965753424657534,
"grad_norm": 11.317428588867188,
"learning_rate": 1.8298598701128946e-05,
"loss": 1.9943,
"step": 290
},
{
"epoch": 0.5136986301369864,
"grad_norm": 8.62607192993164,
"learning_rate": 1.8173437287304535e-05,
"loss": 1.896,
"step": 300
},
{
"epoch": 0.5308219178082192,
"grad_norm": 8.02371883392334,
"learning_rate": 1.8048275873480124e-05,
"loss": 1.9874,
"step": 310
},
{
"epoch": 0.547945205479452,
"grad_norm": 9.2183198928833,
"learning_rate": 1.7923114459655713e-05,
"loss": 1.9973,
"step": 320
},
{
"epoch": 0.565068493150685,
"grad_norm": 10.167363166809082,
"learning_rate": 1.7797953045831302e-05,
"loss": 2.008,
"step": 330
},
{
"epoch": 0.5821917808219178,
"grad_norm": 8.94150161743164,
"learning_rate": 1.767279163200689e-05,
"loss": 1.9215,
"step": 340
},
{
"epoch": 0.5993150684931506,
"grad_norm": 12.496065139770508,
"learning_rate": 1.754763021818248e-05,
"loss": 1.9297,
"step": 350
},
{
"epoch": 0.6164383561643836,
"grad_norm": 8.680890083312988,
"learning_rate": 1.742246880435807e-05,
"loss": 1.8756,
"step": 360
},
{
"epoch": 0.6335616438356164,
"grad_norm": 12.159443855285645,
"learning_rate": 1.7297307390533658e-05,
"loss": 1.9632,
"step": 370
},
{
"epoch": 0.6506849315068494,
"grad_norm": 10.144058227539062,
"learning_rate": 1.7172145976709244e-05,
"loss": 1.8928,
"step": 380
},
{
"epoch": 0.6678082191780822,
"grad_norm": 9.923543930053711,
"learning_rate": 1.7046984562884833e-05,
"loss": 1.9076,
"step": 390
},
{
"epoch": 0.684931506849315,
"grad_norm": 11.46466064453125,
"learning_rate": 1.6921823149060422e-05,
"loss": 1.92,
"step": 400
},
{
"epoch": 0.702054794520548,
"grad_norm": 9.840792655944824,
"learning_rate": 1.679666173523601e-05,
"loss": 1.8128,
"step": 410
},
{
"epoch": 0.7191780821917808,
"grad_norm": 10.992412567138672,
"learning_rate": 1.66715003214116e-05,
"loss": 1.8548,
"step": 420
},
{
"epoch": 0.7363013698630136,
"grad_norm": 10.46108627319336,
"learning_rate": 1.6546338907587185e-05,
"loss": 1.8455,
"step": 430
},
{
"epoch": 0.7534246575342466,
"grad_norm": 13.512310981750488,
"learning_rate": 1.6421177493762774e-05,
"loss": 1.9271,
"step": 440
},
{
"epoch": 0.7705479452054794,
"grad_norm": 10.795140266418457,
"learning_rate": 1.6296016079938363e-05,
"loss": 1.9033,
"step": 450
},
{
"epoch": 0.7876712328767124,
"grad_norm": 11.524979591369629,
"learning_rate": 1.6170854666113952e-05,
"loss": 1.9248,
"step": 460
},
{
"epoch": 0.8047945205479452,
"grad_norm": 8.86741828918457,
"learning_rate": 1.604569325228954e-05,
"loss": 1.9549,
"step": 470
},
{
"epoch": 0.821917808219178,
"grad_norm": 12.26812744140625,
"learning_rate": 1.592053183846513e-05,
"loss": 1.9197,
"step": 480
},
{
"epoch": 0.839041095890411,
"grad_norm": 17.214059829711914,
"learning_rate": 1.579537042464072e-05,
"loss": 1.9561,
"step": 490
},
{
"epoch": 0.8561643835616438,
"grad_norm": 13.144837379455566,
"learning_rate": 1.5670209010816308e-05,
"loss": 1.9064,
"step": 500
},
{
"epoch": 0.8732876712328768,
"grad_norm": 10.606916427612305,
"learning_rate": 1.5545047596991897e-05,
"loss": 1.8398,
"step": 510
},
{
"epoch": 0.8904109589041096,
"grad_norm": 11.539923667907715,
"learning_rate": 1.5419886183167483e-05,
"loss": 1.9345,
"step": 520
},
{
"epoch": 0.9075342465753424,
"grad_norm": 14.816058158874512,
"learning_rate": 1.5294724769343072e-05,
"loss": 1.8451,
"step": 530
},
{
"epoch": 0.9246575342465754,
"grad_norm": 12.361188888549805,
"learning_rate": 1.5169563355518661e-05,
"loss": 1.7723,
"step": 540
},
{
"epoch": 0.9417808219178082,
"grad_norm": 13.830178260803223,
"learning_rate": 1.504440194169425e-05,
"loss": 1.9174,
"step": 550
},
{
"epoch": 0.958904109589041,
"grad_norm": 11.410951614379883,
"learning_rate": 1.4919240527869839e-05,
"loss": 1.8714,
"step": 560
},
{
"epoch": 0.976027397260274,
"grad_norm": 11.380172729492188,
"learning_rate": 1.4794079114045428e-05,
"loss": 1.8259,
"step": 570
},
{
"epoch": 0.9931506849315068,
"grad_norm": 17.25227165222168,
"learning_rate": 1.4668917700221017e-05,
"loss": 1.8291,
"step": 580
},
{
"epoch": 1.0,
"eval_classification_report": {
"accuracy": 0.3175,
"ar": {
"f1-score": 0.2611111111111111,
"precision": 0.3051948051948052,
"recall": 0.22815533980582525,
"support": 206.0
},
"cl": {
"f1-score": 0.2444113263785395,
"precision": 0.2152230971128609,
"recall": 0.2827586206896552,
"support": 290.0
},
"co": {
"f1-score": 0.35724331926863573,
"precision": 0.30238095238095236,
"recall": 0.436426116838488,
"support": 291.0
},
"es": {
"f1-score": 0.32696390658174096,
"precision": 0.4010416666666667,
"recall": 0.27598566308243727,
"support": 279.0
},
"macro avg": {
"f1-score": 0.3047935709180844,
"precision": 0.3345601053365995,
"recall": 0.29767349498848117,
"support": 2000.0
},
"mx": {
"f1-score": 0.3294663573085847,
"precision": 0.5071428571428571,
"recall": 0.24398625429553264,
"support": 291.0
},
"pe": {
"f1-score": 0.325434439178515,
"precision": 0.30116959064327486,
"recall": 0.3539518900343643,
"support": 291.0
},
"pr": {
"f1-score": 0.6171428571428571,
"precision": 0.7297297297297297,
"recall": 0.5346534653465347,
"support": 101.0
},
"uy": {
"f1-score": 0.2813688212927757,
"precision": 0.24915824915824916,
"recall": 0.3231441048034934,
"support": 229.0
},
"ve": {
"f1-score": 0.0,
"precision": 0.0,
"recall": 0.0,
"support": 22.0
},
"weighted avg": {
"f1-score": 0.3185949649036821,
"precision": 0.3455735871207114,
"recall": 0.3175,
"support": 2000.0
}
},
"eval_f1": 0.3047935709180844,
"eval_loss": 1.8013501167297363,
"eval_runtime": 4.4012,
"eval_samples_per_second": 454.419,
"eval_steps_per_second": 56.802,
"step": 584
}
],
"logging_steps": 10,
"max_steps": 1752,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 306938335993344.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}