bluesky-spanish-classifier / trial_3 /checkpoint-584 /trainer_state.json

End of training

1c7d2cb verified 16 days ago

13.1 kB

	{
	"best_metric": 0.3047935709180844,
	"best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_3/checkpoint-584",
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 584,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.017123287671232876,
	"grad_norm": 11.778327941894531,
	"learning_rate": 9.4088732392351e-06,
	"loss": 2.2193,
	"step": 10
	},
	{
	"epoch": 0.03424657534246575,
	"grad_norm": 17.048717498779297,
	"learning_rate": 1.88177464784702e-05,
	"loss": 2.1721,
	"step": 20
	},
	{
	"epoch": 0.05136986301369863,
	"grad_norm": 9.865235328674316,
	"learning_rate": 2.1552795460563643e-05,
	"loss": 2.0945,
	"step": 30
	},
	{
	"epoch": 0.0684931506849315,
	"grad_norm": 8.911628723144531,
	"learning_rate": 2.142763404673923e-05,
	"loss": 2.1,
	"step": 40
	},
	{
	"epoch": 0.08561643835616438,
	"grad_norm": 8.172422409057617,
	"learning_rate": 2.130247263291482e-05,
	"loss": 2.1,
	"step": 50
	},
	{
	"epoch": 0.10273972602739725,
	"grad_norm": 8.123610496520996,
	"learning_rate": 2.117731121909041e-05,
	"loss": 2.0782,
	"step": 60
	},
	{
	"epoch": 0.11986301369863013,
	"grad_norm": 7.698946475982666,
	"learning_rate": 2.1052149805266e-05,
	"loss": 1.9786,
	"step": 70
	},
	{
	"epoch": 0.136986301369863,
	"grad_norm": 8.443121910095215,
	"learning_rate": 2.0926988391441588e-05,
	"loss": 2.0303,
	"step": 80
	},
	{
	"epoch": 0.1541095890410959,
	"grad_norm": 8.679265975952148,
	"learning_rate": 2.0801826977617177e-05,
	"loss": 2.0897,
	"step": 90
	},
	{
	"epoch": 0.17123287671232876,
	"grad_norm": 10.261368751525879,
	"learning_rate": 2.0676665563792766e-05,
	"loss": 2.0079,
	"step": 100
	},
	{
	"epoch": 0.18835616438356165,
	"grad_norm": 8.91393756866455,
	"learning_rate": 2.055150414996835e-05,
	"loss": 2.0663,
	"step": 110
	},
	{
	"epoch": 0.2054794520547945,
	"grad_norm": 10.901965141296387,
	"learning_rate": 2.042634273614394e-05,
	"loss": 1.9684,
	"step": 120
	},
	{
	"epoch": 0.2226027397260274,
	"grad_norm": 8.871338844299316,
	"learning_rate": 2.030118132231953e-05,
	"loss": 2.1153,
	"step": 130
	},
	{
	"epoch": 0.23972602739726026,
	"grad_norm": 8.738993644714355,
	"learning_rate": 2.0176019908495118e-05,
	"loss": 2.0973,
	"step": 140
	},
	{
	"epoch": 0.2568493150684932,
	"grad_norm": 8.016680717468262,
	"learning_rate": 2.0050858494670707e-05,
	"loss": 2.0179,
	"step": 150
	},
	{
	"epoch": 0.273972602739726,
	"grad_norm": 7.566287994384766,
	"learning_rate": 1.9925697080846293e-05,
	"loss": 2.0837,
	"step": 160
	},
	{
	"epoch": 0.2910958904109589,
	"grad_norm": 8.198686599731445,
	"learning_rate": 1.9800535667021882e-05,
	"loss": 2.0696,
	"step": 170
	},
	{
	"epoch": 0.3082191780821918,
	"grad_norm": 7.653988838195801,
	"learning_rate": 1.967537425319747e-05,
	"loss": 1.9815,
	"step": 180
	},
	{
	"epoch": 0.3253424657534247,
	"grad_norm": 7.869149684906006,
	"learning_rate": 1.955021283937306e-05,
	"loss": 2.0139,
	"step": 190
	},
	{
	"epoch": 0.3424657534246575,
	"grad_norm": 11.699186325073242,
	"learning_rate": 1.942505142554865e-05,
	"loss": 2.0046,
	"step": 200
	},
	{
	"epoch": 0.3595890410958904,
	"grad_norm": 7.273196220397949,
	"learning_rate": 1.9299890011724238e-05,
	"loss": 1.9229,
	"step": 210
	},
	{
	"epoch": 0.3767123287671233,
	"grad_norm": 12.080012321472168,
	"learning_rate": 1.9174728597899827e-05,
	"loss": 2.0691,
	"step": 220
	},
	{
	"epoch": 0.3938356164383562,
	"grad_norm": 10.047798156738281,
	"learning_rate": 1.9049567184075416e-05,
	"loss": 2.0008,
	"step": 230
	},
	{
	"epoch": 0.410958904109589,
	"grad_norm": 12.511098861694336,
	"learning_rate": 1.8924405770251005e-05,
	"loss": 2.0886,
	"step": 240
	},
	{
	"epoch": 0.4280821917808219,
	"grad_norm": 10.744943618774414,
	"learning_rate": 1.8799244356426594e-05,
	"loss": 1.9832,
	"step": 250
	},
	{
	"epoch": 0.4452054794520548,
	"grad_norm": 10.715825080871582,
	"learning_rate": 1.8674082942602183e-05,
	"loss": 1.9835,
	"step": 260
	},
	{
	"epoch": 0.4623287671232877,
	"grad_norm": 8.178912162780762,
	"learning_rate": 1.8548921528777768e-05,
	"loss": 2.0427,
	"step": 270
	},
	{
	"epoch": 0.4794520547945205,
	"grad_norm": 14.410847663879395,
	"learning_rate": 1.8423760114953357e-05,
	"loss": 1.8612,
	"step": 280
	},
	{
	"epoch": 0.4965753424657534,
	"grad_norm": 11.317428588867188,
	"learning_rate": 1.8298598701128946e-05,
	"loss": 1.9943,
	"step": 290
	},
	{
	"epoch": 0.5136986301369864,
	"grad_norm": 8.62607192993164,
	"learning_rate": 1.8173437287304535e-05,
	"loss": 1.896,
	"step": 300
	},
	{
	"epoch": 0.5308219178082192,
	"grad_norm": 8.02371883392334,
	"learning_rate": 1.8048275873480124e-05,
	"loss": 1.9874,
	"step": 310
	},
	{
	"epoch": 0.547945205479452,
	"grad_norm": 9.2183198928833,
	"learning_rate": 1.7923114459655713e-05,
	"loss": 1.9973,
	"step": 320
	},
	{
	"epoch": 0.565068493150685,
	"grad_norm": 10.167363166809082,
	"learning_rate": 1.7797953045831302e-05,
	"loss": 2.008,
	"step": 330
	},
	{
	"epoch": 0.5821917808219178,
	"grad_norm": 8.94150161743164,
	"learning_rate": 1.767279163200689e-05,
	"loss": 1.9215,
	"step": 340
	},
	{
	"epoch": 0.5993150684931506,
	"grad_norm": 12.496065139770508,
	"learning_rate": 1.754763021818248e-05,
	"loss": 1.9297,
	"step": 350
	},
	{
	"epoch": 0.6164383561643836,
	"grad_norm": 8.680890083312988,
	"learning_rate": 1.742246880435807e-05,
	"loss": 1.8756,
	"step": 360
	},
	{
	"epoch": 0.6335616438356164,
	"grad_norm": 12.159443855285645,
	"learning_rate": 1.7297307390533658e-05,
	"loss": 1.9632,
	"step": 370
	},
	{
	"epoch": 0.6506849315068494,
	"grad_norm": 10.144058227539062,
	"learning_rate": 1.7172145976709244e-05,
	"loss": 1.8928,
	"step": 380
	},
	{
	"epoch": 0.6678082191780822,
	"grad_norm": 9.923543930053711,
	"learning_rate": 1.7046984562884833e-05,
	"loss": 1.9076,
	"step": 390
	},
	{
	"epoch": 0.684931506849315,
	"grad_norm": 11.46466064453125,
	"learning_rate": 1.6921823149060422e-05,
	"loss": 1.92,
	"step": 400
	},
	{
	"epoch": 0.702054794520548,
	"grad_norm": 9.840792655944824,
	"learning_rate": 1.679666173523601e-05,
	"loss": 1.8128,
	"step": 410
	},
	{
	"epoch": 0.7191780821917808,
	"grad_norm": 10.992412567138672,
	"learning_rate": 1.66715003214116e-05,
	"loss": 1.8548,
	"step": 420
	},
	{
	"epoch": 0.7363013698630136,
	"grad_norm": 10.46108627319336,
	"learning_rate": 1.6546338907587185e-05,
	"loss": 1.8455,
	"step": 430
	},
	{
	"epoch": 0.7534246575342466,
	"grad_norm": 13.512310981750488,
	"learning_rate": 1.6421177493762774e-05,
	"loss": 1.9271,
	"step": 440
	},
	{
	"epoch": 0.7705479452054794,
	"grad_norm": 10.795140266418457,
	"learning_rate": 1.6296016079938363e-05,
	"loss": 1.9033,
	"step": 450
	},
	{
	"epoch": 0.7876712328767124,
	"grad_norm": 11.524979591369629,
	"learning_rate": 1.6170854666113952e-05,
	"loss": 1.9248,
	"step": 460
	},
	{
	"epoch": 0.8047945205479452,
	"grad_norm": 8.86741828918457,
	"learning_rate": 1.604569325228954e-05,
	"loss": 1.9549,
	"step": 470
	},
	{
	"epoch": 0.821917808219178,
	"grad_norm": 12.26812744140625,
	"learning_rate": 1.592053183846513e-05,
	"loss": 1.9197,
	"step": 480
	},
	{
	"epoch": 0.839041095890411,
	"grad_norm": 17.214059829711914,
	"learning_rate": 1.579537042464072e-05,
	"loss": 1.9561,
	"step": 490
	},
	{
	"epoch": 0.8561643835616438,
	"grad_norm": 13.144837379455566,
	"learning_rate": 1.5670209010816308e-05,
	"loss": 1.9064,
	"step": 500
	},
	{
	"epoch": 0.8732876712328768,
	"grad_norm": 10.606916427612305,
	"learning_rate": 1.5545047596991897e-05,
	"loss": 1.8398,
	"step": 510
	},
	{
	"epoch": 0.8904109589041096,
	"grad_norm": 11.539923667907715,
	"learning_rate": 1.5419886183167483e-05,
	"loss": 1.9345,
	"step": 520
	},
	{
	"epoch": 0.9075342465753424,
	"grad_norm": 14.816058158874512,
	"learning_rate": 1.5294724769343072e-05,
	"loss": 1.8451,
	"step": 530
	},
	{
	"epoch": 0.9246575342465754,
	"grad_norm": 12.361188888549805,
	"learning_rate": 1.5169563355518661e-05,
	"loss": 1.7723,
	"step": 540
	},
	{
	"epoch": 0.9417808219178082,
	"grad_norm": 13.830178260803223,
	"learning_rate": 1.504440194169425e-05,
	"loss": 1.9174,
	"step": 550
	},
	{
	"epoch": 0.958904109589041,
	"grad_norm": 11.410951614379883,
	"learning_rate": 1.4919240527869839e-05,
	"loss": 1.8714,
	"step": 560
	},
	{
	"epoch": 0.976027397260274,
	"grad_norm": 11.380172729492188,
	"learning_rate": 1.4794079114045428e-05,
	"loss": 1.8259,
	"step": 570
	},
	{
	"epoch": 0.9931506849315068,
	"grad_norm": 17.25227165222168,
	"learning_rate": 1.4668917700221017e-05,
	"loss": 1.8291,
	"step": 580
	},
	{
	"epoch": 1.0,
	"eval_classification_report": {
	"accuracy": 0.3175,
	"ar": {
	"f1-score": 0.2611111111111111,
	"precision": 0.3051948051948052,
	"recall": 0.22815533980582525,
	"support": 206.0
	},
	"cl": {
	"f1-score": 0.2444113263785395,
	"precision": 0.2152230971128609,
	"recall": 0.2827586206896552,
	"support": 290.0
	},
	"co": {
	"f1-score": 0.35724331926863573,
	"precision": 0.30238095238095236,
	"recall": 0.436426116838488,
	"support": 291.0
	},
	"es": {
	"f1-score": 0.32696390658174096,
	"precision": 0.4010416666666667,
	"recall": 0.27598566308243727,
	"support": 279.0
	},
	"macro avg": {
	"f1-score": 0.3047935709180844,
	"precision": 0.3345601053365995,
	"recall": 0.29767349498848117,
	"support": 2000.0
	},
	"mx": {
	"f1-score": 0.3294663573085847,
	"precision": 0.5071428571428571,
	"recall": 0.24398625429553264,
	"support": 291.0
	},
	"pe": {
	"f1-score": 0.325434439178515,
	"precision": 0.30116959064327486,
	"recall": 0.3539518900343643,
	"support": 291.0
	},
	"pr": {
	"f1-score": 0.6171428571428571,
	"precision": 0.7297297297297297,
	"recall": 0.5346534653465347,
	"support": 101.0
	},
	"uy": {
	"f1-score": 0.2813688212927757,
	"precision": 0.24915824915824916,
	"recall": 0.3231441048034934,
	"support": 229.0
	},
	"ve": {
	"f1-score": 0.0,
	"precision": 0.0,
	"recall": 0.0,
	"support": 22.0
	},
	"weighted avg": {
	"f1-score": 0.3185949649036821,
	"precision": 0.3455735871207114,
	"recall": 0.3175,
	"support": 2000.0
	}
	},
	"eval_f1": 0.3047935709180844,
	"eval_loss": 1.8013501167297363,
	"eval_runtime": 4.4012,
	"eval_samples_per_second": 454.419,
	"eval_steps_per_second": 56.802,
	"step": 584
	}
	],
	"logging_steps": 10,
	"max_steps": 1752,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 306938335993344.0,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}