llama2-PT / checkpoint-3700 /trainer_state.json
0x-YuAN's picture
Upload folder using huggingface_hub
01c0c4c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.045894387614045,
"global_step": 3700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.9999895164082156e-05,
"loss": 1.6349,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 4.999958065720787e-05,
"loss": 1.6199,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 4.999905648201487e-05,
"loss": 1.4834,
"step": 15
},
{
"epoch": 0.01,
"learning_rate": 4.999832264289934e-05,
"loss": 1.3882,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 4.999737914601591e-05,
"loss": 1.3679,
"step": 25
},
{
"epoch": 0.02,
"learning_rate": 4.999622599927756e-05,
"loss": 1.2396,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 4.999486321235559e-05,
"loss": 1.321,
"step": 35
},
{
"epoch": 0.02,
"learning_rate": 4.9993290796679516e-05,
"loss": 1.2874,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 4.999150876543699e-05,
"loss": 1.2607,
"step": 45
},
{
"epoch": 0.03,
"learning_rate": 4.9989517133573694e-05,
"loss": 1.2454,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 4.9987315917793174e-05,
"loss": 1.2799,
"step": 55
},
{
"epoch": 0.03,
"learning_rate": 4.998490513655676e-05,
"loss": 1.2575,
"step": 60
},
{
"epoch": 0.04,
"learning_rate": 4.998228481008337e-05,
"loss": 1.2404,
"step": 65
},
{
"epoch": 0.04,
"learning_rate": 4.997945496034934e-05,
"loss": 1.2219,
"step": 70
},
{
"epoch": 0.04,
"learning_rate": 4.9976415611088267e-05,
"loss": 1.2241,
"step": 75
},
{
"epoch": 0.04,
"learning_rate": 4.997316678779079e-05,
"loss": 1.1716,
"step": 80
},
{
"epoch": 0.05,
"learning_rate": 4.996970851770438e-05,
"loss": 1.1883,
"step": 85
},
{
"epoch": 0.05,
"learning_rate": 4.9966040829833115e-05,
"loss": 1.205,
"step": 90
},
{
"epoch": 0.05,
"learning_rate": 4.9962163754937426e-05,
"loss": 1.1246,
"step": 95
},
{
"epoch": 0.06,
"learning_rate": 4.995807732553384e-05,
"loss": 1.1636,
"step": 100
},
{
"epoch": 0.06,
"learning_rate": 4.9953781575894723e-05,
"loss": 1.158,
"step": 105
},
{
"epoch": 0.06,
"learning_rate": 4.9949276542048e-05,
"loss": 1.1477,
"step": 110
},
{
"epoch": 0.06,
"learning_rate": 4.9944562261776805e-05,
"loss": 1.1678,
"step": 115
},
{
"epoch": 0.07,
"learning_rate": 4.9939638774619216e-05,
"loss": 1.1501,
"step": 120
},
{
"epoch": 0.07,
"learning_rate": 4.99345061218679e-05,
"loss": 1.1955,
"step": 125
},
{
"epoch": 0.07,
"learning_rate": 4.9929164346569756e-05,
"loss": 1.1724,
"step": 130
},
{
"epoch": 0.07,
"learning_rate": 4.9923613493525576e-05,
"loss": 1.177,
"step": 135
},
{
"epoch": 0.08,
"learning_rate": 4.991785360928968e-05,
"loss": 1.1418,
"step": 140
},
{
"epoch": 0.08,
"learning_rate": 4.991188474216947e-05,
"loss": 1.1898,
"step": 145
},
{
"epoch": 0.08,
"learning_rate": 4.9905706942225094e-05,
"loss": 1.1479,
"step": 150
},
{
"epoch": 0.09,
"learning_rate": 4.9899320261268966e-05,
"loss": 1.1356,
"step": 155
},
{
"epoch": 0.09,
"learning_rate": 4.989272475286537e-05,
"loss": 1.1397,
"step": 160
},
{
"epoch": 0.09,
"learning_rate": 4.9885920472330004e-05,
"loss": 1.1215,
"step": 165
},
{
"epoch": 0.09,
"learning_rate": 4.9878907476729516e-05,
"loss": 1.167,
"step": 170
},
{
"epoch": 0.1,
"learning_rate": 4.9871685824881e-05,
"loss": 1.1219,
"step": 175
},
{
"epoch": 0.1,
"learning_rate": 4.9864255577351534e-05,
"loss": 1.0835,
"step": 180
},
{
"epoch": 0.1,
"learning_rate": 4.985661679645769e-05,
"loss": 1.0721,
"step": 185
},
{
"epoch": 0.11,
"learning_rate": 4.9848769546264915e-05,
"loss": 1.0692,
"step": 190
},
{
"epoch": 0.11,
"learning_rate": 4.9840713892587146e-05,
"loss": 1.0488,
"step": 195
},
{
"epoch": 0.11,
"learning_rate": 4.983244990298609e-05,
"loss": 1.1285,
"step": 200
},
{
"epoch": 0.11,
"learning_rate": 4.982397764677081e-05,
"loss": 1.0832,
"step": 205
},
{
"epoch": 0.12,
"learning_rate": 4.981529719499704e-05,
"loss": 1.0652,
"step": 210
},
{
"epoch": 0.12,
"learning_rate": 4.980640862046663e-05,
"loss": 1.1043,
"step": 215
},
{
"epoch": 0.12,
"learning_rate": 4.979731199772693e-05,
"loss": 1.112,
"step": 220
},
{
"epoch": 0.12,
"learning_rate": 4.9788007403070146e-05,
"loss": 1.1029,
"step": 225
},
{
"epoch": 0.13,
"learning_rate": 4.977849491453277e-05,
"loss": 1.0869,
"step": 230
},
{
"epoch": 0.13,
"learning_rate": 4.976877461189481e-05,
"loss": 1.0843,
"step": 235
},
{
"epoch": 0.13,
"learning_rate": 4.975884657667922e-05,
"loss": 1.0789,
"step": 240
},
{
"epoch": 0.14,
"learning_rate": 4.974871089215118e-05,
"loss": 1.0449,
"step": 245
},
{
"epoch": 0.14,
"learning_rate": 4.9738367643317405e-05,
"loss": 1.1053,
"step": 250
},
{
"epoch": 0.14,
"learning_rate": 4.9727816916925395e-05,
"loss": 1.0651,
"step": 255
},
{
"epoch": 0.14,
"learning_rate": 4.971705880146276e-05,
"loss": 1.0828,
"step": 260
},
{
"epoch": 0.15,
"learning_rate": 4.970609338715646e-05,
"loss": 1.0932,
"step": 265
},
{
"epoch": 0.15,
"learning_rate": 4.969492076597203e-05,
"loss": 1.0648,
"step": 270
},
{
"epoch": 0.15,
"learning_rate": 4.968354103161283e-05,
"loss": 1.0948,
"step": 275
},
{
"epoch": 0.15,
"learning_rate": 4.967195427951926e-05,
"loss": 1.0721,
"step": 280
},
{
"epoch": 0.16,
"learning_rate": 4.9660160606867936e-05,
"loss": 1.124,
"step": 285
},
{
"epoch": 0.16,
"learning_rate": 4.9648160112570896e-05,
"loss": 1.0963,
"step": 290
},
{
"epoch": 0.16,
"learning_rate": 4.9635952897274773e-05,
"loss": 1.1078,
"step": 295
},
{
"epoch": 0.17,
"learning_rate": 4.9623539063359925e-05,
"loss": 1.1059,
"step": 300
},
{
"epoch": 0.17,
"learning_rate": 4.961091871493962e-05,
"loss": 1.1032,
"step": 305
},
{
"epoch": 0.17,
"learning_rate": 4.959809195785912e-05,
"loss": 1.0595,
"step": 310
},
{
"epoch": 0.17,
"learning_rate": 4.958505889969481e-05,
"loss": 1.1096,
"step": 315
},
{
"epoch": 0.18,
"learning_rate": 4.957181964975329e-05,
"loss": 1.0589,
"step": 320
},
{
"epoch": 0.18,
"learning_rate": 4.955837431907049e-05,
"loss": 1.0608,
"step": 325
},
{
"epoch": 0.18,
"learning_rate": 4.954472302041069e-05,
"loss": 1.0819,
"step": 330
},
{
"epoch": 0.19,
"learning_rate": 4.9530865868265605e-05,
"loss": 1.0759,
"step": 335
},
{
"epoch": 0.19,
"learning_rate": 4.951680297885342e-05,
"loss": 1.0515,
"step": 340
},
{
"epoch": 0.19,
"learning_rate": 4.950253447011779e-05,
"loss": 1.0371,
"step": 345
},
{
"epoch": 0.19,
"learning_rate": 4.948806046172691e-05,
"loss": 1.0619,
"step": 350
},
{
"epoch": 0.2,
"learning_rate": 4.947338107507245e-05,
"loss": 1.0757,
"step": 355
},
{
"epoch": 0.2,
"learning_rate": 4.945849643326857e-05,
"loss": 1.0686,
"step": 360
},
{
"epoch": 0.2,
"learning_rate": 4.9443406661150874e-05,
"loss": 1.0809,
"step": 365
},
{
"epoch": 0.2,
"learning_rate": 4.942811188527537e-05,
"loss": 1.0704,
"step": 370
},
{
"epoch": 0.21,
"learning_rate": 4.941261223391742e-05,
"loss": 1.0655,
"step": 375
},
{
"epoch": 0.21,
"learning_rate": 4.939690783707063e-05,
"loss": 1.1182,
"step": 380
},
{
"epoch": 0.21,
"learning_rate": 4.938099882644578e-05,
"loss": 1.081,
"step": 385
},
{
"epoch": 0.22,
"learning_rate": 4.9364885335469734e-05,
"loss": 1.0792,
"step": 390
},
{
"epoch": 0.22,
"learning_rate": 4.93485674992843e-05,
"loss": 1.0244,
"step": 395
},
{
"epoch": 0.22,
"learning_rate": 4.933204545474511e-05,
"loss": 1.0531,
"step": 400
},
{
"epoch": 0.22,
"learning_rate": 4.9315319340420465e-05,
"loss": 1.048,
"step": 405
},
{
"epoch": 0.23,
"learning_rate": 4.929838929659015e-05,
"loss": 1.04,
"step": 410
},
{
"epoch": 0.23,
"learning_rate": 4.9281255465244314e-05,
"loss": 1.0686,
"step": 415
},
{
"epoch": 0.23,
"learning_rate": 4.926391799008223e-05,
"loss": 1.0933,
"step": 420
},
{
"epoch": 0.24,
"learning_rate": 4.924637701651111e-05,
"loss": 1.0313,
"step": 425
},
{
"epoch": 0.24,
"learning_rate": 4.9228632691644874e-05,
"loss": 1.0811,
"step": 430
},
{
"epoch": 0.24,
"learning_rate": 4.921068516430293e-05,
"loss": 1.0617,
"step": 435
},
{
"epoch": 0.24,
"learning_rate": 4.919253458500892e-05,
"loss": 1.0482,
"step": 440
},
{
"epoch": 0.25,
"learning_rate": 4.9174181105989445e-05,
"loss": 1.0681,
"step": 445
},
{
"epoch": 0.25,
"learning_rate": 4.9155624881172834e-05,
"loss": 1.0476,
"step": 450
},
{
"epoch": 0.25,
"learning_rate": 4.913686606618777e-05,
"loss": 1.0463,
"step": 455
},
{
"epoch": 0.25,
"learning_rate": 4.911790481836208e-05,
"loss": 1.0213,
"step": 460
},
{
"epoch": 0.26,
"learning_rate": 4.909874129672133e-05,
"loss": 0.9855,
"step": 465
},
{
"epoch": 0.26,
"learning_rate": 4.907937566198757e-05,
"loss": 1.045,
"step": 470
},
{
"epoch": 0.26,
"learning_rate": 4.9059808076577914e-05,
"loss": 1.0442,
"step": 475
},
{
"epoch": 0.27,
"learning_rate": 4.904003870460323e-05,
"loss": 1.0354,
"step": 480
},
{
"epoch": 0.27,
"learning_rate": 4.9020067711866735e-05,
"loss": 1.0331,
"step": 485
},
{
"epoch": 0.27,
"learning_rate": 4.899989526586261e-05,
"loss": 1.0585,
"step": 490
},
{
"epoch": 0.27,
"learning_rate": 4.8979521535774636e-05,
"loss": 1.0223,
"step": 495
},
{
"epoch": 0.28,
"learning_rate": 4.895894669247468e-05,
"loss": 1.0118,
"step": 500
},
{
"epoch": 0.28,
"learning_rate": 4.8938170908521356e-05,
"loss": 1.0508,
"step": 505
},
{
"epoch": 0.28,
"learning_rate": 4.8917194358158534e-05,
"loss": 1.0656,
"step": 510
},
{
"epoch": 0.28,
"learning_rate": 4.8896017217313886e-05,
"loss": 1.0655,
"step": 515
},
{
"epoch": 0.29,
"learning_rate": 4.887463966359741e-05,
"loss": 1.0833,
"step": 520
},
{
"epoch": 0.29,
"learning_rate": 4.8853061876299956e-05,
"loss": 1.068,
"step": 525
},
{
"epoch": 0.29,
"learning_rate": 4.8831284036391684e-05,
"loss": 1.0487,
"step": 530
},
{
"epoch": 0.3,
"learning_rate": 4.880930632652058e-05,
"loss": 1.059,
"step": 535
},
{
"epoch": 0.3,
"learning_rate": 4.878712893101092e-05,
"loss": 1.0408,
"step": 540
},
{
"epoch": 0.3,
"learning_rate": 4.876475203586171e-05,
"loss": 0.9976,
"step": 545
},
{
"epoch": 0.3,
"learning_rate": 4.874217582874514e-05,
"loss": 1.0358,
"step": 550
},
{
"epoch": 0.31,
"learning_rate": 4.8719400499005e-05,
"loss": 1.0073,
"step": 555
},
{
"epoch": 0.31,
"learning_rate": 4.869642623765509e-05,
"loss": 1.0742,
"step": 560
},
{
"epoch": 0.31,
"learning_rate": 4.867325323737765e-05,
"loss": 1.0463,
"step": 565
},
{
"epoch": 0.32,
"learning_rate": 4.864988169252168e-05,
"loss": 1.0242,
"step": 570
},
{
"epoch": 0.32,
"learning_rate": 4.8626311799101375e-05,
"loss": 1.0773,
"step": 575
},
{
"epoch": 0.32,
"learning_rate": 4.860254375479446e-05,
"loss": 1.0371,
"step": 580
},
{
"epoch": 0.32,
"learning_rate": 4.8578577758940504e-05,
"loss": 1.0399,
"step": 585
},
{
"epoch": 0.33,
"learning_rate": 4.855441401253928e-05,
"loss": 1.0411,
"step": 590
},
{
"epoch": 0.33,
"learning_rate": 4.8530052718249076e-05,
"loss": 1.0476,
"step": 595
},
{
"epoch": 0.33,
"learning_rate": 4.850549408038498e-05,
"loss": 1.0075,
"step": 600
},
{
"epoch": 0.33,
"learning_rate": 4.848073830491717e-05,
"loss": 1.0045,
"step": 605
},
{
"epoch": 0.34,
"learning_rate": 4.845578559946923e-05,
"loss": 1.0368,
"step": 610
},
{
"epoch": 0.34,
"learning_rate": 4.8430636173316306e-05,
"loss": 1.0156,
"step": 615
},
{
"epoch": 0.34,
"learning_rate": 4.840529023738348e-05,
"loss": 1.0334,
"step": 620
},
{
"epoch": 0.35,
"learning_rate": 4.837974800424389e-05,
"loss": 0.9994,
"step": 625
},
{
"epoch": 0.35,
"learning_rate": 4.8354009688117026e-05,
"loss": 1.0467,
"step": 630
},
{
"epoch": 0.35,
"learning_rate": 4.8328075504866874e-05,
"loss": 0.9968,
"step": 635
},
{
"epoch": 0.35,
"learning_rate": 4.8301945672000164e-05,
"loss": 1.041,
"step": 640
},
{
"epoch": 0.36,
"learning_rate": 4.8275620408664487e-05,
"loss": 0.9811,
"step": 645
},
{
"epoch": 0.36,
"learning_rate": 4.8249099935646494e-05,
"loss": 1.0235,
"step": 650
},
{
"epoch": 0.36,
"learning_rate": 4.822238447537003e-05,
"loss": 1.0152,
"step": 655
},
{
"epoch": 0.36,
"learning_rate": 4.819547425189429e-05,
"loss": 1.038,
"step": 660
},
{
"epoch": 0.37,
"learning_rate": 4.81683694909119e-05,
"loss": 1.0387,
"step": 665
},
{
"epoch": 0.37,
"learning_rate": 4.814107041974707e-05,
"loss": 1.019,
"step": 670
},
{
"epoch": 0.37,
"learning_rate": 4.811357726735366e-05,
"loss": 1.0403,
"step": 675
},
{
"epoch": 0.38,
"learning_rate": 4.808589026431324e-05,
"loss": 1.0828,
"step": 680
},
{
"epoch": 0.38,
"learning_rate": 4.805800964283322e-05,
"loss": 1.0128,
"step": 685
},
{
"epoch": 0.38,
"learning_rate": 4.802993563674483e-05,
"loss": 1.0502,
"step": 690
},
{
"epoch": 0.38,
"learning_rate": 4.80016684815012e-05,
"loss": 1.0169,
"step": 695
},
{
"epoch": 0.39,
"learning_rate": 4.7973208414175406e-05,
"loss": 1.0316,
"step": 700
},
{
"epoch": 0.39,
"learning_rate": 4.794455567345842e-05,
"loss": 1.0263,
"step": 705
},
{
"epoch": 0.39,
"learning_rate": 4.791571049965714e-05,
"loss": 1.0801,
"step": 710
},
{
"epoch": 0.4,
"learning_rate": 4.7886673134692404e-05,
"loss": 1.0575,
"step": 715
},
{
"epoch": 0.4,
"learning_rate": 4.7857443822096905e-05,
"loss": 0.9767,
"step": 720
},
{
"epoch": 0.4,
"learning_rate": 4.782802280701319e-05,
"loss": 0.9666,
"step": 725
},
{
"epoch": 0.4,
"learning_rate": 4.77984103361916e-05,
"loss": 0.9983,
"step": 730
},
{
"epoch": 0.41,
"learning_rate": 4.776860665798816e-05,
"loss": 1.0219,
"step": 735
},
{
"epoch": 0.41,
"learning_rate": 4.773861202236257e-05,
"loss": 0.9963,
"step": 740
},
{
"epoch": 0.41,
"learning_rate": 4.770842668087602e-05,
"loss": 1.0219,
"step": 745
},
{
"epoch": 0.41,
"learning_rate": 4.767805088668916e-05,
"loss": 1.0029,
"step": 750
},
{
"epoch": 0.42,
"learning_rate": 4.7647484894559936e-05,
"loss": 1.0077,
"step": 755
},
{
"epoch": 0.42,
"learning_rate": 4.7616728960841444e-05,
"loss": 0.9908,
"step": 760
},
{
"epoch": 0.42,
"learning_rate": 4.758578334347981e-05,
"loss": 1.0092,
"step": 765
},
{
"epoch": 0.43,
"learning_rate": 4.7554648302012015e-05,
"loss": 0.9874,
"step": 770
},
{
"epoch": 0.43,
"learning_rate": 4.7523324097563706e-05,
"loss": 1.0185,
"step": 775
},
{
"epoch": 0.43,
"learning_rate": 4.749181099284703e-05,
"loss": 1.0533,
"step": 780
},
{
"epoch": 0.43,
"learning_rate": 4.746010925215839e-05,
"loss": 1.0083,
"step": 785
},
{
"epoch": 0.44,
"learning_rate": 4.74282191413763e-05,
"loss": 1.0191,
"step": 790
},
{
"epoch": 0.44,
"learning_rate": 4.7396140927959045e-05,
"loss": 0.9786,
"step": 795
},
{
"epoch": 0.44,
"learning_rate": 4.7363874880942574e-05,
"loss": 1.0197,
"step": 800
},
{
"epoch": 0.45,
"learning_rate": 4.733142127093813e-05,
"loss": 1.0471,
"step": 805
},
{
"epoch": 0.45,
"learning_rate": 4.7298780370130014e-05,
"loss": 1.0134,
"step": 810
},
{
"epoch": 0.45,
"learning_rate": 4.726595245227336e-05,
"loss": 0.9801,
"step": 815
},
{
"epoch": 0.45,
"learning_rate": 4.723293779269173e-05,
"loss": 0.9944,
"step": 820
},
{
"epoch": 0.46,
"learning_rate": 4.7199736668274924e-05,
"loss": 1.0358,
"step": 825
},
{
"epoch": 0.46,
"learning_rate": 4.716634935747655e-05,
"loss": 0.9907,
"step": 830
},
{
"epoch": 0.46,
"learning_rate": 4.713277614031177e-05,
"loss": 1.0698,
"step": 835
},
{
"epoch": 0.46,
"learning_rate": 4.70990172983549e-05,
"loss": 1.0466,
"step": 840
},
{
"epoch": 0.47,
"learning_rate": 4.706507311473707e-05,
"loss": 0.9489,
"step": 845
},
{
"epoch": 0.47,
"learning_rate": 4.703094387414385e-05,
"loss": 0.9936,
"step": 850
},
{
"epoch": 0.47,
"learning_rate": 4.699662986281288e-05,
"loss": 0.9672,
"step": 855
},
{
"epoch": 0.48,
"learning_rate": 4.696213136853141e-05,
"loss": 1.0164,
"step": 860
},
{
"epoch": 0.48,
"learning_rate": 4.6927448680633954e-05,
"loss": 0.9817,
"step": 865
},
{
"epoch": 0.48,
"learning_rate": 4.689258208999983e-05,
"loss": 1.028,
"step": 870
},
{
"epoch": 0.48,
"learning_rate": 4.6857531889050716e-05,
"loss": 1.0239,
"step": 875
},
{
"epoch": 0.49,
"learning_rate": 4.682229837174821e-05,
"loss": 1.0378,
"step": 880
},
{
"epoch": 0.49,
"learning_rate": 4.678688183359135e-05,
"loss": 0.9908,
"step": 885
},
{
"epoch": 0.49,
"learning_rate": 4.675128257161418e-05,
"loss": 0.9384,
"step": 890
},
{
"epoch": 0.49,
"learning_rate": 4.671550088438319e-05,
"loss": 0.9481,
"step": 895
},
{
"epoch": 0.5,
"learning_rate": 4.6679537071994874e-05,
"loss": 0.9541,
"step": 900
},
{
"epoch": 0.5,
"learning_rate": 4.6643391436073165e-05,
"loss": 0.9787,
"step": 905
},
{
"epoch": 0.5,
"learning_rate": 4.660706427976693e-05,
"loss": 0.9897,
"step": 910
},
{
"epoch": 0.51,
"learning_rate": 4.657055590774745e-05,
"loss": 0.9947,
"step": 915
},
{
"epoch": 0.51,
"learning_rate": 4.6533866626205805e-05,
"loss": 0.9614,
"step": 920
},
{
"epoch": 0.51,
"learning_rate": 4.649699674285036e-05,
"loss": 1.0323,
"step": 925
},
{
"epoch": 0.51,
"learning_rate": 4.645994656690417e-05,
"loss": 1.0345,
"step": 930
},
{
"epoch": 0.52,
"learning_rate": 4.642271640910235e-05,
"loss": 1.0432,
"step": 935
},
{
"epoch": 0.52,
"learning_rate": 4.638530658168954e-05,
"loss": 1.0102,
"step": 940
},
{
"epoch": 0.52,
"learning_rate": 4.6347717398417203e-05,
"loss": 1.0361,
"step": 945
},
{
"epoch": 0.53,
"learning_rate": 4.6309949174541096e-05,
"loss": 0.9818,
"step": 950
},
{
"epoch": 0.53,
"learning_rate": 4.627200222681851e-05,
"loss": 1.0293,
"step": 955
},
{
"epoch": 0.53,
"learning_rate": 4.6233876873505694e-05,
"loss": 1.0125,
"step": 960
},
{
"epoch": 0.53,
"learning_rate": 4.619557343435516e-05,
"loss": 1.0039,
"step": 965
},
{
"epoch": 0.54,
"learning_rate": 4.615709223061302e-05,
"loss": 1.0146,
"step": 970
},
{
"epoch": 0.54,
"learning_rate": 4.611843358501624e-05,
"loss": 1.0352,
"step": 975
},
{
"epoch": 0.54,
"learning_rate": 4.6079597821789993e-05,
"loss": 1.0405,
"step": 980
},
{
"epoch": 0.54,
"learning_rate": 4.604058526664491e-05,
"loss": 0.9834,
"step": 985
},
{
"epoch": 0.55,
"learning_rate": 4.600139624677436e-05,
"loss": 1.0252,
"step": 990
},
{
"epoch": 0.55,
"learning_rate": 4.596203109085168e-05,
"loss": 1.0516,
"step": 995
},
{
"epoch": 0.55,
"learning_rate": 4.5922490129027464e-05,
"loss": 0.9762,
"step": 1000
},
{
"epoch": 0.56,
"learning_rate": 4.588277369292674e-05,
"loss": 0.9801,
"step": 1005
},
{
"epoch": 0.56,
"learning_rate": 4.5842882115646234e-05,
"loss": 1.0127,
"step": 1010
},
{
"epoch": 0.56,
"learning_rate": 4.580281573175157e-05,
"loss": 1.0415,
"step": 1015
},
{
"epoch": 0.56,
"learning_rate": 4.576257487727442e-05,
"loss": 0.9974,
"step": 1020
},
{
"epoch": 0.57,
"learning_rate": 4.572215988970974e-05,
"loss": 1.0097,
"step": 1025
},
{
"epoch": 0.57,
"learning_rate": 4.568157110801293e-05,
"loss": 1.0004,
"step": 1030
},
{
"epoch": 0.57,
"learning_rate": 4.5640808872596944e-05,
"loss": 0.9932,
"step": 1035
},
{
"epoch": 0.58,
"learning_rate": 4.5599873525329505e-05,
"loss": 0.9437,
"step": 1040
},
{
"epoch": 0.58,
"learning_rate": 4.555876540953019e-05,
"loss": 1.0004,
"step": 1045
},
{
"epoch": 0.58,
"learning_rate": 4.551748486996755e-05,
"loss": 0.9743,
"step": 1050
},
{
"epoch": 0.58,
"learning_rate": 4.547603225285626e-05,
"loss": 1.0303,
"step": 1055
},
{
"epoch": 0.59,
"learning_rate": 4.543440790585417e-05,
"loss": 1.0225,
"step": 1060
},
{
"epoch": 0.59,
"learning_rate": 4.539261217805939e-05,
"loss": 1.0102,
"step": 1065
},
{
"epoch": 0.59,
"learning_rate": 4.535064542000743e-05,
"loss": 1.0087,
"step": 1070
},
{
"epoch": 0.59,
"learning_rate": 4.5308507983668165e-05,
"loss": 0.952,
"step": 1075
},
{
"epoch": 0.6,
"learning_rate": 4.526620022244293e-05,
"loss": 0.9767,
"step": 1080
},
{
"epoch": 0.6,
"learning_rate": 4.522372249116158e-05,
"loss": 1.0049,
"step": 1085
},
{
"epoch": 0.6,
"learning_rate": 4.5181075146079456e-05,
"loss": 1.0397,
"step": 1090
},
{
"epoch": 0.61,
"learning_rate": 4.5138258544874455e-05,
"loss": 1.0167,
"step": 1095
},
{
"epoch": 0.61,
"learning_rate": 4.5095273046643985e-05,
"loss": 0.9966,
"step": 1100
},
{
"epoch": 0.61,
"learning_rate": 4.5052119011901986e-05,
"loss": 0.983,
"step": 1105
},
{
"epoch": 0.61,
"learning_rate": 4.500879680257587e-05,
"loss": 1.0301,
"step": 1110
},
{
"epoch": 0.62,
"learning_rate": 4.4965306782003535e-05,
"loss": 1.0371,
"step": 1115
},
{
"epoch": 0.62,
"learning_rate": 4.492164931493028e-05,
"loss": 1.0083,
"step": 1120
},
{
"epoch": 0.62,
"learning_rate": 4.487782476750575e-05,
"loss": 0.9728,
"step": 1125
},
{
"epoch": 0.62,
"learning_rate": 4.4833833507280884e-05,
"loss": 0.9892,
"step": 1130
},
{
"epoch": 0.63,
"learning_rate": 4.4789675903204805e-05,
"loss": 0.9847,
"step": 1135
},
{
"epoch": 0.63,
"learning_rate": 4.474535232562176e-05,
"loss": 1.0545,
"step": 1140
},
{
"epoch": 0.63,
"learning_rate": 4.470086314626797e-05,
"loss": 1.0204,
"step": 1145
},
{
"epoch": 0.64,
"learning_rate": 4.465620873826856e-05,
"loss": 0.9648,
"step": 1150
},
{
"epoch": 0.64,
"learning_rate": 4.46113894761344e-05,
"loss": 0.9545,
"step": 1155
},
{
"epoch": 0.64,
"learning_rate": 4.456640573575896e-05,
"loss": 1.0256,
"step": 1160
},
{
"epoch": 0.64,
"learning_rate": 4.4521257894415183e-05,
"loss": 0.99,
"step": 1165
},
{
"epoch": 0.65,
"learning_rate": 4.44759463307523e-05,
"loss": 0.9906,
"step": 1170
},
{
"epoch": 0.65,
"learning_rate": 4.443047142479266e-05,
"loss": 0.9876,
"step": 1175
},
{
"epoch": 0.65,
"learning_rate": 4.4384833557928553e-05,
"loss": 1.0495,
"step": 1180
},
{
"epoch": 0.66,
"learning_rate": 4.4339033112918966e-05,
"loss": 0.9869,
"step": 1185
},
{
"epoch": 0.66,
"learning_rate": 4.4293070473886456e-05,
"loss": 1.0299,
"step": 1190
},
{
"epoch": 0.66,
"learning_rate": 4.424694602631385e-05,
"loss": 1.0073,
"step": 1195
},
{
"epoch": 0.66,
"learning_rate": 4.420066015704105e-05,
"loss": 1.0023,
"step": 1200
},
{
"epoch": 0.67,
"learning_rate": 4.41542132542618e-05,
"loss": 0.9462,
"step": 1205
},
{
"epoch": 0.67,
"learning_rate": 4.410760570752037e-05,
"loss": 1.0116,
"step": 1210
},
{
"epoch": 0.67,
"learning_rate": 4.4060837907708375e-05,
"loss": 0.9652,
"step": 1215
},
{
"epoch": 0.67,
"learning_rate": 4.401391024706142e-05,
"loss": 1.0411,
"step": 1220
},
{
"epoch": 0.68,
"learning_rate": 4.396682311915586e-05,
"loss": 0.9691,
"step": 1225
},
{
"epoch": 0.68,
"learning_rate": 4.3919576918905495e-05,
"loss": 0.961,
"step": 1230
},
{
"epoch": 0.68,
"learning_rate": 4.387217204255819e-05,
"loss": 0.9602,
"step": 1235
},
{
"epoch": 0.69,
"learning_rate": 4.3824608887692666e-05,
"loss": 1.0197,
"step": 1240
},
{
"epoch": 0.69,
"learning_rate": 4.377688785321507e-05,
"loss": 0.9982,
"step": 1245
},
{
"epoch": 0.69,
"learning_rate": 4.372900933935569e-05,
"loss": 0.986,
"step": 1250
},
{
"epoch": 0.69,
"learning_rate": 4.368097374766556e-05,
"loss": 0.9744,
"step": 1255
},
{
"epoch": 0.7,
"learning_rate": 4.3632781481013105e-05,
"loss": 1.0078,
"step": 1260
},
{
"epoch": 0.7,
"learning_rate": 4.358443294358077e-05,
"loss": 0.9865,
"step": 1265
},
{
"epoch": 0.7,
"learning_rate": 4.35359285408616e-05,
"loss": 0.976,
"step": 1270
},
{
"epoch": 0.71,
"learning_rate": 4.348726867965591e-05,
"loss": 1.0141,
"step": 1275
},
{
"epoch": 0.71,
"learning_rate": 4.343845376806777e-05,
"loss": 0.952,
"step": 1280
},
{
"epoch": 0.71,
"learning_rate": 4.338948421550169e-05,
"loss": 1.0239,
"step": 1285
},
{
"epoch": 0.71,
"learning_rate": 4.334036043265909e-05,
"loss": 0.994,
"step": 1290
},
{
"epoch": 0.72,
"learning_rate": 4.329108283153492e-05,
"loss": 1.0505,
"step": 1295
},
{
"epoch": 0.72,
"learning_rate": 4.3241651825414195e-05,
"loss": 0.9919,
"step": 1300
},
{
"epoch": 0.72,
"learning_rate": 4.31920678288685e-05,
"loss": 1.0169,
"step": 1305
},
{
"epoch": 0.72,
"learning_rate": 4.3142331257752546e-05,
"loss": 0.9938,
"step": 1310
},
{
"epoch": 0.73,
"learning_rate": 4.309244252920064e-05,
"loss": 0.92,
"step": 1315
},
{
"epoch": 0.73,
"learning_rate": 4.304240206162326e-05,
"loss": 0.9537,
"step": 1320
},
{
"epoch": 0.73,
"learning_rate": 4.299221027470345e-05,
"loss": 0.996,
"step": 1325
},
{
"epoch": 0.74,
"learning_rate": 4.294186758939339e-05,
"loss": 0.9652,
"step": 1330
},
{
"epoch": 0.74,
"learning_rate": 4.2891374427910795e-05,
"loss": 1.0138,
"step": 1335
},
{
"epoch": 0.74,
"learning_rate": 4.284073121373544e-05,
"loss": 0.9267,
"step": 1340
},
{
"epoch": 0.74,
"learning_rate": 4.278993837160553e-05,
"loss": 0.9899,
"step": 1345
},
{
"epoch": 0.75,
"learning_rate": 4.273899632751422e-05,
"loss": 0.963,
"step": 1350
},
{
"epoch": 0.75,
"learning_rate": 4.2687905508705974e-05,
"loss": 1.0066,
"step": 1355
},
{
"epoch": 0.75,
"learning_rate": 4.263666634367303e-05,
"loss": 0.9939,
"step": 1360
},
{
"epoch": 0.75,
"learning_rate": 4.258527926215178e-05,
"loss": 0.9667,
"step": 1365
},
{
"epoch": 0.76,
"learning_rate": 4.253374469511917e-05,
"loss": 0.9999,
"step": 1370
},
{
"epoch": 0.76,
"learning_rate": 4.248206307478909e-05,
"loss": 0.9738,
"step": 1375
},
{
"epoch": 0.76,
"learning_rate": 4.243023483460875e-05,
"loss": 0.9829,
"step": 1380
},
{
"epoch": 0.77,
"learning_rate": 4.237826040925503e-05,
"loss": 1.0107,
"step": 1385
},
{
"epoch": 0.77,
"learning_rate": 4.232614023463088e-05,
"loss": 0.9992,
"step": 1390
},
{
"epoch": 0.77,
"learning_rate": 4.227387474786159e-05,
"loss": 0.9566,
"step": 1395
},
{
"epoch": 0.77,
"learning_rate": 4.222146438729119e-05,
"loss": 0.9722,
"step": 1400
},
{
"epoch": 0.78,
"learning_rate": 4.216890959247873e-05,
"loss": 0.988,
"step": 1405
},
{
"epoch": 0.78,
"learning_rate": 4.211621080419463e-05,
"loss": 1.0103,
"step": 1410
},
{
"epoch": 0.78,
"learning_rate": 4.206336846441695e-05,
"loss": 0.9805,
"step": 1415
},
{
"epoch": 0.79,
"learning_rate": 4.201038301632772e-05,
"loss": 1.0177,
"step": 1420
},
{
"epoch": 0.79,
"learning_rate": 4.195725490430917e-05,
"loss": 0.9938,
"step": 1425
},
{
"epoch": 0.79,
"learning_rate": 4.190398457394007e-05,
"loss": 1.0276,
"step": 1430
},
{
"epoch": 0.79,
"learning_rate": 4.1850572471991924e-05,
"loss": 0.9909,
"step": 1435
},
{
"epoch": 0.8,
"learning_rate": 4.1797019046425264e-05,
"loss": 0.9794,
"step": 1440
},
{
"epoch": 0.8,
"learning_rate": 4.1743324746385914e-05,
"loss": 1.0073,
"step": 1445
},
{
"epoch": 0.8,
"learning_rate": 4.1689490022201154e-05,
"loss": 1.0106,
"step": 1450
},
{
"epoch": 0.8,
"learning_rate": 4.163551532537601e-05,
"loss": 1.0241,
"step": 1455
},
{
"epoch": 0.81,
"learning_rate": 4.1581401108589425e-05,
"loss": 1.0178,
"step": 1460
},
{
"epoch": 0.81,
"learning_rate": 4.1527147825690495e-05,
"loss": 0.9426,
"step": 1465
},
{
"epoch": 0.81,
"learning_rate": 4.1472755931694626e-05,
"loss": 0.9642,
"step": 1470
},
{
"epoch": 0.82,
"learning_rate": 4.141822588277976e-05,
"loss": 0.9678,
"step": 1475
},
{
"epoch": 0.82,
"learning_rate": 4.136355813628251e-05,
"loss": 0.9654,
"step": 1480
},
{
"epoch": 0.82,
"learning_rate": 4.130875315069435e-05,
"loss": 0.9748,
"step": 1485
},
{
"epoch": 0.82,
"learning_rate": 4.125381138565775e-05,
"loss": 0.9321,
"step": 1490
},
{
"epoch": 0.83,
"learning_rate": 4.1198733301962346e-05,
"loss": 0.9885,
"step": 1495
},
{
"epoch": 0.83,
"learning_rate": 4.114351936154105e-05,
"loss": 0.9385,
"step": 1500
},
{
"epoch": 0.83,
"learning_rate": 4.108817002746619e-05,
"loss": 0.9362,
"step": 1505
},
{
"epoch": 0.83,
"learning_rate": 4.1032685763945625e-05,
"loss": 0.9764,
"step": 1510
},
{
"epoch": 0.84,
"learning_rate": 4.097706703631886e-05,
"loss": 0.961,
"step": 1515
},
{
"epoch": 0.84,
"learning_rate": 4.092131431105312e-05,
"loss": 0.9818,
"step": 1520
},
{
"epoch": 0.84,
"learning_rate": 4.086542805573945e-05,
"loss": 0.9855,
"step": 1525
},
{
"epoch": 0.85,
"learning_rate": 4.080940873908881e-05,
"loss": 0.9618,
"step": 1530
},
{
"epoch": 0.85,
"learning_rate": 4.07532568309281e-05,
"loss": 1.0102,
"step": 1535
},
{
"epoch": 0.85,
"learning_rate": 4.069697280219628e-05,
"loss": 0.9773,
"step": 1540
},
{
"epoch": 0.85,
"learning_rate": 4.0640557124940376e-05,
"loss": 0.9687,
"step": 1545
},
{
"epoch": 0.86,
"learning_rate": 4.058401027231152e-05,
"loss": 0.9818,
"step": 1550
},
{
"epoch": 0.86,
"learning_rate": 4.052733271856103e-05,
"loss": 0.9911,
"step": 1555
},
{
"epoch": 0.86,
"learning_rate": 4.0470524939036355e-05,
"loss": 0.9694,
"step": 1560
},
{
"epoch": 0.87,
"learning_rate": 4.0413587410177155e-05,
"loss": 0.9374,
"step": 1565
},
{
"epoch": 0.87,
"learning_rate": 4.035652060951128e-05,
"loss": 0.956,
"step": 1570
},
{
"epoch": 0.87,
"learning_rate": 4.0299325015650774e-05,
"loss": 0.9491,
"step": 1575
},
{
"epoch": 0.87,
"learning_rate": 4.024200110828783e-05,
"loss": 0.9569,
"step": 1580
},
{
"epoch": 0.88,
"learning_rate": 4.018454936819082e-05,
"loss": 0.9627,
"step": 1585
},
{
"epoch": 0.88,
"learning_rate": 4.012697027720018e-05,
"loss": 0.9922,
"step": 1590
},
{
"epoch": 0.88,
"learning_rate": 4.0069264318224506e-05,
"loss": 0.9703,
"step": 1595
},
{
"epoch": 0.88,
"learning_rate": 4.0011431975236337e-05,
"loss": 0.9436,
"step": 1600
},
{
"epoch": 0.89,
"learning_rate": 3.995347373326822e-05,
"loss": 0.961,
"step": 1605
},
{
"epoch": 0.89,
"learning_rate": 3.989539007840861e-05,
"loss": 0.9247,
"step": 1610
},
{
"epoch": 0.89,
"learning_rate": 3.983718149779775e-05,
"loss": 0.9537,
"step": 1615
},
{
"epoch": 0.9,
"learning_rate": 3.9778848479623656e-05,
"loss": 0.9415,
"step": 1620
},
{
"epoch": 0.9,
"learning_rate": 3.972039151311795e-05,
"loss": 0.9954,
"step": 1625
},
{
"epoch": 0.9,
"learning_rate": 3.966181108855183e-05,
"loss": 0.9451,
"step": 1630
},
{
"epoch": 0.9,
"learning_rate": 3.960310769723189e-05,
"loss": 0.9977,
"step": 1635
},
{
"epoch": 0.91,
"learning_rate": 3.9544281831496034e-05,
"loss": 0.9806,
"step": 1640
},
{
"epoch": 0.91,
"learning_rate": 3.9485333984709374e-05,
"loss": 0.9851,
"step": 1645
},
{
"epoch": 0.91,
"learning_rate": 3.942626465126001e-05,
"loss": 1.0089,
"step": 1650
},
{
"epoch": 0.92,
"learning_rate": 3.9367074326555e-05,
"loss": 0.9562,
"step": 1655
},
{
"epoch": 0.92,
"learning_rate": 3.930776350701609e-05,
"loss": 0.9892,
"step": 1660
},
{
"epoch": 0.92,
"learning_rate": 3.92483326900756e-05,
"loss": 0.9875,
"step": 1665
},
{
"epoch": 0.92,
"learning_rate": 3.91887823741723e-05,
"loss": 1.0012,
"step": 1670
},
{
"epoch": 0.93,
"learning_rate": 3.9129113058747136e-05,
"loss": 0.97,
"step": 1675
},
{
"epoch": 0.93,
"learning_rate": 3.9069325244239095e-05,
"loss": 0.9793,
"step": 1680
},
{
"epoch": 0.93,
"learning_rate": 3.900941943208103e-05,
"loss": 0.9864,
"step": 1685
},
{
"epoch": 0.93,
"learning_rate": 3.894939612469539e-05,
"loss": 0.9596,
"step": 1690
},
{
"epoch": 0.94,
"learning_rate": 3.888925582549006e-05,
"loss": 0.9679,
"step": 1695
},
{
"epoch": 0.94,
"learning_rate": 3.882899903885412e-05,
"loss": 0.9395,
"step": 1700
},
{
"epoch": 0.94,
"learning_rate": 3.876862627015361e-05,
"loss": 0.9774,
"step": 1705
},
{
"epoch": 0.95,
"learning_rate": 3.87081380257273e-05,
"loss": 0.9686,
"step": 1710
},
{
"epoch": 0.95,
"learning_rate": 3.864753481288244e-05,
"loss": 0.953,
"step": 1715
},
{
"epoch": 0.95,
"learning_rate": 3.8586817139890515e-05,
"loss": 0.9719,
"step": 1720
},
{
"epoch": 0.95,
"learning_rate": 3.852598551598294e-05,
"loss": 0.9748,
"step": 1725
},
{
"epoch": 0.96,
"learning_rate": 3.8465040451346874e-05,
"loss": 0.9741,
"step": 1730
},
{
"epoch": 0.96,
"learning_rate": 3.8403982457120836e-05,
"loss": 0.9747,
"step": 1735
},
{
"epoch": 0.96,
"learning_rate": 3.834281204539051e-05,
"loss": 0.9791,
"step": 1740
},
{
"epoch": 0.96,
"learning_rate": 3.828152972918438e-05,
"loss": 0.9704,
"step": 1745
},
{
"epoch": 0.97,
"learning_rate": 3.82201360224695e-05,
"loss": 0.9556,
"step": 1750
},
{
"epoch": 0.97,
"learning_rate": 3.815863144014711e-05,
"loss": 0.9552,
"step": 1755
},
{
"epoch": 0.97,
"learning_rate": 3.809701649804834e-05,
"loss": 0.9594,
"step": 1760
},
{
"epoch": 0.98,
"learning_rate": 3.8035291712929926e-05,
"loss": 0.959,
"step": 1765
},
{
"epoch": 0.98,
"learning_rate": 3.797345760246982e-05,
"loss": 1.0254,
"step": 1770
},
{
"epoch": 0.98,
"learning_rate": 3.791151468526289e-05,
"loss": 0.9925,
"step": 1775
},
{
"epoch": 0.98,
"learning_rate": 3.784946348081654e-05,
"loss": 0.9516,
"step": 1780
},
{
"epoch": 0.99,
"learning_rate": 3.7787304509546365e-05,
"loss": 0.954,
"step": 1785
},
{
"epoch": 0.99,
"learning_rate": 3.7725038292771774e-05,
"loss": 0.9465,
"step": 1790
},
{
"epoch": 0.99,
"learning_rate": 3.766266535271167e-05,
"loss": 0.9792,
"step": 1795
},
{
"epoch": 1.0,
"learning_rate": 3.760018621248e-05,
"loss": 0.964,
"step": 1800
},
{
"epoch": 1.0,
"learning_rate": 3.75376013960814e-05,
"loss": 0.9419,
"step": 1805
},
{
"epoch": 1.0,
"learning_rate": 3.747491142840681e-05,
"loss": 0.9818,
"step": 1810
},
{
"epoch": 1.0,
"learning_rate": 3.741211683522904e-05,
"loss": 0.9153,
"step": 1815
},
{
"epoch": 1.01,
"learning_rate": 3.734921814319841e-05,
"loss": 0.955,
"step": 1820
},
{
"epoch": 1.01,
"learning_rate": 3.728621587983828e-05,
"loss": 0.9467,
"step": 1825
},
{
"epoch": 1.01,
"learning_rate": 3.722311057354067e-05,
"loss": 0.9816,
"step": 1830
},
{
"epoch": 1.01,
"learning_rate": 3.715990275356178e-05,
"loss": 0.9727,
"step": 1835
},
{
"epoch": 1.02,
"learning_rate": 3.7096592950017617e-05,
"loss": 0.9822,
"step": 1840
},
{
"epoch": 1.02,
"learning_rate": 3.703318169387947e-05,
"loss": 0.942,
"step": 1845
},
{
"epoch": 1.02,
"learning_rate": 3.696966951696952e-05,
"loss": 0.9306,
"step": 1850
},
{
"epoch": 1.03,
"learning_rate": 3.690605695195637e-05,
"loss": 0.9654,
"step": 1855
},
{
"epoch": 1.03,
"learning_rate": 3.684234453235054e-05,
"loss": 0.9592,
"step": 1860
},
{
"epoch": 1.03,
"learning_rate": 3.677853279250003e-05,
"loss": 0.988,
"step": 1865
},
{
"epoch": 1.03,
"learning_rate": 3.671462226758583e-05,
"loss": 0.9462,
"step": 1870
},
{
"epoch": 1.04,
"learning_rate": 3.665061349361742e-05,
"loss": 0.9685,
"step": 1875
},
{
"epoch": 1.04,
"learning_rate": 3.658650700742828e-05,
"loss": 0.9772,
"step": 1880
},
{
"epoch": 1.04,
"learning_rate": 3.6522303346671404e-05,
"loss": 0.9482,
"step": 1885
},
{
"epoch": 1.05,
"learning_rate": 3.645800304981477e-05,
"loss": 1.0069,
"step": 1890
},
{
"epoch": 1.05,
"learning_rate": 3.639360665613683e-05,
"loss": 0.9003,
"step": 1895
},
{
"epoch": 1.05,
"learning_rate": 3.632911470572197e-05,
"loss": 0.9279,
"step": 1900
},
{
"epoch": 1.05,
"learning_rate": 3.626452773945603e-05,
"loss": 0.9237,
"step": 1905
},
{
"epoch": 1.06,
"learning_rate": 3.619984629902172e-05,
"loss": 1.0086,
"step": 1910
},
{
"epoch": 1.06,
"learning_rate": 3.613507092689409e-05,
"loss": 0.9625,
"step": 1915
},
{
"epoch": 1.06,
"learning_rate": 3.607020216633599e-05,
"loss": 0.9297,
"step": 1920
},
{
"epoch": 1.06,
"learning_rate": 3.60052405613935e-05,
"loss": 0.9718,
"step": 1925
},
{
"epoch": 1.07,
"learning_rate": 3.594018665689139e-05,
"loss": 0.9512,
"step": 1930
},
{
"epoch": 1.07,
"learning_rate": 3.5875040998428513e-05,
"loss": 0.9923,
"step": 1935
},
{
"epoch": 1.07,
"learning_rate": 3.5809804132373253e-05,
"loss": 0.9518,
"step": 1940
},
{
"epoch": 1.08,
"learning_rate": 3.574447660585897e-05,
"loss": 0.9324,
"step": 1945
},
{
"epoch": 1.08,
"learning_rate": 3.5679058966779344e-05,
"loss": 0.9724,
"step": 1950
},
{
"epoch": 1.08,
"learning_rate": 3.561355176378384e-05,
"loss": 0.978,
"step": 1955
},
{
"epoch": 1.08,
"learning_rate": 3.554795554627307e-05,
"loss": 0.9893,
"step": 1960
},
{
"epoch": 1.09,
"learning_rate": 3.548227086439422e-05,
"loss": 0.967,
"step": 1965
},
{
"epoch": 1.09,
"learning_rate": 3.541649826903639e-05,
"loss": 0.9529,
"step": 1970
},
{
"epoch": 1.09,
"learning_rate": 3.535063831182602e-05,
"loss": 0.9506,
"step": 1975
},
{
"epoch": 1.09,
"learning_rate": 3.528469154512224e-05,
"loss": 0.9525,
"step": 1980
},
{
"epoch": 1.1,
"learning_rate": 3.521865852201223e-05,
"loss": 0.9258,
"step": 1985
},
{
"epoch": 1.1,
"learning_rate": 3.5152539796306596e-05,
"loss": 0.9417,
"step": 1990
},
{
"epoch": 1.1,
"learning_rate": 3.508633592253472e-05,
"loss": 0.9465,
"step": 1995
},
{
"epoch": 1.11,
"learning_rate": 3.502004745594011e-05,
"loss": 0.9494,
"step": 2000
},
{
"epoch": 1.11,
"learning_rate": 3.4953674952475755e-05,
"loss": 0.9709,
"step": 2005
},
{
"epoch": 1.11,
"learning_rate": 3.488721896879943e-05,
"loss": 0.9581,
"step": 2010
},
{
"epoch": 1.11,
"learning_rate": 3.4820680062269074e-05,
"loss": 0.974,
"step": 2015
},
{
"epoch": 1.12,
"learning_rate": 3.4754058790938046e-05,
"loss": 0.9768,
"step": 2020
},
{
"epoch": 1.12,
"learning_rate": 3.468735571355055e-05,
"loss": 0.982,
"step": 2025
},
{
"epoch": 1.12,
"learning_rate": 3.4620571389536825e-05,
"loss": 0.984,
"step": 2030
},
{
"epoch": 1.13,
"learning_rate": 3.455370637900856e-05,
"loss": 0.9604,
"step": 2035
},
{
"epoch": 1.13,
"learning_rate": 3.448676124275414e-05,
"loss": 0.897,
"step": 2040
},
{
"epoch": 1.13,
"learning_rate": 3.4419736542233925e-05,
"loss": 0.9968,
"step": 2045
},
{
"epoch": 1.13,
"learning_rate": 3.4352632839575616e-05,
"loss": 0.9479,
"step": 2050
},
{
"epoch": 1.14,
"learning_rate": 3.428545069756946e-05,
"loss": 0.9724,
"step": 2055
},
{
"epoch": 1.14,
"learning_rate": 3.42181906796636e-05,
"loss": 0.9493,
"step": 2060
},
{
"epoch": 1.14,
"learning_rate": 3.415085334995927e-05,
"loss": 0.9348,
"step": 2065
},
{
"epoch": 1.14,
"learning_rate": 3.408343927320613e-05,
"loss": 0.9702,
"step": 2070
},
{
"epoch": 1.15,
"learning_rate": 3.401594901479753e-05,
"loss": 0.9089,
"step": 2075
},
{
"epoch": 1.15,
"learning_rate": 3.394838314076572e-05,
"loss": 0.9606,
"step": 2080
},
{
"epoch": 1.15,
"learning_rate": 3.3880742217777115e-05,
"loss": 0.9743,
"step": 2085
},
{
"epoch": 1.16,
"learning_rate": 3.381302681312759e-05,
"loss": 0.9469,
"step": 2090
},
{
"epoch": 1.16,
"learning_rate": 3.374523749473767e-05,
"loss": 0.949,
"step": 2095
},
{
"epoch": 1.16,
"learning_rate": 3.367737483114779e-05,
"loss": 0.9567,
"step": 2100
},
{
"epoch": 1.16,
"learning_rate": 3.360943939151351e-05,
"loss": 0.9718,
"step": 2105
},
{
"epoch": 1.17,
"learning_rate": 3.354143174560078e-05,
"loss": 0.9626,
"step": 2110
},
{
"epoch": 1.17,
"learning_rate": 3.3473352463781105e-05,
"loss": 0.9346,
"step": 2115
},
{
"epoch": 1.17,
"learning_rate": 3.340520211702681e-05,
"loss": 0.9208,
"step": 2120
},
{
"epoch": 1.18,
"learning_rate": 3.333698127690623e-05,
"loss": 0.9856,
"step": 2125
},
{
"epoch": 1.18,
"learning_rate": 3.326869051557891e-05,
"loss": 0.9049,
"step": 2130
},
{
"epoch": 1.18,
"learning_rate": 3.320033040579082e-05,
"loss": 0.9222,
"step": 2135
},
{
"epoch": 1.18,
"learning_rate": 3.3131901520869565e-05,
"loss": 0.9648,
"step": 2140
},
{
"epoch": 1.19,
"learning_rate": 3.306340443471951e-05,
"loss": 0.9538,
"step": 2145
},
{
"epoch": 1.19,
"learning_rate": 3.299483972181708e-05,
"loss": 0.9314,
"step": 2150
},
{
"epoch": 1.19,
"learning_rate": 3.292620795720583e-05,
"loss": 0.9576,
"step": 2155
},
{
"epoch": 1.19,
"learning_rate": 3.285750971649167e-05,
"loss": 0.9499,
"step": 2160
},
{
"epoch": 1.2,
"learning_rate": 3.278874557583807e-05,
"loss": 0.9568,
"step": 2165
},
{
"epoch": 1.2,
"learning_rate": 3.271991611196117e-05,
"loss": 0.9642,
"step": 2170
},
{
"epoch": 1.2,
"learning_rate": 3.265102190212497e-05,
"loss": 0.9526,
"step": 2175
},
{
"epoch": 1.21,
"learning_rate": 3.258206352413648e-05,
"loss": 0.933,
"step": 2180
},
{
"epoch": 1.21,
"learning_rate": 3.2513041556340887e-05,
"loss": 0.9683,
"step": 2185
},
{
"epoch": 1.21,
"learning_rate": 3.244395657761671e-05,
"loss": 0.9155,
"step": 2190
},
{
"epoch": 1.21,
"learning_rate": 3.2374809167370924e-05,
"loss": 0.9262,
"step": 2195
},
{
"epoch": 1.22,
"learning_rate": 3.230559990553409e-05,
"loss": 0.9778,
"step": 2200
},
{
"epoch": 1.22,
"learning_rate": 3.2236329372555544e-05,
"loss": 0.9577,
"step": 2205
},
{
"epoch": 1.22,
"learning_rate": 3.2166998149398465e-05,
"loss": 0.9286,
"step": 2210
},
{
"epoch": 1.22,
"learning_rate": 3.209760681753505e-05,
"loss": 0.9634,
"step": 2215
},
{
"epoch": 1.23,
"learning_rate": 3.2028155958941615e-05,
"loss": 0.9467,
"step": 2220
},
{
"epoch": 1.23,
"learning_rate": 3.195864615609373e-05,
"loss": 0.9543,
"step": 2225
},
{
"epoch": 1.23,
"learning_rate": 3.1889077991961304e-05,
"loss": 0.9914,
"step": 2230
},
{
"epoch": 1.24,
"learning_rate": 3.181945205000373e-05,
"loss": 0.9309,
"step": 2235
},
{
"epoch": 1.24,
"learning_rate": 3.1749768914164955e-05,
"loss": 0.9299,
"step": 2240
},
{
"epoch": 1.24,
"learning_rate": 3.168002916886864e-05,
"loss": 0.9462,
"step": 2245
},
{
"epoch": 1.24,
"learning_rate": 3.1610233399013194e-05,
"loss": 0.948,
"step": 2250
},
{
"epoch": 1.25,
"learning_rate": 3.15403821899669e-05,
"loss": 0.9335,
"step": 2255
},
{
"epoch": 1.25,
"learning_rate": 3.147047612756302e-05,
"loss": 0.952,
"step": 2260
},
{
"epoch": 1.25,
"learning_rate": 3.140051579809484e-05,
"loss": 0.9532,
"step": 2265
},
{
"epoch": 1.26,
"learning_rate": 3.133050178831079e-05,
"loss": 0.9853,
"step": 2270
},
{
"epoch": 1.26,
"learning_rate": 3.12604346854095e-05,
"loss": 0.8925,
"step": 2275
},
{
"epoch": 1.26,
"learning_rate": 3.119031507703491e-05,
"loss": 0.942,
"step": 2280
},
{
"epoch": 1.26,
"learning_rate": 3.112014355127129e-05,
"loss": 0.9132,
"step": 2285
},
{
"epoch": 1.27,
"learning_rate": 3.104992069663835e-05,
"loss": 0.9335,
"step": 2290
},
{
"epoch": 1.27,
"learning_rate": 3.0979647102086273e-05,
"loss": 0.9403,
"step": 2295
},
{
"epoch": 1.27,
"learning_rate": 3.090932335699081e-05,
"loss": 0.9246,
"step": 2300
},
{
"epoch": 1.27,
"learning_rate": 3.083895005114831e-05,
"loss": 0.912,
"step": 2305
},
{
"epoch": 1.28,
"learning_rate": 3.076852777477079e-05,
"loss": 0.9334,
"step": 2310
},
{
"epoch": 1.28,
"learning_rate": 3.069805711848096e-05,
"loss": 0.933,
"step": 2315
},
{
"epoch": 1.28,
"learning_rate": 3.062753867330729e-05,
"loss": 0.9348,
"step": 2320
},
{
"epoch": 1.29,
"learning_rate": 3.055697303067905e-05,
"loss": 0.9997,
"step": 2325
},
{
"epoch": 1.29,
"learning_rate": 3.048636078242137e-05,
"loss": 0.9196,
"step": 2330
},
{
"epoch": 1.29,
"learning_rate": 3.0415702520750235e-05,
"loss": 0.9735,
"step": 2335
},
{
"epoch": 1.29,
"learning_rate": 3.0344998838267525e-05,
"loss": 0.9395,
"step": 2340
},
{
"epoch": 1.3,
"learning_rate": 3.0274250327956093e-05,
"loss": 0.9455,
"step": 2345
},
{
"epoch": 1.3,
"learning_rate": 3.020345758317474e-05,
"loss": 0.968,
"step": 2350
},
{
"epoch": 1.3,
"learning_rate": 3.0132621197653245e-05,
"loss": 0.9403,
"step": 2355
},
{
"epoch": 1.3,
"learning_rate": 3.0061741765487418e-05,
"loss": 0.9267,
"step": 2360
},
{
"epoch": 1.31,
"learning_rate": 2.9990819881134073e-05,
"loss": 0.9734,
"step": 2365
},
{
"epoch": 1.31,
"learning_rate": 2.9919856139406093e-05,
"loss": 0.949,
"step": 2370
},
{
"epoch": 1.31,
"learning_rate": 2.9848851135467386e-05,
"loss": 0.9464,
"step": 2375
},
{
"epoch": 1.32,
"learning_rate": 2.977780546482794e-05,
"loss": 0.9709,
"step": 2380
},
{
"epoch": 1.32,
"learning_rate": 2.9706719723338795e-05,
"loss": 0.9202,
"step": 2385
},
{
"epoch": 1.32,
"learning_rate": 2.9635594507187074e-05,
"loss": 0.9625,
"step": 2390
},
{
"epoch": 1.32,
"learning_rate": 2.956443041289096e-05,
"loss": 0.9307,
"step": 2395
},
{
"epoch": 1.33,
"learning_rate": 2.9493228037294702e-05,
"loss": 0.907,
"step": 2400
},
{
"epoch": 1.33,
"learning_rate": 2.9421987977563613e-05,
"loss": 0.9487,
"step": 2405
},
{
"epoch": 1.33,
"learning_rate": 2.935071083117907e-05,
"loss": 0.9259,
"step": 2410
},
{
"epoch": 1.34,
"learning_rate": 2.9279397195933457e-05,
"loss": 0.9357,
"step": 2415
},
{
"epoch": 1.34,
"learning_rate": 2.920804766992521e-05,
"loss": 0.9149,
"step": 2420
},
{
"epoch": 1.34,
"learning_rate": 2.9136662851553787e-05,
"loss": 0.9664,
"step": 2425
},
{
"epoch": 1.34,
"learning_rate": 2.906524333951461e-05,
"loss": 0.973,
"step": 2430
},
{
"epoch": 1.35,
"learning_rate": 2.899378973279409e-05,
"loss": 0.9797,
"step": 2435
},
{
"epoch": 1.35,
"learning_rate": 2.892230263066459e-05,
"loss": 0.9604,
"step": 2440
},
{
"epoch": 1.35,
"learning_rate": 2.885078263267938e-05,
"loss": 0.9147,
"step": 2445
},
{
"epoch": 1.35,
"learning_rate": 2.8779230338667634e-05,
"loss": 0.9827,
"step": 2450
},
{
"epoch": 1.36,
"learning_rate": 2.870764634872939e-05,
"loss": 0.9362,
"step": 2455
},
{
"epoch": 1.36,
"learning_rate": 2.86360312632305e-05,
"loss": 0.9373,
"step": 2460
},
{
"epoch": 1.36,
"learning_rate": 2.8564385682797622e-05,
"loss": 0.955,
"step": 2465
},
{
"epoch": 1.37,
"learning_rate": 2.8492710208313177e-05,
"loss": 0.9108,
"step": 2470
},
{
"epoch": 1.37,
"learning_rate": 2.8421005440910303e-05,
"loss": 0.9259,
"step": 2475
},
{
"epoch": 1.37,
"learning_rate": 2.8349271981967797e-05,
"loss": 0.9333,
"step": 2480
},
{
"epoch": 1.37,
"learning_rate": 2.8277510433105102e-05,
"loss": 0.9437,
"step": 2485
},
{
"epoch": 1.38,
"learning_rate": 2.820572139617725e-05,
"loss": 0.9189,
"step": 2490
},
{
"epoch": 1.38,
"learning_rate": 2.8133905473269802e-05,
"loss": 0.9522,
"step": 2495
},
{
"epoch": 1.38,
"learning_rate": 2.8062063266693818e-05,
"loss": 0.9352,
"step": 2500
},
{
"epoch": 1.39,
"learning_rate": 2.7990195378980784e-05,
"loss": 0.9232,
"step": 2505
},
{
"epoch": 1.39,
"learning_rate": 2.7918302412877583e-05,
"loss": 0.8807,
"step": 2510
},
{
"epoch": 1.39,
"learning_rate": 2.7846384971341427e-05,
"loss": 0.9231,
"step": 2515
},
{
"epoch": 1.39,
"learning_rate": 2.7774443657534788e-05,
"loss": 0.9253,
"step": 2520
},
{
"epoch": 1.4,
"learning_rate": 2.770247907482036e-05,
"loss": 0.9646,
"step": 2525
},
{
"epoch": 1.4,
"learning_rate": 2.763049182675599e-05,
"loss": 0.9575,
"step": 2530
},
{
"epoch": 1.4,
"learning_rate": 2.7558482517089617e-05,
"loss": 0.9234,
"step": 2535
},
{
"epoch": 1.4,
"learning_rate": 2.748645174975421e-05,
"loss": 0.9215,
"step": 2540
},
{
"epoch": 1.41,
"learning_rate": 2.74144001288627e-05,
"loss": 0.959,
"step": 2545
},
{
"epoch": 1.41,
"learning_rate": 2.7342328258702894e-05,
"loss": 0.9509,
"step": 2550
},
{
"epoch": 1.41,
"learning_rate": 2.727023674373246e-05,
"loss": 0.8989,
"step": 2555
},
{
"epoch": 1.42,
"learning_rate": 2.7198126188573807e-05,
"loss": 0.9653,
"step": 2560
},
{
"epoch": 1.42,
"learning_rate": 2.7125997198009028e-05,
"loss": 0.9046,
"step": 2565
},
{
"epoch": 1.42,
"learning_rate": 2.7053850376974848e-05,
"loss": 0.9318,
"step": 2570
},
{
"epoch": 1.42,
"learning_rate": 2.6981686330557516e-05,
"loss": 0.9292,
"step": 2575
},
{
"epoch": 1.43,
"learning_rate": 2.6909505663987756e-05,
"loss": 0.99,
"step": 2580
},
{
"epoch": 1.43,
"learning_rate": 2.6837308982635678e-05,
"loss": 0.9737,
"step": 2585
},
{
"epoch": 1.43,
"learning_rate": 2.6765096892005726e-05,
"loss": 0.9649,
"step": 2590
},
{
"epoch": 1.43,
"learning_rate": 2.6692869997731545e-05,
"loss": 0.9687,
"step": 2595
},
{
"epoch": 1.44,
"learning_rate": 2.6620628905570964e-05,
"loss": 0.9708,
"step": 2600
},
{
"epoch": 1.44,
"learning_rate": 2.6548374221400884e-05,
"loss": 0.9498,
"step": 2605
},
{
"epoch": 1.44,
"learning_rate": 2.6476106551212188e-05,
"loss": 0.9512,
"step": 2610
},
{
"epoch": 1.45,
"learning_rate": 2.6403826501104682e-05,
"loss": 0.964,
"step": 2615
},
{
"epoch": 1.45,
"learning_rate": 2.6331534677281998e-05,
"loss": 0.9321,
"step": 2620
},
{
"epoch": 1.45,
"learning_rate": 2.6259231686046508e-05,
"loss": 0.9032,
"step": 2625
},
{
"epoch": 1.45,
"learning_rate": 2.6186918133794252e-05,
"loss": 0.9543,
"step": 2630
},
{
"epoch": 1.46,
"learning_rate": 2.6114594627009847e-05,
"loss": 0.9355,
"step": 2635
},
{
"epoch": 1.46,
"learning_rate": 2.604226177226137e-05,
"loss": 0.9111,
"step": 2640
},
{
"epoch": 1.46,
"learning_rate": 2.596992017619534e-05,
"loss": 0.9896,
"step": 2645
},
{
"epoch": 1.47,
"learning_rate": 2.589757044553155e-05,
"loss": 0.9715,
"step": 2650
},
{
"epoch": 1.47,
"learning_rate": 2.5825213187058045e-05,
"loss": 0.8911,
"step": 2655
},
{
"epoch": 1.47,
"learning_rate": 2.5752849007625986e-05,
"loss": 0.9446,
"step": 2660
},
{
"epoch": 1.47,
"learning_rate": 2.568047851414459e-05,
"loss": 0.9453,
"step": 2665
},
{
"epoch": 1.48,
"learning_rate": 2.5608102313576027e-05,
"loss": 0.9244,
"step": 2670
},
{
"epoch": 1.48,
"learning_rate": 2.553572101293033e-05,
"loss": 0.9478,
"step": 2675
},
{
"epoch": 1.48,
"learning_rate": 2.546333521926031e-05,
"loss": 0.9708,
"step": 2680
},
{
"epoch": 1.48,
"learning_rate": 2.5390945539656445e-05,
"loss": 0.9266,
"step": 2685
},
{
"epoch": 1.49,
"learning_rate": 2.5318552581241822e-05,
"loss": 0.9199,
"step": 2690
},
{
"epoch": 1.49,
"learning_rate": 2.524615695116702e-05,
"loss": 0.9666,
"step": 2695
},
{
"epoch": 1.49,
"learning_rate": 2.5173759256605027e-05,
"loss": 0.9247,
"step": 2700
},
{
"epoch": 1.5,
"learning_rate": 2.510136010474614e-05,
"loss": 0.9534,
"step": 2705
},
{
"epoch": 1.5,
"learning_rate": 2.5028960102792887e-05,
"loss": 0.9502,
"step": 2710
},
{
"epoch": 1.5,
"learning_rate": 2.4971039897207112e-05,
"loss": 0.9331,
"step": 2715
},
{
"epoch": 1.5,
"learning_rate": 2.4898639895253865e-05,
"loss": 0.9806,
"step": 2720
},
{
"epoch": 1.51,
"learning_rate": 2.4826240743394982e-05,
"loss": 0.9525,
"step": 2725
},
{
"epoch": 1.51,
"learning_rate": 2.4753843048832985e-05,
"loss": 0.937,
"step": 2730
},
{
"epoch": 1.51,
"learning_rate": 2.4681447418758187e-05,
"loss": 0.935,
"step": 2735
},
{
"epoch": 1.52,
"learning_rate": 2.460905446034356e-05,
"loss": 0.9662,
"step": 2740
},
{
"epoch": 1.52,
"learning_rate": 2.45366647807397e-05,
"loss": 0.9249,
"step": 2745
},
{
"epoch": 1.52,
"learning_rate": 2.446427898706967e-05,
"loss": 0.9673,
"step": 2750
},
{
"epoch": 1.52,
"learning_rate": 2.439189768642398e-05,
"loss": 0.9316,
"step": 2755
},
{
"epoch": 1.53,
"learning_rate": 2.431952148585541e-05,
"loss": 0.9297,
"step": 2760
},
{
"epoch": 1.53,
"learning_rate": 2.424715099237402e-05,
"loss": 0.9278,
"step": 2765
},
{
"epoch": 1.53,
"learning_rate": 2.4174786812941968e-05,
"loss": 0.8954,
"step": 2770
},
{
"epoch": 1.53,
"learning_rate": 2.4102429554468456e-05,
"loss": 0.9586,
"step": 2775
},
{
"epoch": 1.54,
"learning_rate": 2.4030079823804673e-05,
"loss": 0.9119,
"step": 2780
},
{
"epoch": 1.54,
"learning_rate": 2.395773822773863e-05,
"loss": 0.8949,
"step": 2785
},
{
"epoch": 1.54,
"learning_rate": 2.3885405372990166e-05,
"loss": 0.9506,
"step": 2790
},
{
"epoch": 1.55,
"learning_rate": 2.3813081866205754e-05,
"loss": 0.9087,
"step": 2795
},
{
"epoch": 1.55,
"learning_rate": 2.3740768313953494e-05,
"loss": 0.923,
"step": 2800
},
{
"epoch": 1.55,
"learning_rate": 2.3668465322718004e-05,
"loss": 1.0212,
"step": 2805
},
{
"epoch": 1.55,
"learning_rate": 2.359617349889532e-05,
"loss": 0.9182,
"step": 2810
},
{
"epoch": 1.56,
"learning_rate": 2.3523893448787818e-05,
"loss": 0.9316,
"step": 2815
},
{
"epoch": 1.56,
"learning_rate": 2.3451625778599122e-05,
"loss": 0.94,
"step": 2820
},
{
"epoch": 1.56,
"learning_rate": 2.3379371094429038e-05,
"loss": 0.9181,
"step": 2825
},
{
"epoch": 1.56,
"learning_rate": 2.3307130002268457e-05,
"loss": 0.937,
"step": 2830
},
{
"epoch": 1.57,
"learning_rate": 2.3234903107994287e-05,
"loss": 0.9026,
"step": 2835
},
{
"epoch": 1.57,
"learning_rate": 2.3162691017364317e-05,
"loss": 0.954,
"step": 2840
},
{
"epoch": 1.57,
"learning_rate": 2.3090494336012253e-05,
"loss": 0.9661,
"step": 2845
},
{
"epoch": 1.58,
"learning_rate": 2.3018313669442483e-05,
"loss": 0.9127,
"step": 2850
},
{
"epoch": 1.58,
"learning_rate": 2.2946149623025158e-05,
"loss": 0.9317,
"step": 2855
},
{
"epoch": 1.58,
"learning_rate": 2.2874002801990978e-05,
"loss": 0.9856,
"step": 2860
},
{
"epoch": 1.58,
"learning_rate": 2.28018738114262e-05,
"loss": 1.0021,
"step": 2865
},
{
"epoch": 1.59,
"learning_rate": 2.272976325626755e-05,
"loss": 0.9655,
"step": 2870
},
{
"epoch": 1.59,
"learning_rate": 2.265767174129711e-05,
"loss": 0.9619,
"step": 2875
},
{
"epoch": 1.59,
"learning_rate": 2.2585599871137313e-05,
"loss": 0.9383,
"step": 2880
},
{
"epoch": 1.6,
"learning_rate": 2.251354825024579e-05,
"loss": 0.9332,
"step": 2885
},
{
"epoch": 1.6,
"learning_rate": 2.244151748291039e-05,
"loss": 0.9544,
"step": 2890
},
{
"epoch": 1.6,
"learning_rate": 2.236950817324401e-05,
"loss": 0.9343,
"step": 2895
},
{
"epoch": 1.6,
"learning_rate": 2.2297520925179647e-05,
"loss": 0.9189,
"step": 2900
},
{
"epoch": 1.61,
"learning_rate": 2.222555634246521e-05,
"loss": 0.9341,
"step": 2905
},
{
"epoch": 1.61,
"learning_rate": 2.215361502865858e-05,
"loss": 0.9567,
"step": 2910
},
{
"epoch": 1.61,
"learning_rate": 2.2081697587122423e-05,
"loss": 0.9047,
"step": 2915
},
{
"epoch": 1.61,
"learning_rate": 2.200980462101922e-05,
"loss": 0.9126,
"step": 2920
},
{
"epoch": 1.62,
"learning_rate": 2.1937936733306195e-05,
"loss": 0.9523,
"step": 2925
},
{
"epoch": 1.62,
"learning_rate": 2.18660945267302e-05,
"loss": 0.8802,
"step": 2930
},
{
"epoch": 1.62,
"learning_rate": 2.179427860382276e-05,
"loss": 0.9197,
"step": 2935
},
{
"epoch": 1.63,
"learning_rate": 2.1722489566894903e-05,
"loss": 0.9255,
"step": 2940
},
{
"epoch": 1.63,
"learning_rate": 2.1650728018032206e-05,
"loss": 0.8921,
"step": 2945
},
{
"epoch": 1.63,
"learning_rate": 2.15789945590897e-05,
"loss": 0.9607,
"step": 2950
},
{
"epoch": 1.63,
"learning_rate": 2.150728979168683e-05,
"loss": 0.9755,
"step": 2955
},
{
"epoch": 1.64,
"learning_rate": 2.1435614317202384e-05,
"loss": 0.9943,
"step": 2960
},
{
"epoch": 1.64,
"learning_rate": 2.1363968736769508e-05,
"loss": 0.9463,
"step": 2965
},
{
"epoch": 1.64,
"learning_rate": 2.1292353651270617e-05,
"loss": 0.9107,
"step": 2970
},
{
"epoch": 1.65,
"learning_rate": 2.1220769661332365e-05,
"loss": 0.9311,
"step": 2975
},
{
"epoch": 1.65,
"learning_rate": 2.1149217367320622e-05,
"loss": 0.9459,
"step": 2980
},
{
"epoch": 1.65,
"learning_rate": 2.107769736933541e-05,
"loss": 0.9439,
"step": 2985
},
{
"epoch": 1.65,
"learning_rate": 2.100621026720591e-05,
"loss": 0.9719,
"step": 2990
},
{
"epoch": 1.66,
"learning_rate": 2.093475666048539e-05,
"loss": 0.9569,
"step": 2995
},
{
"epoch": 1.66,
"learning_rate": 2.0863337148446222e-05,
"loss": 0.9308,
"step": 3000
},
{
"epoch": 1.66,
"learning_rate": 2.07919523300748e-05,
"loss": 0.9269,
"step": 3005
},
{
"epoch": 1.66,
"learning_rate": 2.0720602804066552e-05,
"loss": 0.9358,
"step": 3010
},
{
"epoch": 1.67,
"learning_rate": 2.0649289168820943e-05,
"loss": 0.9291,
"step": 3015
},
{
"epoch": 1.67,
"learning_rate": 2.0578012022436386e-05,
"loss": 0.969,
"step": 3020
},
{
"epoch": 1.67,
"learning_rate": 2.0506771962705304e-05,
"loss": 0.9319,
"step": 3025
},
{
"epoch": 1.68,
"learning_rate": 2.0435569587109042e-05,
"loss": 0.9574,
"step": 3030
},
{
"epoch": 1.68,
"learning_rate": 2.036440549281293e-05,
"loss": 0.9654,
"step": 3035
},
{
"epoch": 1.68,
"learning_rate": 2.0293280276661204e-05,
"loss": 0.9742,
"step": 3040
},
{
"epoch": 1.68,
"learning_rate": 2.0222194535172067e-05,
"loss": 0.9014,
"step": 3045
},
{
"epoch": 1.69,
"learning_rate": 2.0151148864532623e-05,
"loss": 0.9246,
"step": 3050
},
{
"epoch": 1.69,
"learning_rate": 2.0080143860593913e-05,
"loss": 0.9522,
"step": 3055
},
{
"epoch": 1.69,
"learning_rate": 2.0009180118865933e-05,
"loss": 0.9567,
"step": 3060
},
{
"epoch": 1.69,
"learning_rate": 1.9938258234512588e-05,
"loss": 0.983,
"step": 3065
},
{
"epoch": 1.7,
"learning_rate": 1.9867378802346764e-05,
"loss": 0.8722,
"step": 3070
},
{
"epoch": 1.7,
"learning_rate": 1.979654241682527e-05,
"loss": 0.9122,
"step": 3075
},
{
"epoch": 1.7,
"learning_rate": 1.972574967204391e-05,
"loss": 0.9362,
"step": 3080
},
{
"epoch": 1.71,
"learning_rate": 1.9655001161732478e-05,
"loss": 0.8944,
"step": 3085
},
{
"epoch": 1.71,
"learning_rate": 1.9584297479249774e-05,
"loss": 0.9329,
"step": 3090
},
{
"epoch": 1.71,
"learning_rate": 1.9513639217578636e-05,
"loss": 0.9707,
"step": 3095
},
{
"epoch": 1.71,
"learning_rate": 1.9443026969320955e-05,
"loss": 0.9367,
"step": 3100
},
{
"epoch": 1.72,
"learning_rate": 1.937246132669272e-05,
"loss": 0.896,
"step": 3105
},
{
"epoch": 1.72,
"learning_rate": 1.9301942881519047e-05,
"loss": 0.97,
"step": 3110
},
{
"epoch": 1.72,
"learning_rate": 1.9231472225229216e-05,
"loss": 0.9638,
"step": 3115
},
{
"epoch": 1.73,
"learning_rate": 1.9161049948851684e-05,
"loss": 0.9561,
"step": 3120
},
{
"epoch": 1.73,
"learning_rate": 1.9090676643009193e-05,
"loss": 0.9734,
"step": 3125
},
{
"epoch": 1.73,
"learning_rate": 1.902035289791373e-05,
"loss": 0.9651,
"step": 3130
},
{
"epoch": 1.73,
"learning_rate": 1.8950079303361658e-05,
"loss": 0.9489,
"step": 3135
},
{
"epoch": 1.74,
"learning_rate": 1.8879856448728723e-05,
"loss": 0.9893,
"step": 3140
},
{
"epoch": 1.74,
"learning_rate": 1.8809684922965097e-05,
"loss": 0.9549,
"step": 3145
},
{
"epoch": 1.74,
"learning_rate": 1.8739565314590507e-05,
"loss": 0.9196,
"step": 3150
},
{
"epoch": 1.74,
"learning_rate": 1.8669498211689216e-05,
"loss": 0.9568,
"step": 3155
},
{
"epoch": 1.75,
"learning_rate": 1.859948420190517e-05,
"loss": 0.904,
"step": 3160
},
{
"epoch": 1.75,
"learning_rate": 1.852952387243698e-05,
"loss": 0.9375,
"step": 3165
},
{
"epoch": 1.75,
"learning_rate": 1.8459617810033096e-05,
"loss": 0.9048,
"step": 3170
},
{
"epoch": 1.76,
"learning_rate": 1.83897666009868e-05,
"loss": 0.9514,
"step": 3175
},
{
"epoch": 1.76,
"learning_rate": 1.8319970831131363e-05,
"loss": 0.8855,
"step": 3180
},
{
"epoch": 1.76,
"learning_rate": 1.825023108583505e-05,
"loss": 0.9119,
"step": 3185
},
{
"epoch": 1.76,
"learning_rate": 1.818054794999628e-05,
"loss": 0.9733,
"step": 3190
},
{
"epoch": 1.77,
"learning_rate": 1.8110922008038705e-05,
"loss": 0.9597,
"step": 3195
},
{
"epoch": 1.77,
"learning_rate": 1.8041353843906275e-05,
"loss": 0.8952,
"step": 3200
},
{
"epoch": 1.77,
"learning_rate": 1.797184404105839e-05,
"loss": 0.8934,
"step": 3205
},
{
"epoch": 1.77,
"learning_rate": 1.7902393182464955e-05,
"loss": 0.9299,
"step": 3210
},
{
"epoch": 1.78,
"learning_rate": 1.7833001850601544e-05,
"loss": 0.9247,
"step": 3215
},
{
"epoch": 1.78,
"learning_rate": 1.7763670627444465e-05,
"loss": 0.9672,
"step": 3220
},
{
"epoch": 1.78,
"learning_rate": 1.7694400094465913e-05,
"loss": 0.9451,
"step": 3225
},
{
"epoch": 1.79,
"learning_rate": 1.7625190832629085e-05,
"loss": 0.9294,
"step": 3230
},
{
"epoch": 1.79,
"learning_rate": 1.7556043422383293e-05,
"loss": 0.9145,
"step": 3235
},
{
"epoch": 1.79,
"learning_rate": 1.7486958443659112e-05,
"loss": 0.9508,
"step": 3240
},
{
"epoch": 1.79,
"learning_rate": 1.7417936475863526e-05,
"loss": 0.8725,
"step": 3245
},
{
"epoch": 1.8,
"learning_rate": 1.7348978097875036e-05,
"loss": 0.9195,
"step": 3250
},
{
"epoch": 1.8,
"learning_rate": 1.728008388803883e-05,
"loss": 0.933,
"step": 3255
},
{
"epoch": 1.8,
"learning_rate": 1.7211254424161933e-05,
"loss": 0.9747,
"step": 3260
},
{
"epoch": 1.81,
"learning_rate": 1.7142490283508324e-05,
"loss": 0.9168,
"step": 3265
},
{
"epoch": 1.81,
"learning_rate": 1.707379204279418e-05,
"loss": 0.9844,
"step": 3270
},
{
"epoch": 1.81,
"learning_rate": 1.700516027818293e-05,
"loss": 0.9071,
"step": 3275
},
{
"epoch": 1.81,
"learning_rate": 1.6936595565280488e-05,
"loss": 0.9311,
"step": 3280
},
{
"epoch": 1.82,
"learning_rate": 1.686809847913045e-05,
"loss": 0.9419,
"step": 3285
},
{
"epoch": 1.82,
"learning_rate": 1.679966959420918e-05,
"loss": 0.9679,
"step": 3290
},
{
"epoch": 1.82,
"learning_rate": 1.67313094844211e-05,
"loss": 0.9601,
"step": 3295
},
{
"epoch": 1.82,
"learning_rate": 1.6663018723093774e-05,
"loss": 0.9022,
"step": 3300
},
{
"epoch": 1.83,
"learning_rate": 1.6594797882973196e-05,
"loss": 0.8925,
"step": 3305
},
{
"epoch": 1.83,
"learning_rate": 1.6526647536218894e-05,
"loss": 0.9238,
"step": 3310
},
{
"epoch": 1.83,
"learning_rate": 1.6458568254399225e-05,
"loss": 0.9632,
"step": 3315
},
{
"epoch": 1.84,
"learning_rate": 1.6390560608486496e-05,
"loss": 0.9164,
"step": 3320
},
{
"epoch": 1.84,
"learning_rate": 1.6322625168852217e-05,
"loss": 0.9505,
"step": 3325
},
{
"epoch": 1.84,
"learning_rate": 1.6254762505262338e-05,
"loss": 0.9622,
"step": 3330
},
{
"epoch": 1.84,
"learning_rate": 1.618697318687241e-05,
"loss": 0.9204,
"step": 3335
},
{
"epoch": 1.85,
"learning_rate": 1.6119257782222895e-05,
"loss": 0.9504,
"step": 3340
},
{
"epoch": 1.85,
"learning_rate": 1.6051616859234285e-05,
"loss": 0.9384,
"step": 3345
},
{
"epoch": 1.85,
"learning_rate": 1.5984050985202474e-05,
"loss": 0.9374,
"step": 3350
},
{
"epoch": 1.86,
"learning_rate": 1.591656072679387e-05,
"loss": 0.9937,
"step": 3355
},
{
"epoch": 1.86,
"learning_rate": 1.5849146650040737e-05,
"loss": 0.9587,
"step": 3360
},
{
"epoch": 1.86,
"learning_rate": 1.5781809320336412e-05,
"loss": 0.9312,
"step": 3365
},
{
"epoch": 1.86,
"learning_rate": 1.5714549302430536e-05,
"loss": 0.9343,
"step": 3370
},
{
"epoch": 1.87,
"learning_rate": 1.5647367160424393e-05,
"loss": 0.9419,
"step": 3375
},
{
"epoch": 1.87,
"learning_rate": 1.558026345776608e-05,
"loss": 0.9371,
"step": 3380
},
{
"epoch": 1.87,
"learning_rate": 1.551323875724587e-05,
"loss": 0.913,
"step": 3385
},
{
"epoch": 1.87,
"learning_rate": 1.5446293620991437e-05,
"loss": 0.9195,
"step": 3390
},
{
"epoch": 1.88,
"learning_rate": 1.5379428610463174e-05,
"loss": 0.9164,
"step": 3395
},
{
"epoch": 1.88,
"learning_rate": 1.531264428644945e-05,
"loss": 0.9256,
"step": 3400
},
{
"epoch": 1.88,
"learning_rate": 1.5245941209061953e-05,
"loss": 0.9658,
"step": 3405
},
{
"epoch": 1.89,
"learning_rate": 1.517931993773094e-05,
"loss": 0.9397,
"step": 3410
},
{
"epoch": 1.89,
"learning_rate": 1.5112781031200569e-05,
"loss": 0.915,
"step": 3415
},
{
"epoch": 1.89,
"learning_rate": 1.5046325047524251e-05,
"loss": 0.9116,
"step": 3420
},
{
"epoch": 1.89,
"learning_rate": 1.4979952544059888e-05,
"loss": 0.9192,
"step": 3425
},
{
"epoch": 1.9,
"learning_rate": 1.4913664077465289e-05,
"loss": 0.924,
"step": 3430
},
{
"epoch": 1.9,
"learning_rate": 1.4847460203693408e-05,
"loss": 0.9579,
"step": 3435
},
{
"epoch": 1.9,
"learning_rate": 1.4781341477987776e-05,
"loss": 0.9816,
"step": 3440
},
{
"epoch": 1.9,
"learning_rate": 1.4715308454877758e-05,
"loss": 0.9412,
"step": 3445
},
{
"epoch": 1.91,
"learning_rate": 1.4649361688173979e-05,
"loss": 0.9413,
"step": 3450
},
{
"epoch": 1.91,
"learning_rate": 1.458350173096361e-05,
"loss": 0.9425,
"step": 3455
},
{
"epoch": 1.91,
"learning_rate": 1.4517729135605795e-05,
"loss": 0.9132,
"step": 3460
},
{
"epoch": 1.92,
"learning_rate": 1.4452044453726942e-05,
"loss": 0.9455,
"step": 3465
},
{
"epoch": 1.92,
"learning_rate": 1.4386448236216174e-05,
"loss": 0.9543,
"step": 3470
},
{
"epoch": 1.92,
"learning_rate": 1.4320941033220667e-05,
"loss": 0.9118,
"step": 3475
},
{
"epoch": 1.92,
"learning_rate": 1.4255523394141041e-05,
"loss": 0.9425,
"step": 3480
},
{
"epoch": 1.93,
"learning_rate": 1.4190195867626749e-05,
"loss": 0.8958,
"step": 3485
},
{
"epoch": 1.93,
"learning_rate": 1.4124959001571497e-05,
"loss": 0.944,
"step": 3490
},
{
"epoch": 1.93,
"learning_rate": 1.4059813343108616e-05,
"loss": 0.9611,
"step": 3495
},
{
"epoch": 1.94,
"learning_rate": 1.3994759438606501e-05,
"loss": 0.9448,
"step": 3500
},
{
"epoch": 1.94,
"learning_rate": 1.3929797833664013e-05,
"loss": 0.9659,
"step": 3505
},
{
"epoch": 1.94,
"learning_rate": 1.3864929073105922e-05,
"loss": 0.9178,
"step": 3510
},
{
"epoch": 1.94,
"learning_rate": 1.3800153700978282e-05,
"loss": 0.8965,
"step": 3515
},
{
"epoch": 1.95,
"learning_rate": 1.373547226054398e-05,
"loss": 0.9198,
"step": 3520
},
{
"epoch": 1.95,
"learning_rate": 1.367088529427803e-05,
"loss": 0.9398,
"step": 3525
},
{
"epoch": 1.95,
"learning_rate": 1.3606393343863182e-05,
"loss": 0.9423,
"step": 3530
},
{
"epoch": 1.95,
"learning_rate": 1.3541996950185227e-05,
"loss": 0.9592,
"step": 3535
},
{
"epoch": 1.96,
"learning_rate": 1.3477696653328598e-05,
"loss": 0.9489,
"step": 3540
},
{
"epoch": 1.96,
"learning_rate": 1.3413492992571713e-05,
"loss": 0.963,
"step": 3545
},
{
"epoch": 1.96,
"learning_rate": 1.3349386506382586e-05,
"loss": 0.9449,
"step": 3550
},
{
"epoch": 1.97,
"learning_rate": 1.3285377732414172e-05,
"loss": 0.9043,
"step": 3555
},
{
"epoch": 1.97,
"learning_rate": 1.3221467207499972e-05,
"loss": 0.9362,
"step": 3560
},
{
"epoch": 1.97,
"learning_rate": 1.3157655467649463e-05,
"loss": 0.945,
"step": 3565
},
{
"epoch": 1.97,
"learning_rate": 1.3093943048043634e-05,
"loss": 0.8693,
"step": 3570
},
{
"epoch": 1.98,
"learning_rate": 1.3030330483030479e-05,
"loss": 0.9659,
"step": 3575
},
{
"epoch": 1.98,
"learning_rate": 1.2966818306120535e-05,
"loss": 0.9036,
"step": 3580
},
{
"epoch": 1.98,
"learning_rate": 1.2903407049982386e-05,
"loss": 0.9282,
"step": 3585
},
{
"epoch": 1.99,
"learning_rate": 1.2840097246438215e-05,
"loss": 0.8747,
"step": 3590
},
{
"epoch": 1.99,
"learning_rate": 1.277688942645934e-05,
"loss": 0.936,
"step": 3595
},
{
"epoch": 1.99,
"learning_rate": 1.2713784120161725e-05,
"loss": 0.8945,
"step": 3600
},
{
"epoch": 1.99,
"learning_rate": 1.2650781856801598e-05,
"loss": 0.9273,
"step": 3605
},
{
"epoch": 2.0,
"learning_rate": 1.258788316477097e-05,
"loss": 0.9632,
"step": 3610
},
{
"epoch": 2.0,
"learning_rate": 1.2525088571593202e-05,
"loss": 0.8641,
"step": 3615
},
{
"epoch": 2.0,
"learning_rate": 1.2462398603918607e-05,
"loss": 0.9314,
"step": 3620
},
{
"epoch": 2.0,
"learning_rate": 1.2399813787520006e-05,
"loss": 0.9169,
"step": 3625
},
{
"epoch": 2.01,
"learning_rate": 1.2337334647288334e-05,
"loss": 0.9089,
"step": 3630
},
{
"epoch": 2.01,
"learning_rate": 1.2274961707228228e-05,
"loss": 0.9039,
"step": 3635
},
{
"epoch": 2.01,
"learning_rate": 1.2212695490453646e-05,
"loss": 0.9378,
"step": 3640
},
{
"epoch": 2.02,
"learning_rate": 1.2150536519183475e-05,
"loss": 0.9297,
"step": 3645
},
{
"epoch": 2.02,
"learning_rate": 1.2088485314737108e-05,
"loss": 0.9488,
"step": 3650
},
{
"epoch": 2.02,
"learning_rate": 1.2026542397530186e-05,
"loss": 0.9625,
"step": 3655
},
{
"epoch": 2.02,
"learning_rate": 1.1964708287070073e-05,
"loss": 0.8874,
"step": 3660
},
{
"epoch": 2.03,
"learning_rate": 1.1902983501951666e-05,
"loss": 0.9224,
"step": 3665
},
{
"epoch": 2.03,
"learning_rate": 1.1841368559852892e-05,
"loss": 0.9442,
"step": 3670
},
{
"epoch": 2.03,
"learning_rate": 1.17798639775305e-05,
"loss": 0.8688,
"step": 3675
},
{
"epoch": 2.03,
"learning_rate": 1.1718470270815608e-05,
"loss": 0.8912,
"step": 3680
},
{
"epoch": 2.04,
"learning_rate": 1.1657187954609496e-05,
"loss": 0.9131,
"step": 3685
},
{
"epoch": 2.04,
"learning_rate": 1.1596017542879168e-05,
"loss": 0.9827,
"step": 3690
},
{
"epoch": 2.04,
"learning_rate": 1.1534959548653132e-05,
"loss": 0.8588,
"step": 3695
},
{
"epoch": 2.05,
"learning_rate": 1.147401448401706e-05,
"loss": 0.9359,
"step": 3700
}
],
"max_steps": 5424,
"num_train_epochs": 3,
"total_flos": 1.2023495325332275e+18,
"trial_name": null,
"trial_params": null
}