{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.1737089201877935,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0011737089201877935,
"grad_norm": 0.27773135900497437,
"learning_rate": 4e-05,
"loss": 1.1957,
"step": 1
},
{
"epoch": 0.002347417840375587,
"grad_norm": 0.26547771692276,
"learning_rate": 8e-05,
"loss": 1.1284,
"step": 2
},
{
"epoch": 0.0035211267605633804,
"grad_norm": 0.236787810921669,
"learning_rate": 0.00012,
"loss": 1.1823,
"step": 3
},
{
"epoch": 0.004694835680751174,
"grad_norm": 0.2459038347005844,
"learning_rate": 0.00016,
"loss": 1.1409,
"step": 4
},
{
"epoch": 0.005868544600938967,
"grad_norm": 0.2526487410068512,
"learning_rate": 0.0002,
"loss": 1.12,
"step": 5
},
{
"epoch": 0.007042253521126761,
"grad_norm": 0.2795103192329407,
"learning_rate": 0.00019976387249114524,
"loss": 1.1579,
"step": 6
},
{
"epoch": 0.008215962441314555,
"grad_norm": 0.2876183092594147,
"learning_rate": 0.00019952774498229045,
"loss": 1.1211,
"step": 7
},
{
"epoch": 0.009389671361502348,
"grad_norm": 0.3014296293258667,
"learning_rate": 0.00019929161747343565,
"loss": 1.1118,
"step": 8
},
{
"epoch": 0.01056338028169014,
"grad_norm": 0.29106494784355164,
"learning_rate": 0.00019905548996458088,
"loss": 1.1787,
"step": 9
},
{
"epoch": 0.011737089201877934,
"grad_norm": 0.3211474120616913,
"learning_rate": 0.00019881936245572609,
"loss": 1.1004,
"step": 10
},
{
"epoch": 0.012910798122065728,
"grad_norm": 0.3358176350593567,
"learning_rate": 0.00019858323494687132,
"loss": 1.1099,
"step": 11
},
{
"epoch": 0.014084507042253521,
"grad_norm": 0.3236922323703766,
"learning_rate": 0.00019834710743801655,
"loss": 1.048,
"step": 12
},
{
"epoch": 0.015258215962441314,
"grad_norm": 0.31388312578201294,
"learning_rate": 0.00019811097992916175,
"loss": 1.0532,
"step": 13
},
{
"epoch": 0.01643192488262911,
"grad_norm": 0.320402055978775,
"learning_rate": 0.00019787485242030696,
"loss": 1.0757,
"step": 14
},
{
"epoch": 0.017605633802816902,
"grad_norm": 0.32999494671821594,
"learning_rate": 0.0001976387249114522,
"loss": 1.122,
"step": 15
},
{
"epoch": 0.018779342723004695,
"grad_norm": 0.30936214327812195,
"learning_rate": 0.00019740259740259742,
"loss": 1.1156,
"step": 16
},
{
"epoch": 0.01995305164319249,
"grad_norm": 0.2863931953907013,
"learning_rate": 0.00019716646989374263,
"loss": 1.0414,
"step": 17
},
{
"epoch": 0.02112676056338028,
"grad_norm": 0.29143351316452026,
"learning_rate": 0.00019693034238488786,
"loss": 1.0379,
"step": 18
},
{
"epoch": 0.022300469483568074,
"grad_norm": 0.28874626755714417,
"learning_rate": 0.0001966942148760331,
"loss": 1.0388,
"step": 19
},
{
"epoch": 0.023474178403755867,
"grad_norm": 0.30588293075561523,
"learning_rate": 0.00019645808736717827,
"loss": 1.0515,
"step": 20
},
{
"epoch": 0.02464788732394366,
"grad_norm": 0.29231536388397217,
"learning_rate": 0.0001962219598583235,
"loss": 1.0472,
"step": 21
},
{
"epoch": 0.025821596244131457,
"grad_norm": 0.2783581614494324,
"learning_rate": 0.00019598583234946873,
"loss": 1.0608,
"step": 22
},
{
"epoch": 0.02699530516431925,
"grad_norm": 0.29816293716430664,
"learning_rate": 0.00019574970484061393,
"loss": 1.0986,
"step": 23
},
{
"epoch": 0.028169014084507043,
"grad_norm": 0.27919578552246094,
"learning_rate": 0.00019551357733175916,
"loss": 1.0265,
"step": 24
},
{
"epoch": 0.029342723004694836,
"grad_norm": 0.3144524097442627,
"learning_rate": 0.00019527744982290437,
"loss": 1.0699,
"step": 25
},
{
"epoch": 0.03051643192488263,
"grad_norm": 0.3090282678604126,
"learning_rate": 0.0001950413223140496,
"loss": 1.0601,
"step": 26
},
{
"epoch": 0.03169014084507042,
"grad_norm": 0.30304697155952454,
"learning_rate": 0.0001948051948051948,
"loss": 1.0926,
"step": 27
},
{
"epoch": 0.03286384976525822,
"grad_norm": 0.29015883803367615,
"learning_rate": 0.00019456906729634004,
"loss": 1.0,
"step": 28
},
{
"epoch": 0.03403755868544601,
"grad_norm": 0.29359501600265503,
"learning_rate": 0.00019433293978748527,
"loss": 0.988,
"step": 29
},
{
"epoch": 0.035211267605633804,
"grad_norm": 0.2772333323955536,
"learning_rate": 0.00019409681227863047,
"loss": 0.9758,
"step": 30
},
{
"epoch": 0.036384976525821594,
"grad_norm": 0.2761421799659729,
"learning_rate": 0.00019386068476977568,
"loss": 0.9926,
"step": 31
},
{
"epoch": 0.03755868544600939,
"grad_norm": NaN,
"learning_rate": 0.00019386068476977568,
"loss": 1.0944,
"step": 32
},
{
"epoch": 0.03873239436619718,
"grad_norm": 0.2766799330711365,
"learning_rate": 0.0001936245572609209,
"loss": 0.9813,
"step": 33
},
{
"epoch": 0.03990610328638498,
"grad_norm": 0.28922533988952637,
"learning_rate": 0.0001933884297520661,
"loss": 0.9839,
"step": 34
},
{
"epoch": 0.04107981220657277,
"grad_norm": 0.28271371126174927,
"learning_rate": 0.00019315230224321134,
"loss": 1.0125,
"step": 35
},
{
"epoch": 0.04225352112676056,
"grad_norm": 0.2955509424209595,
"learning_rate": 0.00019291617473435658,
"loss": 1.0049,
"step": 36
},
{
"epoch": 0.04342723004694836,
"grad_norm": 0.2909109592437744,
"learning_rate": 0.00019268004722550178,
"loss": 1.0015,
"step": 37
},
{
"epoch": 0.04460093896713615,
"grad_norm": 0.29657021164894104,
"learning_rate": 0.00019244391971664698,
"loss": 1.0107,
"step": 38
},
{
"epoch": 0.045774647887323945,
"grad_norm": 0.29010507464408875,
"learning_rate": 0.00019220779220779222,
"loss": 0.9918,
"step": 39
},
{
"epoch": 0.046948356807511735,
"grad_norm": 0.2906627058982849,
"learning_rate": 0.00019197166469893745,
"loss": 0.9843,
"step": 40
},
{
"epoch": 0.04812206572769953,
"grad_norm": 0.2919193208217621,
"learning_rate": 0.00019173553719008265,
"loss": 0.9889,
"step": 41
},
{
"epoch": 0.04929577464788732,
"grad_norm": 0.3219091296195984,
"learning_rate": 0.00019149940968122788,
"loss": 0.9979,
"step": 42
},
{
"epoch": 0.05046948356807512,
"grad_norm": 0.29512301087379456,
"learning_rate": 0.0001912632821723731,
"loss": 0.989,
"step": 43
},
{
"epoch": 0.051643192488262914,
"grad_norm": 0.3190619647502899,
"learning_rate": 0.0001910271546635183,
"loss": 0.9563,
"step": 44
},
{
"epoch": 0.0528169014084507,
"grad_norm": 0.310253381729126,
"learning_rate": 0.00019079102715466352,
"loss": 1.037,
"step": 45
},
{
"epoch": 0.0539906103286385,
"grad_norm": 0.3140093684196472,
"learning_rate": 0.00019055489964580876,
"loss": 0.9687,
"step": 46
},
{
"epoch": 0.05516431924882629,
"grad_norm": 0.2816644310951233,
"learning_rate": 0.00019031877213695396,
"loss": 0.9372,
"step": 47
},
{
"epoch": 0.056338028169014086,
"grad_norm": 0.3012441396713257,
"learning_rate": 0.0001900826446280992,
"loss": 0.9968,
"step": 48
},
{
"epoch": 0.057511737089201875,
"grad_norm": 0.29789185523986816,
"learning_rate": 0.0001898465171192444,
"loss": 0.9143,
"step": 49
},
{
"epoch": 0.05868544600938967,
"grad_norm": 0.29454007744789124,
"learning_rate": 0.00018961038961038963,
"loss": 0.9837,
"step": 50
},
{
"epoch": 0.05985915492957746,
"grad_norm": 0.321218341588974,
"learning_rate": 0.00018937426210153483,
"loss": 1.0135,
"step": 51
},
{
"epoch": 0.06103286384976526,
"grad_norm": 0.30039164423942566,
"learning_rate": 0.00018913813459268006,
"loss": 0.9639,
"step": 52
},
{
"epoch": 0.062206572769953054,
"grad_norm": 0.3052615225315094,
"learning_rate": 0.0001889020070838253,
"loss": 0.9401,
"step": 53
},
{
"epoch": 0.06338028169014084,
"grad_norm": 0.3177138864994049,
"learning_rate": 0.00018866587957497047,
"loss": 0.9626,
"step": 54
},
{
"epoch": 0.06455399061032864,
"grad_norm": 0.3098903298377991,
"learning_rate": 0.0001884297520661157,
"loss": 0.9535,
"step": 55
},
{
"epoch": 0.06572769953051644,
"grad_norm": 0.33165299892425537,
"learning_rate": 0.00018819362455726094,
"loss": 1.0475,
"step": 56
},
{
"epoch": 0.06690140845070422,
"grad_norm": 0.3054540455341339,
"learning_rate": 0.00018795749704840614,
"loss": 0.988,
"step": 57
},
{
"epoch": 0.06807511737089202,
"grad_norm": 0.3412969708442688,
"learning_rate": 0.00018772136953955137,
"loss": 0.9531,
"step": 58
},
{
"epoch": 0.06924882629107981,
"grad_norm": 0.3173505663871765,
"learning_rate": 0.0001874852420306966,
"loss": 1.0037,
"step": 59
},
{
"epoch": 0.07042253521126761,
"grad_norm": 0.29377281665802,
"learning_rate": 0.0001872491145218418,
"loss": 0.9205,
"step": 60
},
{
"epoch": 0.0715962441314554,
"grad_norm": 0.2970433831214905,
"learning_rate": 0.000187012987012987,
"loss": 0.8902,
"step": 61
},
{
"epoch": 0.07276995305164319,
"grad_norm": 0.3081493675708771,
"learning_rate": 0.00018677685950413224,
"loss": 0.9498,
"step": 62
},
{
"epoch": 0.07394366197183098,
"grad_norm": 0.31438371539115906,
"learning_rate": 0.00018654073199527747,
"loss": 0.9406,
"step": 63
},
{
"epoch": 0.07511737089201878,
"grad_norm": 0.29640915989875793,
"learning_rate": 0.00018630460448642268,
"loss": 0.8948,
"step": 64
},
{
"epoch": 0.07629107981220658,
"grad_norm": 0.33342233300209045,
"learning_rate": 0.00018606847697756788,
"loss": 0.941,
"step": 65
},
{
"epoch": 0.07746478873239436,
"grad_norm": 0.31546634435653687,
"learning_rate": 0.00018583234946871312,
"loss": 0.9392,
"step": 66
},
{
"epoch": 0.07863849765258216,
"grad_norm": 0.31528937816619873,
"learning_rate": 0.00018559622195985832,
"loss": 0.9293,
"step": 67
},
{
"epoch": 0.07981220657276995,
"grad_norm": 0.33473101258277893,
"learning_rate": 0.00018536009445100355,
"loss": 0.9214,
"step": 68
},
{
"epoch": 0.08098591549295775,
"grad_norm": 0.6588060259819031,
"learning_rate": 0.00018512396694214878,
"loss": 0.944,
"step": 69
},
{
"epoch": 0.08215962441314555,
"grad_norm": 0.30120280385017395,
"learning_rate": 0.000184887839433294,
"loss": 0.9171,
"step": 70
},
{
"epoch": 0.08333333333333333,
"grad_norm": 0.3417011499404907,
"learning_rate": 0.0001846517119244392,
"loss": 0.9382,
"step": 71
},
{
"epoch": 0.08450704225352113,
"grad_norm": 0.3202987313270569,
"learning_rate": 0.00018441558441558442,
"loss": 0.931,
"step": 72
},
{
"epoch": 0.08568075117370892,
"grad_norm": 0.3390517234802246,
"learning_rate": 0.00018417945690672965,
"loss": 0.9218,
"step": 73
},
{
"epoch": 0.08685446009389672,
"grad_norm": 0.32109472155570984,
"learning_rate": 0.00018394332939787486,
"loss": 0.9226,
"step": 74
},
{
"epoch": 0.0880281690140845,
"grad_norm": 0.3435365855693817,
"learning_rate": 0.0001837072018890201,
"loss": 0.9402,
"step": 75
},
{
"epoch": 0.0892018779342723,
"grad_norm": 0.3335697054862976,
"learning_rate": 0.00018347107438016532,
"loss": 0.9385,
"step": 76
},
{
"epoch": 0.0903755868544601,
"grad_norm": 0.32050758600234985,
"learning_rate": 0.0001832349468713105,
"loss": 0.8992,
"step": 77
},
{
"epoch": 0.09154929577464789,
"grad_norm": 0.32620421051979065,
"learning_rate": 0.00018299881936245573,
"loss": 0.9476,
"step": 78
},
{
"epoch": 0.09272300469483569,
"grad_norm": 0.33306750655174255,
"learning_rate": 0.00018276269185360096,
"loss": 0.9458,
"step": 79
},
{
"epoch": 0.09389671361502347,
"grad_norm": 0.3500649034976959,
"learning_rate": 0.00018252656434474617,
"loss": 0.9612,
"step": 80
},
{
"epoch": 0.09507042253521127,
"grad_norm": 0.3186359405517578,
"learning_rate": 0.0001822904368358914,
"loss": 0.9527,
"step": 81
},
{
"epoch": 0.09624413145539906,
"grad_norm": 0.3317716717720032,
"learning_rate": 0.0001820543093270366,
"loss": 0.9648,
"step": 82
},
{
"epoch": 0.09741784037558686,
"grad_norm": 0.3196907639503479,
"learning_rate": 0.00018181818181818183,
"loss": 0.9643,
"step": 83
},
{
"epoch": 0.09859154929577464,
"grad_norm": 0.3195818066596985,
"learning_rate": 0.00018158205430932704,
"loss": 0.9121,
"step": 84
},
{
"epoch": 0.09976525821596244,
"grad_norm": 0.33151793479919434,
"learning_rate": 0.00018134592680047227,
"loss": 0.9051,
"step": 85
},
{
"epoch": 0.10093896713615023,
"grad_norm": 0.3110804259777069,
"learning_rate": 0.00018110979929161747,
"loss": 0.9241,
"step": 86
},
{
"epoch": 0.10211267605633803,
"grad_norm": 0.34278568625450134,
"learning_rate": 0.0001808736717827627,
"loss": 0.9634,
"step": 87
},
{
"epoch": 0.10328638497652583,
"grad_norm": 0.34013500809669495,
"learning_rate": 0.0001806375442739079,
"loss": 0.8822,
"step": 88
},
{
"epoch": 0.10446009389671361,
"grad_norm": 0.3449755012989044,
"learning_rate": 0.00018040141676505314,
"loss": 0.969,
"step": 89
},
{
"epoch": 0.1056338028169014,
"grad_norm": 0.3166862726211548,
"learning_rate": 0.00018016528925619835,
"loss": 0.885,
"step": 90
},
{
"epoch": 0.1068075117370892,
"grad_norm": 0.3260084092617035,
"learning_rate": 0.00017992916174734358,
"loss": 0.8908,
"step": 91
},
{
"epoch": 0.107981220657277,
"grad_norm": 0.32791605591773987,
"learning_rate": 0.0001796930342384888,
"loss": 0.8822,
"step": 92
},
{
"epoch": 0.10915492957746478,
"grad_norm": 0.31909653544425964,
"learning_rate": 0.000179456906729634,
"loss": 0.8463,
"step": 93
},
{
"epoch": 0.11032863849765258,
"grad_norm": 0.3413308262825012,
"learning_rate": 0.00017922077922077922,
"loss": 0.9232,
"step": 94
},
{
"epoch": 0.11150234741784038,
"grad_norm": 0.32644134759902954,
"learning_rate": 0.00017898465171192445,
"loss": 0.9113,
"step": 95
},
{
"epoch": 0.11267605633802817,
"grad_norm": 0.33090126514434814,
"learning_rate": 0.00017874852420306965,
"loss": 0.9286,
"step": 96
},
{
"epoch": 0.11384976525821597,
"grad_norm": 0.37200361490249634,
"learning_rate": 0.00017851239669421489,
"loss": 0.9239,
"step": 97
},
{
"epoch": 0.11502347417840375,
"grad_norm": 0.3274000585079193,
"learning_rate": 0.00017827626918536012,
"loss": 0.9038,
"step": 98
},
{
"epoch": 0.11619718309859155,
"grad_norm": 0.3768482506275177,
"learning_rate": 0.00017804014167650532,
"loss": 0.8558,
"step": 99
},
{
"epoch": 0.11737089201877934,
"grad_norm": 0.32970595359802246,
"learning_rate": 0.00017780401416765053,
"loss": 0.9057,
"step": 100
},
{
"epoch": 0.11854460093896714,
"grad_norm": 0.37230944633483887,
"learning_rate": 0.00017756788665879576,
"loss": 0.9211,
"step": 101
},
{
"epoch": 0.11971830985915492,
"grad_norm": 0.352201372385025,
"learning_rate": 0.000177331759149941,
"loss": 0.9497,
"step": 102
},
{
"epoch": 0.12089201877934272,
"grad_norm": 0.363364577293396,
"learning_rate": 0.0001770956316410862,
"loss": 0.9535,
"step": 103
},
{
"epoch": 0.12206572769953052,
"grad_norm": 0.3388724625110626,
"learning_rate": 0.00017685950413223143,
"loss": 0.8908,
"step": 104
},
{
"epoch": 0.12323943661971831,
"grad_norm": 0.34684258699417114,
"learning_rate": 0.00017662337662337663,
"loss": 0.8981,
"step": 105
},
{
"epoch": 0.12441314553990611,
"grad_norm": 0.31892621517181396,
"learning_rate": 0.00017638724911452183,
"loss": 0.8461,
"step": 106
},
{
"epoch": 0.1255868544600939,
"grad_norm": 0.32913845777511597,
"learning_rate": 0.00017615112160566707,
"loss": 0.9087,
"step": 107
},
{
"epoch": 0.1267605633802817,
"grad_norm": 0.3695410490036011,
"learning_rate": 0.0001759149940968123,
"loss": 0.8899,
"step": 108
},
{
"epoch": 0.12793427230046947,
"grad_norm": 0.3455798923969269,
"learning_rate": 0.0001756788665879575,
"loss": 0.9045,
"step": 109
},
{
"epoch": 0.12910798122065728,
"grad_norm": 0.3612275719642639,
"learning_rate": 0.0001754427390791027,
"loss": 0.8861,
"step": 110
},
{
"epoch": 0.13028169014084506,
"grad_norm": 0.4106651544570923,
"learning_rate": 0.00017520661157024794,
"loss": 0.9152,
"step": 111
},
{
"epoch": 0.13145539906103287,
"grad_norm": 0.3604993224143982,
"learning_rate": 0.00017497048406139317,
"loss": 0.9141,
"step": 112
},
{
"epoch": 0.13262910798122066,
"grad_norm": 0.3496919870376587,
"learning_rate": 0.00017473435655253837,
"loss": 0.9061,
"step": 113
},
{
"epoch": 0.13380281690140844,
"grad_norm": 0.33643972873687744,
"learning_rate": 0.0001744982290436836,
"loss": 0.8877,
"step": 114
},
{
"epoch": 0.13497652582159625,
"grad_norm": 0.33064204454421997,
"learning_rate": 0.00017426210153482884,
"loss": 0.8967,
"step": 115
},
{
"epoch": 0.13615023474178403,
"grad_norm": 0.37868356704711914,
"learning_rate": 0.00017402597402597401,
"loss": 0.8957,
"step": 116
},
{
"epoch": 0.13732394366197184,
"grad_norm": 0.34379109740257263,
"learning_rate": 0.00017378984651711925,
"loss": 0.9332,
"step": 117
},
{
"epoch": 0.13849765258215962,
"grad_norm": 0.37193912267684937,
"learning_rate": 0.00017355371900826448,
"loss": 0.9513,
"step": 118
},
{
"epoch": 0.1396713615023474,
"grad_norm": 0.33701232075691223,
"learning_rate": 0.00017331759149940968,
"loss": 0.8946,
"step": 119
},
{
"epoch": 0.14084507042253522,
"grad_norm": 0.35765206813812256,
"learning_rate": 0.0001730814639905549,
"loss": 0.8931,
"step": 120
},
{
"epoch": 0.142018779342723,
"grad_norm": 0.3511311411857605,
"learning_rate": 0.00017284533648170012,
"loss": 0.9042,
"step": 121
},
{
"epoch": 0.1431924882629108,
"grad_norm": 0.33516445755958557,
"learning_rate": 0.00017260920897284535,
"loss": 0.8564,
"step": 122
},
{
"epoch": 0.1443661971830986,
"grad_norm": 0.385959267616272,
"learning_rate": 0.00017237308146399055,
"loss": 0.963,
"step": 123
},
{
"epoch": 0.14553990610328638,
"grad_norm": 0.34608641266822815,
"learning_rate": 0.00017213695395513578,
"loss": 0.8666,
"step": 124
},
{
"epoch": 0.1467136150234742,
"grad_norm": 0.3705556392669678,
"learning_rate": 0.00017190082644628102,
"loss": 0.7783,
"step": 125
},
{
"epoch": 0.14788732394366197,
"grad_norm": 0.3213210701942444,
"learning_rate": 0.00017166469893742622,
"loss": 0.8428,
"step": 126
},
{
"epoch": 0.14906103286384975,
"grad_norm": 0.3903498351573944,
"learning_rate": 0.00017142857142857143,
"loss": 0.8418,
"step": 127
},
{
"epoch": 0.15023474178403756,
"grad_norm": 0.3556365668773651,
"learning_rate": 0.00017119244391971666,
"loss": 0.8612,
"step": 128
},
{
"epoch": 0.15140845070422534,
"grad_norm": 0.3734995424747467,
"learning_rate": 0.00017095631641086186,
"loss": 0.8845,
"step": 129
},
{
"epoch": 0.15258215962441316,
"grad_norm": 0.33735260367393494,
"learning_rate": 0.0001707201889020071,
"loss": 0.8752,
"step": 130
},
{
"epoch": 0.15375586854460094,
"grad_norm": 0.38340267539024353,
"learning_rate": 0.00017048406139315232,
"loss": 0.8847,
"step": 131
},
{
"epoch": 0.15492957746478872,
"grad_norm": 0.3654419779777527,
"learning_rate": 0.00017024793388429753,
"loss": 0.8448,
"step": 132
},
{
"epoch": 0.15610328638497653,
"grad_norm": 0.3601568341255188,
"learning_rate": 0.00017001180637544273,
"loss": 0.8981,
"step": 133
},
{
"epoch": 0.1572769953051643,
"grad_norm": 0.40733832120895386,
"learning_rate": 0.00016977567886658796,
"loss": 0.9135,
"step": 134
},
{
"epoch": 0.15845070422535212,
"grad_norm": 0.34627673029899597,
"learning_rate": 0.0001695395513577332,
"loss": 0.9164,
"step": 135
},
{
"epoch": 0.1596244131455399,
"grad_norm": 0.3865872621536255,
"learning_rate": 0.0001693034238488784,
"loss": 0.9222,
"step": 136
},
{
"epoch": 0.1607981220657277,
"grad_norm": 0.4011456072330475,
"learning_rate": 0.00016906729634002363,
"loss": 0.8843,
"step": 137
},
{
"epoch": 0.1619718309859155,
"grad_norm": 0.32259878516197205,
"learning_rate": 0.00016883116883116884,
"loss": 0.8427,
"step": 138
},
{
"epoch": 0.16314553990610328,
"grad_norm": 0.3807618319988251,
"learning_rate": 0.00016859504132231404,
"loss": 0.8684,
"step": 139
},
{
"epoch": 0.1643192488262911,
"grad_norm": 0.3658106327056885,
"learning_rate": 0.00016835891381345927,
"loss": 0.9024,
"step": 140
},
{
"epoch": 0.16549295774647887,
"grad_norm": 0.3638787865638733,
"learning_rate": 0.0001681227863046045,
"loss": 0.8582,
"step": 141
},
{
"epoch": 0.16666666666666666,
"grad_norm": 0.3839091360569,
"learning_rate": 0.0001678866587957497,
"loss": 0.8543,
"step": 142
},
{
"epoch": 0.16784037558685447,
"grad_norm": 0.33579927682876587,
"learning_rate": 0.00016765053128689494,
"loss": 0.8765,
"step": 143
},
{
"epoch": 0.16901408450704225,
"grad_norm": 0.35091203451156616,
"learning_rate": 0.00016741440377804014,
"loss": 0.8504,
"step": 144
},
{
"epoch": 0.17018779342723006,
"grad_norm": 0.35823047161102295,
"learning_rate": 0.00016717827626918538,
"loss": 0.8534,
"step": 145
},
{
"epoch": 0.17136150234741784,
"grad_norm": 0.37154486775398254,
"learning_rate": 0.00016694214876033058,
"loss": 0.851,
"step": 146
},
{
"epoch": 0.17253521126760563,
"grad_norm": 0.33140066266059875,
"learning_rate": 0.0001667060212514758,
"loss": 0.8136,
"step": 147
},
{
"epoch": 0.17370892018779344,
"grad_norm": 0.37408292293548584,
"learning_rate": 0.00016646989374262104,
"loss": 0.8933,
"step": 148
},
{
"epoch": 0.17488262910798122,
"grad_norm": 0.36203357577323914,
"learning_rate": 0.00016623376623376625,
"loss": 0.8747,
"step": 149
},
{
"epoch": 0.176056338028169,
"grad_norm": 0.35033532977104187,
"learning_rate": 0.00016599763872491145,
"loss": 0.8273,
"step": 150
},
{
"epoch": 0.1772300469483568,
"grad_norm": 0.345048189163208,
"learning_rate": 0.00016576151121605668,
"loss": 0.8698,
"step": 151
},
{
"epoch": 0.1784037558685446,
"grad_norm": 0.3592989146709442,
"learning_rate": 0.0001655253837072019,
"loss": 0.8483,
"step": 152
},
{
"epoch": 0.1795774647887324,
"grad_norm": 0.3685864806175232,
"learning_rate": 0.00016528925619834712,
"loss": 0.915,
"step": 153
},
{
"epoch": 0.1807511737089202,
"grad_norm": 0.3427909314632416,
"learning_rate": 0.00016505312868949235,
"loss": 0.8321,
"step": 154
},
{
"epoch": 0.18192488262910797,
"grad_norm": 0.34697192907333374,
"learning_rate": 0.00016481700118063756,
"loss": 0.8801,
"step": 155
},
{
"epoch": 0.18309859154929578,
"grad_norm": 0.3387276530265808,
"learning_rate": 0.00016458087367178276,
"loss": 0.8237,
"step": 156
},
{
"epoch": 0.18427230046948356,
"grad_norm": 0.3547775447368622,
"learning_rate": 0.000164344746162928,
"loss": 0.8645,
"step": 157
},
{
"epoch": 0.18544600938967137,
"grad_norm": 0.3342725932598114,
"learning_rate": 0.00016410861865407322,
"loss": 0.82,
"step": 158
},
{
"epoch": 0.18661971830985916,
"grad_norm": 0.4317960739135742,
"learning_rate": 0.00016387249114521843,
"loss": 0.8614,
"step": 159
},
{
"epoch": 0.18779342723004694,
"grad_norm": 0.35031062364578247,
"learning_rate": 0.00016363636363636366,
"loss": 0.8193,
"step": 160
},
{
"epoch": 0.18896713615023475,
"grad_norm": 0.3616986572742462,
"learning_rate": 0.00016340023612750886,
"loss": 0.8571,
"step": 161
},
{
"epoch": 0.19014084507042253,
"grad_norm": 0.36284518241882324,
"learning_rate": 0.00016316410861865407,
"loss": 0.8555,
"step": 162
},
{
"epoch": 0.19131455399061034,
"grad_norm": 0.42962291836738586,
"learning_rate": 0.0001629279811097993,
"loss": 0.8574,
"step": 163
},
{
"epoch": 0.19248826291079812,
"grad_norm": 0.330268532037735,
"learning_rate": 0.00016269185360094453,
"loss": 0.8952,
"step": 164
},
{
"epoch": 0.1936619718309859,
"grad_norm": 0.33917295932769775,
"learning_rate": 0.00016245572609208974,
"loss": 0.8588,
"step": 165
},
{
"epoch": 0.19483568075117372,
"grad_norm": 0.3963412046432495,
"learning_rate": 0.00016221959858323494,
"loss": 0.8451,
"step": 166
},
{
"epoch": 0.1960093896713615,
"grad_norm": 0.33864182233810425,
"learning_rate": 0.00016198347107438017,
"loss": 0.8734,
"step": 167
},
{
"epoch": 0.19718309859154928,
"grad_norm": 0.3751653730869293,
"learning_rate": 0.00016174734356552538,
"loss": 0.8786,
"step": 168
},
{
"epoch": 0.1983568075117371,
"grad_norm": 0.4138842821121216,
"learning_rate": 0.0001615112160566706,
"loss": 0.8608,
"step": 169
},
{
"epoch": 0.19953051643192488,
"grad_norm": 0.3747748136520386,
"learning_rate": 0.00016127508854781584,
"loss": 0.8901,
"step": 170
},
{
"epoch": 0.2007042253521127,
"grad_norm": 0.3302014172077179,
"learning_rate": 0.00016103896103896104,
"loss": 0.8538,
"step": 171
},
{
"epoch": 0.20187793427230047,
"grad_norm": 0.36144372820854187,
"learning_rate": 0.00016080283353010625,
"loss": 0.8634,
"step": 172
},
{
"epoch": 0.20305164319248825,
"grad_norm": 0.3579455018043518,
"learning_rate": 0.00016056670602125148,
"loss": 0.8536,
"step": 173
},
{
"epoch": 0.20422535211267606,
"grad_norm": 0.3475671410560608,
"learning_rate": 0.0001603305785123967,
"loss": 0.8304,
"step": 174
},
{
"epoch": 0.20539906103286384,
"grad_norm": 0.34114810824394226,
"learning_rate": 0.00016009445100354192,
"loss": 0.8276,
"step": 175
},
{
"epoch": 0.20657276995305165,
"grad_norm": 0.32198190689086914,
"learning_rate": 0.00015985832349468715,
"loss": 0.815,
"step": 176
},
{
"epoch": 0.20774647887323944,
"grad_norm": 0.4003874361515045,
"learning_rate": 0.00015962219598583238,
"loss": 0.8523,
"step": 177
},
{
"epoch": 0.20892018779342722,
"grad_norm": 0.32290229201316833,
"learning_rate": 0.00015938606847697756,
"loss": 0.8465,
"step": 178
},
{
"epoch": 0.21009389671361503,
"grad_norm": 0.35729506611824036,
"learning_rate": 0.0001591499409681228,
"loss": 0.8437,
"step": 179
},
{
"epoch": 0.2112676056338028,
"grad_norm": 0.33743324875831604,
"learning_rate": 0.00015891381345926802,
"loss": 0.8351,
"step": 180
},
{
"epoch": 0.21244131455399062,
"grad_norm": 0.34673774242401123,
"learning_rate": 0.00015867768595041322,
"loss": 0.8146,
"step": 181
},
{
"epoch": 0.2136150234741784,
"grad_norm": 0.37883323431015015,
"learning_rate": 0.00015844155844155845,
"loss": 0.8889,
"step": 182
},
{
"epoch": 0.2147887323943662,
"grad_norm": 0.34172534942626953,
"learning_rate": 0.00015820543093270366,
"loss": 0.8479,
"step": 183
},
{
"epoch": 0.215962441314554,
"grad_norm": 0.39948219060897827,
"learning_rate": 0.0001579693034238489,
"loss": 0.8383,
"step": 184
},
{
"epoch": 0.21713615023474178,
"grad_norm": 0.33746814727783203,
"learning_rate": 0.0001577331759149941,
"loss": 0.8713,
"step": 185
},
{
"epoch": 0.21830985915492956,
"grad_norm": 0.34141069650650024,
"learning_rate": 0.00015749704840613933,
"loss": 0.8303,
"step": 186
},
{
"epoch": 0.21948356807511737,
"grad_norm": 0.35994264483451843,
"learning_rate": 0.00015726092089728456,
"loss": 0.7919,
"step": 187
},
{
"epoch": 0.22065727699530516,
"grad_norm": 0.34234684705734253,
"learning_rate": 0.00015702479338842976,
"loss": 0.8225,
"step": 188
},
{
"epoch": 0.22183098591549297,
"grad_norm": 0.3601793050765991,
"learning_rate": 0.00015678866587957497,
"loss": 0.8395,
"step": 189
},
{
"epoch": 0.22300469483568075,
"grad_norm": 0.3154338002204895,
"learning_rate": 0.0001565525383707202,
"loss": 0.7735,
"step": 190
},
{
"epoch": 0.22417840375586853,
"grad_norm": 0.3758296072483063,
"learning_rate": 0.0001563164108618654,
"loss": 0.8241,
"step": 191
},
{
"epoch": 0.22535211267605634,
"grad_norm": 0.3732200264930725,
"learning_rate": 0.00015608028335301063,
"loss": 0.8116,
"step": 192
},
{
"epoch": 0.22652582159624413,
"grad_norm": 0.3601556718349457,
"learning_rate": 0.00015584415584415587,
"loss": 0.8242,
"step": 193
},
{
"epoch": 0.22769953051643194,
"grad_norm": 0.360442191362381,
"learning_rate": 0.00015560802833530107,
"loss": 0.832,
"step": 194
},
{
"epoch": 0.22887323943661972,
"grad_norm": 0.35598254203796387,
"learning_rate": 0.00015537190082644627,
"loss": 0.8938,
"step": 195
},
{
"epoch": 0.2300469483568075,
"grad_norm": 0.3962613046169281,
"learning_rate": 0.0001551357733175915,
"loss": 0.8409,
"step": 196
},
{
"epoch": 0.2312206572769953,
"grad_norm": 0.3521510064601898,
"learning_rate": 0.00015489964580873674,
"loss": 0.8298,
"step": 197
},
{
"epoch": 0.2323943661971831,
"grad_norm": 0.34407946467399597,
"learning_rate": 0.00015466351829988194,
"loss": 0.7921,
"step": 198
},
{
"epoch": 0.2335680751173709,
"grad_norm": 0.3572155237197876,
"learning_rate": 0.00015442739079102717,
"loss": 0.8997,
"step": 199
},
{
"epoch": 0.2347417840375587,
"grad_norm": 0.345745712518692,
"learning_rate": 0.00015419126328217238,
"loss": 0.8563,
"step": 200
},
{
"epoch": 0.23591549295774647,
"grad_norm": 0.3741077780723572,
"learning_rate": 0.00015395513577331758,
"loss": 0.8334,
"step": 201
},
{
"epoch": 0.23708920187793428,
"grad_norm": 0.36866459250450134,
"learning_rate": 0.00015371900826446281,
"loss": 0.8398,
"step": 202
},
{
"epoch": 0.23826291079812206,
"grad_norm": 0.3834739625453949,
"learning_rate": 0.00015348288075560805,
"loss": 0.8181,
"step": 203
},
{
"epoch": 0.23943661971830985,
"grad_norm": 0.373045951128006,
"learning_rate": 0.00015324675324675325,
"loss": 0.8044,
"step": 204
},
{
"epoch": 0.24061032863849766,
"grad_norm": 0.3418562412261963,
"learning_rate": 0.00015301062573789848,
"loss": 0.8454,
"step": 205
},
{
"epoch": 0.24178403755868544,
"grad_norm": 0.36289098858833313,
"learning_rate": 0.00015277449822904369,
"loss": 0.8478,
"step": 206
},
{
"epoch": 0.24295774647887325,
"grad_norm": 0.38806968927383423,
"learning_rate": 0.00015253837072018892,
"loss": 0.804,
"step": 207
},
{
"epoch": 0.24413145539906103,
"grad_norm": 0.34217599034309387,
"learning_rate": 0.00015230224321133412,
"loss": 0.8391,
"step": 208
},
{
"epoch": 0.24530516431924881,
"grad_norm": 0.3738957643508911,
"learning_rate": 0.00015206611570247935,
"loss": 0.9026,
"step": 209
},
{
"epoch": 0.24647887323943662,
"grad_norm": 0.3481609523296356,
"learning_rate": 0.00015182998819362458,
"loss": 0.8674,
"step": 210
},
{
"epoch": 0.2476525821596244,
"grad_norm": 0.38967254757881165,
"learning_rate": 0.00015159386068476976,
"loss": 0.8796,
"step": 211
},
{
"epoch": 0.24882629107981222,
"grad_norm": 0.34841835498809814,
"learning_rate": 0.000151357733175915,
"loss": 0.7913,
"step": 212
},
{
"epoch": 0.25,
"grad_norm": 0.33826395869255066,
"learning_rate": 0.00015112160566706023,
"loss": 0.8539,
"step": 213
},
{
"epoch": 0.2511737089201878,
"grad_norm": 0.35131266713142395,
"learning_rate": 0.00015088547815820543,
"loss": 0.8072,
"step": 214
},
{
"epoch": 0.25234741784037557,
"grad_norm": 0.3298250734806061,
"learning_rate": 0.00015064935064935066,
"loss": 0.7688,
"step": 215
},
{
"epoch": 0.2535211267605634,
"grad_norm": 0.33808133006095886,
"learning_rate": 0.0001504132231404959,
"loss": 0.7609,
"step": 216
},
{
"epoch": 0.2546948356807512,
"grad_norm": 0.37146687507629395,
"learning_rate": 0.0001501770956316411,
"loss": 0.843,
"step": 217
},
{
"epoch": 0.25586854460093894,
"grad_norm": 0.33817118406295776,
"learning_rate": 0.0001499409681227863,
"loss": 0.7828,
"step": 218
},
{
"epoch": 0.25704225352112675,
"grad_norm": 0.35203686356544495,
"learning_rate": 0.00014970484061393153,
"loss": 0.8236,
"step": 219
},
{
"epoch": 0.25821596244131456,
"grad_norm": 0.34176716208457947,
"learning_rate": 0.00014946871310507676,
"loss": 0.8191,
"step": 220
},
{
"epoch": 0.25938967136150237,
"grad_norm": 0.34649035334587097,
"learning_rate": 0.00014923258559622197,
"loss": 0.8284,
"step": 221
},
{
"epoch": 0.2605633802816901,
"grad_norm": 0.35891467332839966,
"learning_rate": 0.00014899645808736717,
"loss": 0.8149,
"step": 222
},
{
"epoch": 0.26173708920187794,
"grad_norm": 0.3408451974391937,
"learning_rate": 0.0001487603305785124,
"loss": 0.8049,
"step": 223
},
{
"epoch": 0.26291079812206575,
"grad_norm": 0.36554664373397827,
"learning_rate": 0.0001485242030696576,
"loss": 0.8478,
"step": 224
},
{
"epoch": 0.2640845070422535,
"grad_norm": 0.3355228304862976,
"learning_rate": 0.00014828807556080284,
"loss": 0.815,
"step": 225
},
{
"epoch": 0.2652582159624413,
"grad_norm": 0.3500598669052124,
"learning_rate": 0.00014805194805194807,
"loss": 0.8571,
"step": 226
},
{
"epoch": 0.2664319248826291,
"grad_norm": 0.3362652659416199,
"learning_rate": 0.00014781582054309328,
"loss": 0.8363,
"step": 227
},
{
"epoch": 0.2676056338028169,
"grad_norm": 0.34258243441581726,
"learning_rate": 0.00014757969303423848,
"loss": 0.7648,
"step": 228
},
{
"epoch": 0.2687793427230047,
"grad_norm": 0.34023317694664,
"learning_rate": 0.0001473435655253837,
"loss": 0.8373,
"step": 229
},
{
"epoch": 0.2699530516431925,
"grad_norm": 0.35829535126686096,
"learning_rate": 0.00014710743801652894,
"loss": 0.8255,
"step": 230
},
{
"epoch": 0.2711267605633803,
"grad_norm": 0.3499360978603363,
"learning_rate": 0.00014687131050767415,
"loss": 0.8514,
"step": 231
},
{
"epoch": 0.27230046948356806,
"grad_norm": 0.3703480362892151,
"learning_rate": 0.00014663518299881938,
"loss": 0.8615,
"step": 232
},
{
"epoch": 0.2734741784037559,
"grad_norm": 0.3460928499698639,
"learning_rate": 0.0001463990554899646,
"loss": 0.7891,
"step": 233
},
{
"epoch": 0.2746478873239437,
"grad_norm": 0.34184372425079346,
"learning_rate": 0.0001461629279811098,
"loss": 0.8168,
"step": 234
},
{
"epoch": 0.27582159624413144,
"grad_norm": 0.34520068764686584,
"learning_rate": 0.00014592680047225502,
"loss": 0.8271,
"step": 235
},
{
"epoch": 0.27699530516431925,
"grad_norm": 0.3415423631668091,
"learning_rate": 0.00014569067296340025,
"loss": 0.783,
"step": 236
},
{
"epoch": 0.27816901408450706,
"grad_norm": 0.34584441781044006,
"learning_rate": 0.00014545454545454546,
"loss": 0.8488,
"step": 237
},
{
"epoch": 0.2793427230046948,
"grad_norm": 0.33898866176605225,
"learning_rate": 0.0001452184179456907,
"loss": 0.8786,
"step": 238
},
{
"epoch": 0.2805164319248826,
"grad_norm": 0.3591814339160919,
"learning_rate": 0.0001449822904368359,
"loss": 0.8081,
"step": 239
},
{
"epoch": 0.28169014084507044,
"grad_norm": 0.34305432438850403,
"learning_rate": 0.0001447461629279811,
"loss": 0.7911,
"step": 240
},
{
"epoch": 0.2828638497652582,
"grad_norm": 0.35866865515708923,
"learning_rate": 0.00014451003541912633,
"loss": 0.8393,
"step": 241
},
{
"epoch": 0.284037558685446,
"grad_norm": 0.3422331213951111,
"learning_rate": 0.00014427390791027156,
"loss": 0.848,
"step": 242
},
{
"epoch": 0.2852112676056338,
"grad_norm": 0.33504337072372437,
"learning_rate": 0.00014403778040141676,
"loss": 0.7782,
"step": 243
},
{
"epoch": 0.2863849765258216,
"grad_norm": 0.3509252667427063,
"learning_rate": 0.000143801652892562,
"loss": 0.8535,
"step": 244
},
{
"epoch": 0.2875586854460094,
"grad_norm": 0.3254059851169586,
"learning_rate": 0.0001435655253837072,
"loss": 0.7642,
"step": 245
},
{
"epoch": 0.2887323943661972,
"grad_norm": 0.33594879508018494,
"learning_rate": 0.00014332939787485243,
"loss": 0.814,
"step": 246
},
{
"epoch": 0.289906103286385,
"grad_norm": 0.3620656132698059,
"learning_rate": 0.00014309327036599764,
"loss": 0.8248,
"step": 247
},
{
"epoch": 0.29107981220657275,
"grad_norm": 0.3325202167034149,
"learning_rate": 0.00014285714285714287,
"loss": 0.7408,
"step": 248
},
{
"epoch": 0.29225352112676056,
"grad_norm": 0.33905264735221863,
"learning_rate": 0.0001426210153482881,
"loss": 0.8446,
"step": 249
},
{
"epoch": 0.2934272300469484,
"grad_norm": 0.3577309548854828,
"learning_rate": 0.0001423848878394333,
"loss": 0.784,
"step": 250
},
{
"epoch": 0.29460093896713613,
"grad_norm": 0.3840247392654419,
"learning_rate": 0.0001421487603305785,
"loss": 0.8068,
"step": 251
},
{
"epoch": 0.29577464788732394,
"grad_norm": 0.3539847433567047,
"learning_rate": 0.00014191263282172374,
"loss": 0.8232,
"step": 252
},
{
"epoch": 0.29694835680751175,
"grad_norm": 0.33225932717323303,
"learning_rate": 0.00014167650531286894,
"loss": 0.7946,
"step": 253
},
{
"epoch": 0.2981220657276995,
"grad_norm": 0.3429291546344757,
"learning_rate": 0.00014144037780401418,
"loss": 0.816,
"step": 254
},
{
"epoch": 0.2992957746478873,
"grad_norm": 0.3584197163581848,
"learning_rate": 0.0001412042502951594,
"loss": 0.8351,
"step": 255
},
{
"epoch": 0.3004694835680751,
"grad_norm": 0.35585007071495056,
"learning_rate": 0.0001409681227863046,
"loss": 0.8255,
"step": 256
},
{
"epoch": 0.30164319248826293,
"grad_norm": 0.3510012924671173,
"learning_rate": 0.00014073199527744982,
"loss": 0.7889,
"step": 257
},
{
"epoch": 0.3028169014084507,
"grad_norm": 0.36646419763565063,
"learning_rate": 0.00014049586776859505,
"loss": 0.8161,
"step": 258
},
{
"epoch": 0.3039906103286385,
"grad_norm": 0.35207659006118774,
"learning_rate": 0.00014025974025974028,
"loss": 0.8151,
"step": 259
},
{
"epoch": 0.3051643192488263,
"grad_norm": 0.33348143100738525,
"learning_rate": 0.00014002361275088548,
"loss": 0.8108,
"step": 260
},
{
"epoch": 0.30633802816901406,
"grad_norm": 0.3474767506122589,
"learning_rate": 0.00013978748524203072,
"loss": 0.8105,
"step": 261
},
{
"epoch": 0.3075117370892019,
"grad_norm": 0.37046462297439575,
"learning_rate": 0.00013955135773317592,
"loss": 0.867,
"step": 262
},
{
"epoch": 0.3086854460093897,
"grad_norm": 0.3426377475261688,
"learning_rate": 0.00013931523022432112,
"loss": 0.8281,
"step": 263
},
{
"epoch": 0.30985915492957744,
"grad_norm": 0.3340952694416046,
"learning_rate": 0.00013907910271546636,
"loss": 0.7805,
"step": 264
},
{
"epoch": 0.31103286384976525,
"grad_norm": 0.3546634316444397,
"learning_rate": 0.0001388429752066116,
"loss": 0.824,
"step": 265
},
{
"epoch": 0.31220657276995306,
"grad_norm": 0.3211507499217987,
"learning_rate": 0.0001386068476977568,
"loss": 0.7572,
"step": 266
},
{
"epoch": 0.31338028169014087,
"grad_norm": 0.3440265357494354,
"learning_rate": 0.000138370720188902,
"loss": 0.8247,
"step": 267
},
{
"epoch": 0.3145539906103286,
"grad_norm": 0.34174132347106934,
"learning_rate": 0.00013813459268004723,
"loss": 0.7939,
"step": 268
},
{
"epoch": 0.31572769953051644,
"grad_norm": 0.3415057361125946,
"learning_rate": 0.00013789846517119246,
"loss": 0.8184,
"step": 269
},
{
"epoch": 0.31690140845070425,
"grad_norm": 0.3313206732273102,
"learning_rate": 0.00013766233766233766,
"loss": 0.7936,
"step": 270
},
{
"epoch": 0.318075117370892,
"grad_norm": 0.35693395137786865,
"learning_rate": 0.0001374262101534829,
"loss": 0.7738,
"step": 271
},
{
"epoch": 0.3192488262910798,
"grad_norm": 0.3530910313129425,
"learning_rate": 0.00013719008264462813,
"loss": 0.7901,
"step": 272
},
{
"epoch": 0.3204225352112676,
"grad_norm": 0.34867924451828003,
"learning_rate": 0.0001369539551357733,
"loss": 0.8281,
"step": 273
},
{
"epoch": 0.3215962441314554,
"grad_norm": 0.34141889214515686,
"learning_rate": 0.00013671782762691854,
"loss": 0.7987,
"step": 274
},
{
"epoch": 0.3227699530516432,
"grad_norm": 0.3511849045753479,
"learning_rate": 0.00013648170011806377,
"loss": 0.8306,
"step": 275
},
{
"epoch": 0.323943661971831,
"grad_norm": 0.343523770570755,
"learning_rate": 0.00013624557260920897,
"loss": 0.7813,
"step": 276
},
{
"epoch": 0.32511737089201875,
"grad_norm": 0.3539726138114929,
"learning_rate": 0.0001360094451003542,
"loss": 0.8258,
"step": 277
},
{
"epoch": 0.32629107981220656,
"grad_norm": 0.35628989338874817,
"learning_rate": 0.00013577331759149943,
"loss": 0.829,
"step": 278
},
{
"epoch": 0.3274647887323944,
"grad_norm": 0.3531114459037781,
"learning_rate": 0.00013553719008264464,
"loss": 0.8475,
"step": 279
},
{
"epoch": 0.3286384976525822,
"grad_norm": 0.35344576835632324,
"learning_rate": 0.00013530106257378984,
"loss": 0.8343,
"step": 280
},
{
"epoch": 0.32981220657276994,
"grad_norm": 0.37604016065597534,
"learning_rate": 0.00013506493506493507,
"loss": 0.7598,
"step": 281
},
{
"epoch": 0.33098591549295775,
"grad_norm": 0.35646241903305054,
"learning_rate": 0.0001348288075560803,
"loss": 0.83,
"step": 282
},
{
"epoch": 0.33215962441314556,
"grad_norm": 0.36084675788879395,
"learning_rate": 0.0001345926800472255,
"loss": 0.7465,
"step": 283
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.3514406085014343,
"learning_rate": 0.00013435655253837071,
"loss": 0.7979,
"step": 284
},
{
"epoch": 0.3345070422535211,
"grad_norm": 0.3554603159427643,
"learning_rate": 0.00013412042502951595,
"loss": 0.8487,
"step": 285
},
{
"epoch": 0.33568075117370894,
"grad_norm": 0.3360341787338257,
"learning_rate": 0.00013388429752066115,
"loss": 0.7787,
"step": 286
},
{
"epoch": 0.3368544600938967,
"grad_norm": 0.35026323795318604,
"learning_rate": 0.00013364817001180638,
"loss": 0.7845,
"step": 287
},
{
"epoch": 0.3380281690140845,
"grad_norm": 0.3419228494167328,
"learning_rate": 0.00013341204250295161,
"loss": 0.7971,
"step": 288
},
{
"epoch": 0.3392018779342723,
"grad_norm": 0.3314400315284729,
"learning_rate": 0.00013317591499409682,
"loss": 0.7899,
"step": 289
},
{
"epoch": 0.3403755868544601,
"grad_norm": 0.3434331715106964,
"learning_rate": 0.00013293978748524202,
"loss": 0.827,
"step": 290
},
{
"epoch": 0.3415492957746479,
"grad_norm": 0.34718382358551025,
"learning_rate": 0.00013270365997638725,
"loss": 0.7835,
"step": 291
},
{
"epoch": 0.3427230046948357,
"grad_norm": 0.3585168421268463,
"learning_rate": 0.00013246753246753249,
"loss": 0.8728,
"step": 292
},
{
"epoch": 0.3438967136150235,
"grad_norm": 0.3508673906326294,
"learning_rate": 0.0001322314049586777,
"loss": 0.836,
"step": 293
},
{
"epoch": 0.34507042253521125,
"grad_norm": 0.40241560339927673,
"learning_rate": 0.00013199527744982292,
"loss": 0.8043,
"step": 294
},
{
"epoch": 0.34624413145539906,
"grad_norm": 0.33775267004966736,
"learning_rate": 0.00013175914994096813,
"loss": 0.8047,
"step": 295
},
{
"epoch": 0.3474178403755869,
"grad_norm": 0.3423898220062256,
"learning_rate": 0.00013152302243211333,
"loss": 0.7894,
"step": 296
},
{
"epoch": 0.3485915492957746,
"grad_norm": 0.3472992479801178,
"learning_rate": 0.00013128689492325856,
"loss": 0.8198,
"step": 297
},
{
"epoch": 0.34976525821596244,
"grad_norm": 0.3425481915473938,
"learning_rate": 0.0001310507674144038,
"loss": 0.8178,
"step": 298
},
{
"epoch": 0.35093896713615025,
"grad_norm": 0.3459112048149109,
"learning_rate": 0.000130814639905549,
"loss": 0.7749,
"step": 299
},
{
"epoch": 0.352112676056338,
"grad_norm": 0.353595495223999,
"learning_rate": 0.00013057851239669423,
"loss": 0.7886,
"step": 300
},
{
"epoch": 0.3532863849765258,
"grad_norm": 0.35495465993881226,
"learning_rate": 0.00013034238488783943,
"loss": 0.771,
"step": 301
},
{
"epoch": 0.3544600938967136,
"grad_norm": 0.34812483191490173,
"learning_rate": 0.00013010625737898467,
"loss": 0.8335,
"step": 302
},
{
"epoch": 0.35563380281690143,
"grad_norm": 0.3655085861682892,
"learning_rate": 0.00012987012987012987,
"loss": 0.8117,
"step": 303
},
{
"epoch": 0.3568075117370892,
"grad_norm": 0.35925915837287903,
"learning_rate": 0.0001296340023612751,
"loss": 0.8147,
"step": 304
},
{
"epoch": 0.357981220657277,
"grad_norm": 0.3293222486972809,
"learning_rate": 0.00012939787485242033,
"loss": 0.7602,
"step": 305
},
{
"epoch": 0.3591549295774648,
"grad_norm": 0.3486446738243103,
"learning_rate": 0.00012916174734356554,
"loss": 0.7857,
"step": 306
},
{
"epoch": 0.36032863849765256,
"grad_norm": 0.382565975189209,
"learning_rate": 0.00012892561983471074,
"loss": 0.863,
"step": 307
},
{
"epoch": 0.3615023474178404,
"grad_norm": 0.32544344663619995,
"learning_rate": 0.00012868949232585597,
"loss": 0.781,
"step": 308
},
{
"epoch": 0.3626760563380282,
"grad_norm": 0.38700491189956665,
"learning_rate": 0.00012845336481700118,
"loss": 0.8102,
"step": 309
},
{
"epoch": 0.36384976525821594,
"grad_norm": 0.3503759503364563,
"learning_rate": 0.0001282172373081464,
"loss": 0.7699,
"step": 310
},
{
"epoch": 0.36502347417840375,
"grad_norm": 0.3323630094528198,
"learning_rate": 0.00012798110979929164,
"loss": 0.7511,
"step": 311
},
{
"epoch": 0.36619718309859156,
"grad_norm": 0.3668995797634125,
"learning_rate": 0.00012774498229043685,
"loss": 0.7374,
"step": 312
},
{
"epoch": 0.3673708920187793,
"grad_norm": 0.37373387813568115,
"learning_rate": 0.00012750885478158205,
"loss": 0.8077,
"step": 313
},
{
"epoch": 0.3685446009389671,
"grad_norm": 0.3601135015487671,
"learning_rate": 0.00012727272727272728,
"loss": 0.7991,
"step": 314
},
{
"epoch": 0.36971830985915494,
"grad_norm": 0.3527435064315796,
"learning_rate": 0.00012703659976387249,
"loss": 0.7971,
"step": 315
},
{
"epoch": 0.37089201877934275,
"grad_norm": 0.3584372401237488,
"learning_rate": 0.00012680047225501772,
"loss": 0.7513,
"step": 316
},
{
"epoch": 0.3720657276995305,
"grad_norm": 0.3517726957798004,
"learning_rate": 0.00012656434474616295,
"loss": 0.8206,
"step": 317
},
{
"epoch": 0.3732394366197183,
"grad_norm": 0.3655302822589874,
"learning_rate": 0.00012632821723730815,
"loss": 0.771,
"step": 318
},
{
"epoch": 0.3744131455399061,
"grad_norm": 0.3659893274307251,
"learning_rate": 0.00012609208972845336,
"loss": 0.8048,
"step": 319
},
{
"epoch": 0.3755868544600939,
"grad_norm": 0.36364591121673584,
"learning_rate": 0.0001258559622195986,
"loss": 0.7832,
"step": 320
},
{
"epoch": 0.3767605633802817,
"grad_norm": 0.37528395652770996,
"learning_rate": 0.00012561983471074382,
"loss": 0.7926,
"step": 321
},
{
"epoch": 0.3779342723004695,
"grad_norm": 0.37137654423713684,
"learning_rate": 0.00012538370720188903,
"loss": 0.8486,
"step": 322
},
{
"epoch": 0.37910798122065725,
"grad_norm": 0.3466728925704956,
"learning_rate": 0.00012514757969303423,
"loss": 0.7961,
"step": 323
},
{
"epoch": 0.38028169014084506,
"grad_norm": 0.38629114627838135,
"learning_rate": 0.00012491145218417946,
"loss": 0.8071,
"step": 324
},
{
"epoch": 0.3814553990610329,
"grad_norm": 0.34686383605003357,
"learning_rate": 0.00012467532467532467,
"loss": 0.7698,
"step": 325
},
{
"epoch": 0.3826291079812207,
"grad_norm": 0.36625292897224426,
"learning_rate": 0.0001244391971664699,
"loss": 0.8486,
"step": 326
},
{
"epoch": 0.38380281690140844,
"grad_norm": 0.38903650641441345,
"learning_rate": 0.00012420306965761513,
"loss": 0.8031,
"step": 327
},
{
"epoch": 0.38497652582159625,
"grad_norm": 0.3456287980079651,
"learning_rate": 0.00012396694214876033,
"loss": 0.7887,
"step": 328
},
{
"epoch": 0.38615023474178406,
"grad_norm": 0.36374613642692566,
"learning_rate": 0.00012373081463990554,
"loss": 0.7588,
"step": 329
},
{
"epoch": 0.3873239436619718,
"grad_norm": 0.360626220703125,
"learning_rate": 0.00012349468713105077,
"loss": 0.8239,
"step": 330
},
{
"epoch": 0.3884976525821596,
"grad_norm": 0.40213796496391296,
"learning_rate": 0.000123258559622196,
"loss": 0.8029,
"step": 331
},
{
"epoch": 0.38967136150234744,
"grad_norm": 0.3273613750934601,
"learning_rate": 0.0001230224321133412,
"loss": 0.7567,
"step": 332
},
{
"epoch": 0.3908450704225352,
"grad_norm": 0.34953057765960693,
"learning_rate": 0.00012278630460448644,
"loss": 0.7512,
"step": 333
},
{
"epoch": 0.392018779342723,
"grad_norm": 0.34772762656211853,
"learning_rate": 0.00012255017709563167,
"loss": 0.7551,
"step": 334
},
{
"epoch": 0.3931924882629108,
"grad_norm": 0.34170207381248474,
"learning_rate": 0.00012231404958677685,
"loss": 0.7884,
"step": 335
},
{
"epoch": 0.39436619718309857,
"grad_norm": 0.3696103096008301,
"learning_rate": 0.00012207792207792208,
"loss": 0.8658,
"step": 336
},
{
"epoch": 0.3955399061032864,
"grad_norm": 0.3513827621936798,
"learning_rate": 0.00012184179456906731,
"loss": 0.8199,
"step": 337
},
{
"epoch": 0.3967136150234742,
"grad_norm": 0.3454856872558594,
"learning_rate": 0.00012160566706021253,
"loss": 0.7627,
"step": 338
},
{
"epoch": 0.397887323943662,
"grad_norm": 0.3246639370918274,
"learning_rate": 0.00012136953955135774,
"loss": 0.7454,
"step": 339
},
{
"epoch": 0.39906103286384975,
"grad_norm": 0.33567938208580017,
"learning_rate": 0.00012113341204250295,
"loss": 0.7611,
"step": 340
},
{
"epoch": 0.40023474178403756,
"grad_norm": 0.33728334307670593,
"learning_rate": 0.00012089728453364817,
"loss": 0.7575,
"step": 341
},
{
"epoch": 0.4014084507042254,
"grad_norm": 0.35161352157592773,
"learning_rate": 0.0001206611570247934,
"loss": 0.8117,
"step": 342
},
{
"epoch": 0.4025821596244131,
"grad_norm": 0.3425585925579071,
"learning_rate": 0.00012042502951593862,
"loss": 0.8019,
"step": 343
},
{
"epoch": 0.40375586854460094,
"grad_norm": 0.3406507968902588,
"learning_rate": 0.00012018890200708383,
"loss": 0.8235,
"step": 344
},
{
"epoch": 0.40492957746478875,
"grad_norm": 0.37840309739112854,
"learning_rate": 0.00011995277449822907,
"loss": 0.7866,
"step": 345
},
{
"epoch": 0.4061032863849765,
"grad_norm": 0.35816213488578796,
"learning_rate": 0.00011971664698937426,
"loss": 0.8425,
"step": 346
},
{
"epoch": 0.4072769953051643,
"grad_norm": 0.3441546559333801,
"learning_rate": 0.00011948051948051949,
"loss": 0.8094,
"step": 347
},
{
"epoch": 0.4084507042253521,
"grad_norm": 0.34275054931640625,
"learning_rate": 0.0001192443919716647,
"loss": 0.7244,
"step": 348
},
{
"epoch": 0.4096244131455399,
"grad_norm": 0.33207401633262634,
"learning_rate": 0.00011900826446280992,
"loss": 0.8108,
"step": 349
},
{
"epoch": 0.4107981220657277,
"grad_norm": 0.3412252962589264,
"learning_rate": 0.00011877213695395516,
"loss": 0.7818,
"step": 350
},
{
"epoch": 0.4119718309859155,
"grad_norm": 0.36701643466949463,
"learning_rate": 0.00011853600944510035,
"loss": 0.8293,
"step": 351
},
{
"epoch": 0.4131455399061033,
"grad_norm": 0.34462520480155945,
"learning_rate": 0.00011829988193624558,
"loss": 0.7603,
"step": 352
},
{
"epoch": 0.41431924882629106,
"grad_norm": 0.35232508182525635,
"learning_rate": 0.0001180637544273908,
"loss": 0.7616,
"step": 353
},
{
"epoch": 0.4154929577464789,
"grad_norm": 0.37428373098373413,
"learning_rate": 0.00011782762691853601,
"loss": 0.7919,
"step": 354
},
{
"epoch": 0.4166666666666667,
"grad_norm": 0.3429507911205292,
"learning_rate": 0.00011759149940968123,
"loss": 0.7859,
"step": 355
},
{
"epoch": 0.41784037558685444,
"grad_norm": 0.3584844470024109,
"learning_rate": 0.00011735537190082646,
"loss": 0.7934,
"step": 356
},
{
"epoch": 0.41901408450704225,
"grad_norm": 0.356391578912735,
"learning_rate": 0.00011711924439197165,
"loss": 0.8222,
"step": 357
},
{
"epoch": 0.42018779342723006,
"grad_norm": 0.3663417100906372,
"learning_rate": 0.00011688311688311689,
"loss": 0.7507,
"step": 358
},
{
"epoch": 0.4213615023474178,
"grad_norm": 0.3388553559780121,
"learning_rate": 0.0001166469893742621,
"loss": 0.8263,
"step": 359
},
{
"epoch": 0.4225352112676056,
"grad_norm": 0.34876593947410583,
"learning_rate": 0.00011641086186540732,
"loss": 0.7969,
"step": 360
},
{
"epoch": 0.42370892018779344,
"grad_norm": 0.3500271737575531,
"learning_rate": 0.00011617473435655255,
"loss": 0.7789,
"step": 361
},
{
"epoch": 0.42488262910798125,
"grad_norm": 0.3554798662662506,
"learning_rate": 0.00011593860684769777,
"loss": 0.7681,
"step": 362
},
{
"epoch": 0.426056338028169,
"grad_norm": 0.34559762477874756,
"learning_rate": 0.00011570247933884298,
"loss": 0.7676,
"step": 363
},
{
"epoch": 0.4272300469483568,
"grad_norm": 0.3520505726337433,
"learning_rate": 0.0001154663518299882,
"loss": 0.7494,
"step": 364
},
{
"epoch": 0.4284037558685446,
"grad_norm": 0.35454803705215454,
"learning_rate": 0.00011523022432113341,
"loss": 0.7516,
"step": 365
},
{
"epoch": 0.4295774647887324,
"grad_norm": 0.36526602506637573,
"learning_rate": 0.00011499409681227864,
"loss": 0.7789,
"step": 366
},
{
"epoch": 0.4307511737089202,
"grad_norm": 0.34084445238113403,
"learning_rate": 0.00011475796930342386,
"loss": 0.7446,
"step": 367
},
{
"epoch": 0.431924882629108,
"grad_norm": 0.3405500054359436,
"learning_rate": 0.00011452184179456907,
"loss": 0.8217,
"step": 368
},
{
"epoch": 0.43309859154929575,
"grad_norm": 0.3523256182670593,
"learning_rate": 0.00011428571428571428,
"loss": 0.7311,
"step": 369
},
{
"epoch": 0.43427230046948356,
"grad_norm": 0.3336530327796936,
"learning_rate": 0.0001140495867768595,
"loss": 0.7806,
"step": 370
},
{
"epoch": 0.4354460093896714,
"grad_norm": 0.3268769383430481,
"learning_rate": 0.00011381345926800473,
"loss": 0.7945,
"step": 371
},
{
"epoch": 0.43661971830985913,
"grad_norm": 0.35258617997169495,
"learning_rate": 0.00011357733175914995,
"loss": 0.7468,
"step": 372
},
{
"epoch": 0.43779342723004694,
"grad_norm": 0.3546913266181946,
"learning_rate": 0.00011334120425029517,
"loss": 0.7921,
"step": 373
},
{
"epoch": 0.43896713615023475,
"grad_norm": 0.36266180872917175,
"learning_rate": 0.00011310507674144037,
"loss": 0.7623,
"step": 374
},
{
"epoch": 0.44014084507042256,
"grad_norm": 0.3355543613433838,
"learning_rate": 0.00011286894923258559,
"loss": 0.7436,
"step": 375
},
{
"epoch": 0.4413145539906103,
"grad_norm": 0.33666127920150757,
"learning_rate": 0.00011263282172373082,
"loss": 0.7609,
"step": 376
},
{
"epoch": 0.4424882629107981,
"grad_norm": 0.3505670428276062,
"learning_rate": 0.00011239669421487604,
"loss": 0.7868,
"step": 377
},
{
"epoch": 0.44366197183098594,
"grad_norm": 0.3446255028247833,
"learning_rate": 0.00011216056670602126,
"loss": 0.765,
"step": 378
},
{
"epoch": 0.4448356807511737,
"grad_norm": 0.3761040270328522,
"learning_rate": 0.00011192443919716649,
"loss": 0.8104,
"step": 379
},
{
"epoch": 0.4460093896713615,
"grad_norm": 0.35692986845970154,
"learning_rate": 0.00011168831168831168,
"loss": 0.7896,
"step": 380
},
{
"epoch": 0.4471830985915493,
"grad_norm": 0.34384050965309143,
"learning_rate": 0.00011145218417945691,
"loss": 0.7716,
"step": 381
},
{
"epoch": 0.44835680751173707,
"grad_norm": 0.3477395176887512,
"learning_rate": 0.00011121605667060213,
"loss": 0.8146,
"step": 382
},
{
"epoch": 0.4495305164319249,
"grad_norm": 0.35172998905181885,
"learning_rate": 0.00011097992916174735,
"loss": 0.7844,
"step": 383
},
{
"epoch": 0.4507042253521127,
"grad_norm": 0.33881857991218567,
"learning_rate": 0.00011074380165289258,
"loss": 0.7528,
"step": 384
},
{
"epoch": 0.4518779342723005,
"grad_norm": 0.3429534137248993,
"learning_rate": 0.00011050767414403777,
"loss": 0.7826,
"step": 385
},
{
"epoch": 0.45305164319248825,
"grad_norm": 0.34472665190696716,
"learning_rate": 0.000110271546635183,
"loss": 0.7153,
"step": 386
},
{
"epoch": 0.45422535211267606,
"grad_norm": 0.3572479486465454,
"learning_rate": 0.00011003541912632822,
"loss": 0.7811,
"step": 387
},
{
"epoch": 0.45539906103286387,
"grad_norm": 0.3531682789325714,
"learning_rate": 0.00010979929161747344,
"loss": 0.8016,
"step": 388
},
{
"epoch": 0.4565727699530516,
"grad_norm": 0.3845299780368805,
"learning_rate": 0.00010956316410861867,
"loss": 0.7817,
"step": 389
},
{
"epoch": 0.45774647887323944,
"grad_norm": 0.35217660665512085,
"learning_rate": 0.00010932703659976389,
"loss": 0.7495,
"step": 390
},
{
"epoch": 0.45892018779342725,
"grad_norm": 0.35103702545166016,
"learning_rate": 0.00010909090909090909,
"loss": 0.7602,
"step": 391
},
{
"epoch": 0.460093896713615,
"grad_norm": 0.3511259853839874,
"learning_rate": 0.00010885478158205431,
"loss": 0.7923,
"step": 392
},
{
"epoch": 0.4612676056338028,
"grad_norm": 0.33732983469963074,
"learning_rate": 0.00010861865407319953,
"loss": 0.7875,
"step": 393
},
{
"epoch": 0.4624413145539906,
"grad_norm": 0.35035955905914307,
"learning_rate": 0.00010838252656434476,
"loss": 0.7737,
"step": 394
},
{
"epoch": 0.4636150234741784,
"grad_norm": 0.3277076482772827,
"learning_rate": 0.00010814639905548998,
"loss": 0.7619,
"step": 395
},
{
"epoch": 0.4647887323943662,
"grad_norm": 0.34461456537246704,
"learning_rate": 0.00010791027154663518,
"loss": 0.7394,
"step": 396
},
{
"epoch": 0.465962441314554,
"grad_norm": 0.36000820994377136,
"learning_rate": 0.0001076741440377804,
"loss": 0.8004,
"step": 397
},
{
"epoch": 0.4671361502347418,
"grad_norm": 0.3291054666042328,
"learning_rate": 0.00010743801652892562,
"loss": 0.721,
"step": 398
},
{
"epoch": 0.46830985915492956,
"grad_norm": 0.37541574239730835,
"learning_rate": 0.00010720188902007085,
"loss": 0.7673,
"step": 399
},
{
"epoch": 0.4694835680751174,
"grad_norm": 0.33268067240715027,
"learning_rate": 0.00010696576151121607,
"loss": 0.7439,
"step": 400
},
{
"epoch": 0.4706572769953052,
"grad_norm": 0.34383484721183777,
"learning_rate": 0.00010672963400236129,
"loss": 0.7453,
"step": 401
},
{
"epoch": 0.47183098591549294,
"grad_norm": 0.3543702960014343,
"learning_rate": 0.00010649350649350649,
"loss": 0.7544,
"step": 402
},
{
"epoch": 0.47300469483568075,
"grad_norm": 0.34553685784339905,
"learning_rate": 0.00010625737898465171,
"loss": 0.7656,
"step": 403
},
{
"epoch": 0.47417840375586856,
"grad_norm": 0.3437071144580841,
"learning_rate": 0.00010602125147579694,
"loss": 0.773,
"step": 404
},
{
"epoch": 0.4753521126760563,
"grad_norm": 0.34917253255844116,
"learning_rate": 0.00010578512396694216,
"loss": 0.7607,
"step": 405
},
{
"epoch": 0.4765258215962441,
"grad_norm": 0.33429262042045593,
"learning_rate": 0.00010554899645808738,
"loss": 0.768,
"step": 406
},
{
"epoch": 0.47769953051643194,
"grad_norm": 0.33842045068740845,
"learning_rate": 0.00010531286894923261,
"loss": 0.7665,
"step": 407
},
{
"epoch": 0.4788732394366197,
"grad_norm": 0.3419265151023865,
"learning_rate": 0.0001050767414403778,
"loss": 0.7717,
"step": 408
},
{
"epoch": 0.4800469483568075,
"grad_norm": 0.3458483815193176,
"learning_rate": 0.00010484061393152303,
"loss": 0.8031,
"step": 409
},
{
"epoch": 0.4812206572769953,
"grad_norm": 0.37077274918556213,
"learning_rate": 0.00010460448642266825,
"loss": 0.8009,
"step": 410
},
{
"epoch": 0.4823943661971831,
"grad_norm": 0.35040315985679626,
"learning_rate": 0.00010436835891381347,
"loss": 0.7545,
"step": 411
},
{
"epoch": 0.4835680751173709,
"grad_norm": 0.3503456115722656,
"learning_rate": 0.0001041322314049587,
"loss": 0.8515,
"step": 412
},
{
"epoch": 0.4847417840375587,
"grad_norm": 0.34627342224121094,
"learning_rate": 0.00010389610389610389,
"loss": 0.716,
"step": 413
},
{
"epoch": 0.4859154929577465,
"grad_norm": 0.3596992790699005,
"learning_rate": 0.00010365997638724912,
"loss": 0.7636,
"step": 414
},
{
"epoch": 0.48708920187793425,
"grad_norm": 0.3346829116344452,
"learning_rate": 0.00010342384887839434,
"loss": 0.7635,
"step": 415
},
{
"epoch": 0.48826291079812206,
"grad_norm": 0.37179237604141235,
"learning_rate": 0.00010318772136953956,
"loss": 0.7642,
"step": 416
},
{
"epoch": 0.4894366197183099,
"grad_norm": 0.34897381067276,
"learning_rate": 0.00010295159386068479,
"loss": 0.7792,
"step": 417
},
{
"epoch": 0.49061032863849763,
"grad_norm": 0.3820830285549164,
"learning_rate": 0.00010271546635183,
"loss": 0.7722,
"step": 418
},
{
"epoch": 0.49178403755868544,
"grad_norm": 0.3688552677631378,
"learning_rate": 0.00010247933884297521,
"loss": 0.7927,
"step": 419
},
{
"epoch": 0.49295774647887325,
"grad_norm": 0.35100415349006653,
"learning_rate": 0.00010224321133412043,
"loss": 0.7848,
"step": 420
},
{
"epoch": 0.49413145539906106,
"grad_norm": 0.3596225082874298,
"learning_rate": 0.00010200708382526565,
"loss": 0.7383,
"step": 421
},
{
"epoch": 0.4953051643192488,
"grad_norm": 0.36203423142433167,
"learning_rate": 0.00010177095631641088,
"loss": 0.769,
"step": 422
},
{
"epoch": 0.4964788732394366,
"grad_norm": 0.3776590824127197,
"learning_rate": 0.0001015348288075561,
"loss": 0.8007,
"step": 423
},
{
"epoch": 0.49765258215962443,
"grad_norm": 0.36009421944618225,
"learning_rate": 0.0001012987012987013,
"loss": 0.7557,
"step": 424
},
{
"epoch": 0.4988262910798122,
"grad_norm": 0.3442706763744354,
"learning_rate": 0.00010106257378984652,
"loss": 0.7488,
"step": 425
},
{
"epoch": 0.5,
"grad_norm": 0.3635407090187073,
"learning_rate": 0.00010082644628099174,
"loss": 0.7922,
"step": 426
},
{
"epoch": 0.5011737089201878,
"grad_norm": 0.3766370117664337,
"learning_rate": 0.00010059031877213697,
"loss": 0.7818,
"step": 427
},
{
"epoch": 0.5023474178403756,
"grad_norm": 0.34344202280044556,
"learning_rate": 0.00010035419126328218,
"loss": 0.8308,
"step": 428
},
{
"epoch": 0.5035211267605634,
"grad_norm": 0.3495674133300781,
"learning_rate": 0.0001001180637544274,
"loss": 0.799,
"step": 429
},
{
"epoch": 0.5046948356807511,
"grad_norm": 0.36545464396476746,
"learning_rate": 9.988193624557262e-05,
"loss": 0.7453,
"step": 430
},
{
"epoch": 0.505868544600939,
"grad_norm": 0.3482630252838135,
"learning_rate": 9.964580873671782e-05,
"loss": 0.7422,
"step": 431
},
{
"epoch": 0.5070422535211268,
"grad_norm": 0.3745418190956116,
"learning_rate": 9.940968122786304e-05,
"loss": 0.7333,
"step": 432
},
{
"epoch": 0.5082159624413145,
"grad_norm": 0.3470025062561035,
"learning_rate": 9.917355371900827e-05,
"loss": 0.7907,
"step": 433
},
{
"epoch": 0.5093896713615024,
"grad_norm": 0.38251325488090515,
"learning_rate": 9.893742621015348e-05,
"loss": 0.7629,
"step": 434
},
{
"epoch": 0.5105633802816901,
"grad_norm": 0.3829626739025116,
"learning_rate": 9.870129870129871e-05,
"loss": 0.7939,
"step": 435
},
{
"epoch": 0.5117370892018779,
"grad_norm": 0.35726287961006165,
"learning_rate": 9.846517119244393e-05,
"loss": 0.755,
"step": 436
},
{
"epoch": 0.5129107981220657,
"grad_norm": 0.38168108463287354,
"learning_rate": 9.822904368358913e-05,
"loss": 0.7396,
"step": 437
},
{
"epoch": 0.5140845070422535,
"grad_norm": 0.35728660225868225,
"learning_rate": 9.799291617473436e-05,
"loss": 0.7568,
"step": 438
},
{
"epoch": 0.5152582159624414,
"grad_norm": 0.37819668650627136,
"learning_rate": 9.775678866587958e-05,
"loss": 0.8046,
"step": 439
},
{
"epoch": 0.5164319248826291,
"grad_norm": 0.4106784760951996,
"learning_rate": 9.75206611570248e-05,
"loss": 0.7116,
"step": 440
},
{
"epoch": 0.5176056338028169,
"grad_norm": 0.3476578891277313,
"learning_rate": 9.728453364817002e-05,
"loss": 0.7824,
"step": 441
},
{
"epoch": 0.5187793427230047,
"grad_norm": 0.36705800890922546,
"learning_rate": 9.704840613931524e-05,
"loss": 0.7631,
"step": 442
},
{
"epoch": 0.5199530516431925,
"grad_norm": 0.3880864977836609,
"learning_rate": 9.681227863046045e-05,
"loss": 0.7608,
"step": 443
},
{
"epoch": 0.5211267605633803,
"grad_norm": 0.3610959053039551,
"learning_rate": 9.657615112160567e-05,
"loss": 0.7909,
"step": 444
},
{
"epoch": 0.5223004694835681,
"grad_norm": 0.33494657278060913,
"learning_rate": 9.634002361275089e-05,
"loss": 0.7108,
"step": 445
},
{
"epoch": 0.5234741784037559,
"grad_norm": 0.352055162191391,
"learning_rate": 9.610389610389611e-05,
"loss": 0.7177,
"step": 446
},
{
"epoch": 0.5246478873239436,
"grad_norm": 0.35466742515563965,
"learning_rate": 9.586776859504133e-05,
"loss": 0.7762,
"step": 447
},
{
"epoch": 0.5258215962441315,
"grad_norm": 0.34477657079696655,
"learning_rate": 9.563164108618654e-05,
"loss": 0.7583,
"step": 448
},
{
"epoch": 0.5269953051643192,
"grad_norm": 0.37008315324783325,
"learning_rate": 9.539551357733176e-05,
"loss": 0.7954,
"step": 449
},
{
"epoch": 0.528169014084507,
"grad_norm": 0.34141793847084045,
"learning_rate": 9.515938606847698e-05,
"loss": 0.7444,
"step": 450
},
{
"epoch": 0.5293427230046949,
"grad_norm": 0.3429400622844696,
"learning_rate": 9.49232585596222e-05,
"loss": 0.7499,
"step": 451
},
{
"epoch": 0.5305164319248826,
"grad_norm": 0.3666730225086212,
"learning_rate": 9.468713105076742e-05,
"loss": 0.7704,
"step": 452
},
{
"epoch": 0.5316901408450704,
"grad_norm": 0.34185874462127686,
"learning_rate": 9.445100354191265e-05,
"loss": 0.7446,
"step": 453
},
{
"epoch": 0.5328638497652582,
"grad_norm": 0.3718375861644745,
"learning_rate": 9.421487603305785e-05,
"loss": 0.7316,
"step": 454
},
{
"epoch": 0.534037558685446,
"grad_norm": 0.35064697265625,
"learning_rate": 9.397874852420307e-05,
"loss": 0.7651,
"step": 455
},
{
"epoch": 0.5352112676056338,
"grad_norm": 0.3724139630794525,
"learning_rate": 9.37426210153483e-05,
"loss": 0.7639,
"step": 456
},
{
"epoch": 0.5363849765258216,
"grad_norm": 0.3420800566673279,
"learning_rate": 9.35064935064935e-05,
"loss": 0.7578,
"step": 457
},
{
"epoch": 0.5375586854460094,
"grad_norm": 0.3437943160533905,
"learning_rate": 9.327036599763874e-05,
"loss": 0.7898,
"step": 458
},
{
"epoch": 0.5387323943661971,
"grad_norm": 0.3799413740634918,
"learning_rate": 9.303423848878394e-05,
"loss": 0.7216,
"step": 459
},
{
"epoch": 0.539906103286385,
"grad_norm": 0.35702013969421387,
"learning_rate": 9.279811097992916e-05,
"loss": 0.7509,
"step": 460
},
{
"epoch": 0.5410798122065728,
"grad_norm": 0.36074140667915344,
"learning_rate": 9.256198347107439e-05,
"loss": 0.7448,
"step": 461
},
{
"epoch": 0.5422535211267606,
"grad_norm": 0.34211182594299316,
"learning_rate": 9.23258559622196e-05,
"loss": 0.7143,
"step": 462
},
{
"epoch": 0.5434272300469484,
"grad_norm": 0.3816893398761749,
"learning_rate": 9.208972845336483e-05,
"loss": 0.7178,
"step": 463
},
{
"epoch": 0.5446009389671361,
"grad_norm": 0.36033767461776733,
"learning_rate": 9.185360094451005e-05,
"loss": 0.7406,
"step": 464
},
{
"epoch": 0.545774647887324,
"grad_norm": 0.38050010800361633,
"learning_rate": 9.161747343565525e-05,
"loss": 0.7528,
"step": 465
},
{
"epoch": 0.5469483568075117,
"grad_norm": 0.3648395240306854,
"learning_rate": 9.138134592680048e-05,
"loss": 0.7802,
"step": 466
},
{
"epoch": 0.5481220657276995,
"grad_norm": 0.35185542702674866,
"learning_rate": 9.11452184179457e-05,
"loss": 0.7489,
"step": 467
},
{
"epoch": 0.5492957746478874,
"grad_norm": 0.3487717807292938,
"learning_rate": 9.090909090909092e-05,
"loss": 0.7742,
"step": 468
},
{
"epoch": 0.5504694835680751,
"grad_norm": 0.36121654510498047,
"learning_rate": 9.067296340023614e-05,
"loss": 0.7974,
"step": 469
},
{
"epoch": 0.5516431924882629,
"grad_norm": 0.3470339775085449,
"learning_rate": 9.043683589138135e-05,
"loss": 0.723,
"step": 470
},
{
"epoch": 0.5528169014084507,
"grad_norm": 0.33549764752388,
"learning_rate": 9.020070838252657e-05,
"loss": 0.7334,
"step": 471
},
{
"epoch": 0.5539906103286385,
"grad_norm": 0.36101868748664856,
"learning_rate": 8.996458087367179e-05,
"loss": 0.6817,
"step": 472
},
{
"epoch": 0.5551643192488263,
"grad_norm": 0.36847153306007385,
"learning_rate": 8.9728453364817e-05,
"loss": 0.7942,
"step": 473
},
{
"epoch": 0.5563380281690141,
"grad_norm": 0.3564891815185547,
"learning_rate": 8.949232585596222e-05,
"loss": 0.7071,
"step": 474
},
{
"epoch": 0.5575117370892019,
"grad_norm": 0.36866652965545654,
"learning_rate": 8.925619834710744e-05,
"loss": 0.7685,
"step": 475
},
{
"epoch": 0.5586854460093896,
"grad_norm": 0.370924711227417,
"learning_rate": 8.902007083825266e-05,
"loss": 0.7313,
"step": 476
},
{
"epoch": 0.5598591549295775,
"grad_norm": 0.3611142039299011,
"learning_rate": 8.878394332939788e-05,
"loss": 0.7666,
"step": 477
},
{
"epoch": 0.5610328638497653,
"grad_norm": 0.3418121635913849,
"learning_rate": 8.85478158205431e-05,
"loss": 0.7194,
"step": 478
},
{
"epoch": 0.562206572769953,
"grad_norm": 0.3478650748729706,
"learning_rate": 8.831168831168831e-05,
"loss": 0.7145,
"step": 479
},
{
"epoch": 0.5633802816901409,
"grad_norm": 0.3567008078098297,
"learning_rate": 8.807556080283353e-05,
"loss": 0.7591,
"step": 480
},
{
"epoch": 0.5645539906103286,
"grad_norm": 0.3629607558250427,
"learning_rate": 8.783943329397875e-05,
"loss": 0.7856,
"step": 481
},
{
"epoch": 0.5657276995305164,
"grad_norm": 0.37257978320121765,
"learning_rate": 8.760330578512397e-05,
"loss": 0.709,
"step": 482
},
{
"epoch": 0.5669014084507042,
"grad_norm": 0.3570626676082611,
"learning_rate": 8.736717827626919e-05,
"loss": 0.7639,
"step": 483
},
{
"epoch": 0.568075117370892,
"grad_norm": 0.34790506958961487,
"learning_rate": 8.713105076741442e-05,
"loss": 0.7375,
"step": 484
},
{
"epoch": 0.5692488262910798,
"grad_norm": 0.3525756895542145,
"learning_rate": 8.689492325855962e-05,
"loss": 0.7274,
"step": 485
},
{
"epoch": 0.5704225352112676,
"grad_norm": 0.3545394837856293,
"learning_rate": 8.665879574970484e-05,
"loss": 0.7531,
"step": 486
},
{
"epoch": 0.5715962441314554,
"grad_norm": 0.35677066445350647,
"learning_rate": 8.642266824085006e-05,
"loss": 0.7682,
"step": 487
},
{
"epoch": 0.5727699530516432,
"grad_norm": 0.3439461290836334,
"learning_rate": 8.618654073199528e-05,
"loss": 0.7176,
"step": 488
},
{
"epoch": 0.573943661971831,
"grad_norm": 0.3622515797615051,
"learning_rate": 8.595041322314051e-05,
"loss": 0.7004,
"step": 489
},
{
"epoch": 0.5751173708920188,
"grad_norm": 0.36056646704673767,
"learning_rate": 8.571428571428571e-05,
"loss": 0.74,
"step": 490
},
{
"epoch": 0.5762910798122066,
"grad_norm": 0.3509630262851715,
"learning_rate": 8.547815820543093e-05,
"loss": 0.8006,
"step": 491
},
{
"epoch": 0.5774647887323944,
"grad_norm": 0.3422422707080841,
"learning_rate": 8.524203069657616e-05,
"loss": 0.7162,
"step": 492
},
{
"epoch": 0.5786384976525821,
"grad_norm": 0.35553744435310364,
"learning_rate": 8.500590318772137e-05,
"loss": 0.7554,
"step": 493
},
{
"epoch": 0.57981220657277,
"grad_norm": 0.3443603813648224,
"learning_rate": 8.47697756788666e-05,
"loss": 0.7128,
"step": 494
},
{
"epoch": 0.5809859154929577,
"grad_norm": 0.3314555883407593,
"learning_rate": 8.453364817001182e-05,
"loss": 0.7123,
"step": 495
},
{
"epoch": 0.5821596244131455,
"grad_norm": 0.33951112627983093,
"learning_rate": 8.429752066115702e-05,
"loss": 0.7501,
"step": 496
},
{
"epoch": 0.5833333333333334,
"grad_norm": 0.327809602022171,
"learning_rate": 8.406139315230225e-05,
"loss": 0.7543,
"step": 497
},
{
"epoch": 0.5845070422535211,
"grad_norm": 0.33205023407936096,
"learning_rate": 8.382526564344747e-05,
"loss": 0.7395,
"step": 498
},
{
"epoch": 0.5856807511737089,
"grad_norm": 0.3762659430503845,
"learning_rate": 8.358913813459269e-05,
"loss": 0.7424,
"step": 499
},
{
"epoch": 0.5868544600938967,
"grad_norm": 0.3421575427055359,
"learning_rate": 8.33530106257379e-05,
"loss": 0.7167,
"step": 500
},
{
"epoch": 0.5880281690140845,
"grad_norm": 0.3560996353626251,
"learning_rate": 8.311688311688312e-05,
"loss": 0.7464,
"step": 501
},
{
"epoch": 0.5892018779342723,
"grad_norm": 0.3566039800643921,
"learning_rate": 8.288075560802834e-05,
"loss": 0.715,
"step": 502
},
{
"epoch": 0.5903755868544601,
"grad_norm": 0.3481593430042267,
"learning_rate": 8.264462809917356e-05,
"loss": 0.7506,
"step": 503
},
{
"epoch": 0.5915492957746479,
"grad_norm": 0.34428590536117554,
"learning_rate": 8.240850059031878e-05,
"loss": 0.7272,
"step": 504
},
{
"epoch": 0.5927230046948356,
"grad_norm": 0.35629555583000183,
"learning_rate": 8.2172373081464e-05,
"loss": 0.7334,
"step": 505
},
{
"epoch": 0.5938967136150235,
"grad_norm": 0.37292811274528503,
"learning_rate": 8.193624557260921e-05,
"loss": 0.7505,
"step": 506
},
{
"epoch": 0.5950704225352113,
"grad_norm": 0.359614759683609,
"learning_rate": 8.170011806375443e-05,
"loss": 0.8006,
"step": 507
},
{
"epoch": 0.596244131455399,
"grad_norm": 0.3388945460319519,
"learning_rate": 8.146399055489965e-05,
"loss": 0.7542,
"step": 508
},
{
"epoch": 0.5974178403755869,
"grad_norm": 0.3528054356575012,
"learning_rate": 8.122786304604487e-05,
"loss": 0.7412,
"step": 509
},
{
"epoch": 0.5985915492957746,
"grad_norm": 0.3354608416557312,
"learning_rate": 8.099173553719009e-05,
"loss": 0.7062,
"step": 510
},
{
"epoch": 0.5997652582159625,
"grad_norm": 0.35168859362602234,
"learning_rate": 8.07556080283353e-05,
"loss": 0.7653,
"step": 511
},
{
"epoch": 0.6009389671361502,
"grad_norm": 0.33843398094177246,
"learning_rate": 8.051948051948052e-05,
"loss": 0.7339,
"step": 512
},
{
"epoch": 0.602112676056338,
"grad_norm": 0.32910212874412537,
"learning_rate": 8.028335301062574e-05,
"loss": 0.6966,
"step": 513
},
{
"epoch": 0.6032863849765259,
"grad_norm": 0.3462936580181122,
"learning_rate": 8.004722550177096e-05,
"loss": 0.7386,
"step": 514
},
{
"epoch": 0.6044600938967136,
"grad_norm": 0.3483426868915558,
"learning_rate": 7.981109799291619e-05,
"loss": 0.7548,
"step": 515
},
{
"epoch": 0.6056338028169014,
"grad_norm": 0.3555918335914612,
"learning_rate": 7.95749704840614e-05,
"loss": 0.7144,
"step": 516
},
{
"epoch": 0.6068075117370892,
"grad_norm": 0.3545628786087036,
"learning_rate": 7.933884297520661e-05,
"loss": 0.7601,
"step": 517
},
{
"epoch": 0.607981220657277,
"grad_norm": 0.3554907441139221,
"learning_rate": 7.910271546635183e-05,
"loss": 0.7464,
"step": 518
},
{
"epoch": 0.6091549295774648,
"grad_norm": 0.3457619547843933,
"learning_rate": 7.886658795749705e-05,
"loss": 0.7372,
"step": 519
},
{
"epoch": 0.6103286384976526,
"grad_norm": 0.3450148105621338,
"learning_rate": 7.863046044864228e-05,
"loss": 0.7265,
"step": 520
},
{
"epoch": 0.6115023474178404,
"grad_norm": 0.3475225567817688,
"learning_rate": 7.839433293978748e-05,
"loss": 0.798,
"step": 521
},
{
"epoch": 0.6126760563380281,
"grad_norm": 0.34560921788215637,
"learning_rate": 7.81582054309327e-05,
"loss": 0.7583,
"step": 522
},
{
"epoch": 0.613849765258216,
"grad_norm": 0.33480820059776306,
"learning_rate": 7.792207792207793e-05,
"loss": 0.7658,
"step": 523
},
{
"epoch": 0.6150234741784038,
"grad_norm": 0.34581395983695984,
"learning_rate": 7.768595041322314e-05,
"loss": 0.7368,
"step": 524
},
{
"epoch": 0.6161971830985915,
"grad_norm": 0.35383906960487366,
"learning_rate": 7.744982290436837e-05,
"loss": 0.7963,
"step": 525
},
{
"epoch": 0.6173708920187794,
"grad_norm": 0.352117121219635,
"learning_rate": 7.721369539551359e-05,
"loss": 0.7589,
"step": 526
},
{
"epoch": 0.6185446009389671,
"grad_norm": 0.34420257806777954,
"learning_rate": 7.697756788665879e-05,
"loss": 0.7209,
"step": 527
},
{
"epoch": 0.6197183098591549,
"grad_norm": 0.3449562191963196,
"learning_rate": 7.674144037780402e-05,
"loss": 0.7526,
"step": 528
},
{
"epoch": 0.6208920187793427,
"grad_norm": 0.37377694249153137,
"learning_rate": 7.650531286894924e-05,
"loss": 0.7348,
"step": 529
},
{
"epoch": 0.6220657276995305,
"grad_norm": 0.32662031054496765,
"learning_rate": 7.626918536009446e-05,
"loss": 0.7125,
"step": 530
},
{
"epoch": 0.6232394366197183,
"grad_norm": 0.3551415801048279,
"learning_rate": 7.603305785123968e-05,
"loss": 0.7497,
"step": 531
},
{
"epoch": 0.6244131455399061,
"grad_norm": 0.3519802689552307,
"learning_rate": 7.579693034238488e-05,
"loss": 0.7864,
"step": 532
},
{
"epoch": 0.6255868544600939,
"grad_norm": 0.3773750364780426,
"learning_rate": 7.556080283353011e-05,
"loss": 0.7681,
"step": 533
},
{
"epoch": 0.6267605633802817,
"grad_norm": 0.3558037281036377,
"learning_rate": 7.532467532467533e-05,
"loss": 0.7392,
"step": 534
},
{
"epoch": 0.6279342723004695,
"grad_norm": 0.33910447359085083,
"learning_rate": 7.508854781582055e-05,
"loss": 0.7036,
"step": 535
},
{
"epoch": 0.6291079812206573,
"grad_norm": 0.35620275139808655,
"learning_rate": 7.485242030696577e-05,
"loss": 0.7272,
"step": 536
},
{
"epoch": 0.6302816901408451,
"grad_norm": 0.3377542495727539,
"learning_rate": 7.461629279811098e-05,
"loss": 0.7244,
"step": 537
},
{
"epoch": 0.6314553990610329,
"grad_norm": 0.35217198729515076,
"learning_rate": 7.43801652892562e-05,
"loss": 0.7655,
"step": 538
},
{
"epoch": 0.6326291079812206,
"grad_norm": 0.34656718373298645,
"learning_rate": 7.414403778040142e-05,
"loss": 0.7474,
"step": 539
},
{
"epoch": 0.6338028169014085,
"grad_norm": 0.34429579973220825,
"learning_rate": 7.390791027154664e-05,
"loss": 0.7333,
"step": 540
},
{
"epoch": 0.6349765258215962,
"grad_norm": 0.374262273311615,
"learning_rate": 7.367178276269186e-05,
"loss": 0.7876,
"step": 541
},
{
"epoch": 0.636150234741784,
"grad_norm": 0.363299161195755,
"learning_rate": 7.343565525383707e-05,
"loss": 0.7784,
"step": 542
},
{
"epoch": 0.6373239436619719,
"grad_norm": 0.36767125129699707,
"learning_rate": 7.31995277449823e-05,
"loss": 0.7329,
"step": 543
},
{
"epoch": 0.6384976525821596,
"grad_norm": 0.3338686525821686,
"learning_rate": 7.296340023612751e-05,
"loss": 0.7737,
"step": 544
},
{
"epoch": 0.6396713615023474,
"grad_norm": 0.3493046164512634,
"learning_rate": 7.272727272727273e-05,
"loss": 0.7461,
"step": 545
},
{
"epoch": 0.6408450704225352,
"grad_norm": 0.3691573441028595,
"learning_rate": 7.249114521841795e-05,
"loss": 0.765,
"step": 546
},
{
"epoch": 0.642018779342723,
"grad_norm": 0.3573099374771118,
"learning_rate": 7.225501770956316e-05,
"loss": 0.7589,
"step": 547
},
{
"epoch": 0.6431924882629108,
"grad_norm": 0.36218926310539246,
"learning_rate": 7.201889020070838e-05,
"loss": 0.7314,
"step": 548
},
{
"epoch": 0.6443661971830986,
"grad_norm": 0.35753628611564636,
"learning_rate": 7.17827626918536e-05,
"loss": 0.7564,
"step": 549
},
{
"epoch": 0.6455399061032864,
"grad_norm": 0.3394756615161896,
"learning_rate": 7.154663518299882e-05,
"loss": 0.7162,
"step": 550
},
{
"epoch": 0.6467136150234741,
"grad_norm": 0.350090891122818,
"learning_rate": 7.131050767414405e-05,
"loss": 0.7561,
"step": 551
},
{
"epoch": 0.647887323943662,
"grad_norm": 0.328924298286438,
"learning_rate": 7.107438016528925e-05,
"loss": 0.7143,
"step": 552
},
{
"epoch": 0.6490610328638498,
"grad_norm": 0.3552818298339844,
"learning_rate": 7.083825265643447e-05,
"loss": 0.7264,
"step": 553
},
{
"epoch": 0.6502347417840375,
"grad_norm": 0.3504960536956787,
"learning_rate": 7.06021251475797e-05,
"loss": 0.7512,
"step": 554
},
{
"epoch": 0.6514084507042254,
"grad_norm": 0.33755823969841003,
"learning_rate": 7.036599763872491e-05,
"loss": 0.7621,
"step": 555
},
{
"epoch": 0.6525821596244131,
"grad_norm": 0.35977354645729065,
"learning_rate": 7.012987012987014e-05,
"loss": 0.776,
"step": 556
},
{
"epoch": 0.653755868544601,
"grad_norm": 0.37304726243019104,
"learning_rate": 6.989374262101536e-05,
"loss": 0.7601,
"step": 557
},
{
"epoch": 0.6549295774647887,
"grad_norm": 0.3569071590900421,
"learning_rate": 6.965761511216056e-05,
"loss": 0.7303,
"step": 558
},
{
"epoch": 0.6561032863849765,
"grad_norm": 0.348264217376709,
"learning_rate": 6.94214876033058e-05,
"loss": 0.759,
"step": 559
},
{
"epoch": 0.6572769953051644,
"grad_norm": 0.3501366674900055,
"learning_rate": 6.9185360094451e-05,
"loss": 0.7588,
"step": 560
},
{
"epoch": 0.6584507042253521,
"grad_norm": 0.3633224666118622,
"learning_rate": 6.894923258559623e-05,
"loss": 0.7741,
"step": 561
},
{
"epoch": 0.6596244131455399,
"grad_norm": 0.35944506525993347,
"learning_rate": 6.871310507674145e-05,
"loss": 0.756,
"step": 562
},
{
"epoch": 0.6607981220657277,
"grad_norm": 0.3479359745979309,
"learning_rate": 6.847697756788665e-05,
"loss": 0.7292,
"step": 563
},
{
"epoch": 0.6619718309859155,
"grad_norm": 0.37013959884643555,
"learning_rate": 6.824085005903188e-05,
"loss": 0.7618,
"step": 564
},
{
"epoch": 0.6631455399061033,
"grad_norm": 0.36679190397262573,
"learning_rate": 6.80047225501771e-05,
"loss": 0.7797,
"step": 565
},
{
"epoch": 0.6643192488262911,
"grad_norm": 0.35092490911483765,
"learning_rate": 6.776859504132232e-05,
"loss": 0.705,
"step": 566
},
{
"epoch": 0.6654929577464789,
"grad_norm": 0.3594275712966919,
"learning_rate": 6.753246753246754e-05,
"loss": 0.7215,
"step": 567
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.3503059148788452,
"learning_rate": 6.729634002361276e-05,
"loss": 0.7248,
"step": 568
},
{
"epoch": 0.6678403755868545,
"grad_norm": 0.35919633507728577,
"learning_rate": 6.706021251475797e-05,
"loss": 0.7718,
"step": 569
},
{
"epoch": 0.6690140845070423,
"grad_norm": 0.36752262711524963,
"learning_rate": 6.682408500590319e-05,
"loss": 0.7738,
"step": 570
},
{
"epoch": 0.67018779342723,
"grad_norm": 0.33812567591667175,
"learning_rate": 6.658795749704841e-05,
"loss": 0.7846,
"step": 571
},
{
"epoch": 0.6713615023474179,
"grad_norm": 0.3429810404777527,
"learning_rate": 6.635182998819363e-05,
"loss": 0.7371,
"step": 572
},
{
"epoch": 0.6725352112676056,
"grad_norm": 0.3457571864128113,
"learning_rate": 6.611570247933885e-05,
"loss": 0.7318,
"step": 573
},
{
"epoch": 0.6737089201877934,
"grad_norm": 0.3476294279098511,
"learning_rate": 6.587957497048406e-05,
"loss": 0.7344,
"step": 574
},
{
"epoch": 0.6748826291079812,
"grad_norm": 0.34464409947395325,
"learning_rate": 6.564344746162928e-05,
"loss": 0.7429,
"step": 575
},
{
"epoch": 0.676056338028169,
"grad_norm": 0.34444373846054077,
"learning_rate": 6.54073199527745e-05,
"loss": 0.7663,
"step": 576
},
{
"epoch": 0.6772300469483568,
"grad_norm": 0.3656728267669678,
"learning_rate": 6.517119244391972e-05,
"loss": 0.7068,
"step": 577
},
{
"epoch": 0.6784037558685446,
"grad_norm": 0.3591727614402771,
"learning_rate": 6.493506493506494e-05,
"loss": 0.7481,
"step": 578
},
{
"epoch": 0.6795774647887324,
"grad_norm": 0.38865676522254944,
"learning_rate": 6.469893742621017e-05,
"loss": 0.7659,
"step": 579
},
{
"epoch": 0.6807511737089202,
"grad_norm": 0.3438194990158081,
"learning_rate": 6.446280991735537e-05,
"loss": 0.6748,
"step": 580
},
{
"epoch": 0.681924882629108,
"grad_norm": 0.34979990124702454,
"learning_rate": 6.422668240850059e-05,
"loss": 0.7529,
"step": 581
},
{
"epoch": 0.6830985915492958,
"grad_norm": 0.37309062480926514,
"learning_rate": 6.399055489964582e-05,
"loss": 0.7417,
"step": 582
},
{
"epoch": 0.6842723004694836,
"grad_norm": 0.3737837076187134,
"learning_rate": 6.375442739079102e-05,
"loss": 0.773,
"step": 583
},
{
"epoch": 0.6854460093896714,
"grad_norm": 0.3397013247013092,
"learning_rate": 6.351829988193624e-05,
"loss": 0.7093,
"step": 584
},
{
"epoch": 0.6866197183098591,
"grad_norm": 0.37165701389312744,
"learning_rate": 6.328217237308147e-05,
"loss": 0.7078,
"step": 585
},
{
"epoch": 0.687793427230047,
"grad_norm": 0.3533116579055786,
"learning_rate": 6.304604486422668e-05,
"loss": 0.7105,
"step": 586
},
{
"epoch": 0.6889671361502347,
"grad_norm": 0.35352569818496704,
"learning_rate": 6.280991735537191e-05,
"loss": 0.7282,
"step": 587
},
{
"epoch": 0.6901408450704225,
"grad_norm": 0.3754810094833374,
"learning_rate": 6.257378984651711e-05,
"loss": 0.7364,
"step": 588
},
{
"epoch": 0.6913145539906104,
"grad_norm": 0.36235493421554565,
"learning_rate": 6.233766233766233e-05,
"loss": 0.7024,
"step": 589
},
{
"epoch": 0.6924882629107981,
"grad_norm": 0.3446933627128601,
"learning_rate": 6.210153482880756e-05,
"loss": 0.7392,
"step": 590
},
{
"epoch": 0.6936619718309859,
"grad_norm": 0.34918078780174255,
"learning_rate": 6.186540731995277e-05,
"loss": 0.6716,
"step": 591
},
{
"epoch": 0.6948356807511737,
"grad_norm": 0.3438567519187927,
"learning_rate": 6.1629279811098e-05,
"loss": 0.7812,
"step": 592
},
{
"epoch": 0.6960093896713615,
"grad_norm": 0.346626341342926,
"learning_rate": 6.139315230224322e-05,
"loss": 0.7538,
"step": 593
},
{
"epoch": 0.6971830985915493,
"grad_norm": 0.3506343960762024,
"learning_rate": 6.115702479338842e-05,
"loss": 0.7434,
"step": 594
},
{
"epoch": 0.6983568075117371,
"grad_norm": 0.35403555631637573,
"learning_rate": 6.0920897284533654e-05,
"loss": 0.7333,
"step": 595
},
{
"epoch": 0.6995305164319249,
"grad_norm": 0.3391430377960205,
"learning_rate": 6.068476977567887e-05,
"loss": 0.7486,
"step": 596
},
{
"epoch": 0.7007042253521126,
"grad_norm": 0.33783578872680664,
"learning_rate": 6.044864226682408e-05,
"loss": 0.7588,
"step": 597
},
{
"epoch": 0.7018779342723005,
"grad_norm": 0.3333738446235657,
"learning_rate": 6.021251475796931e-05,
"loss": 0.7268,
"step": 598
},
{
"epoch": 0.7030516431924883,
"grad_norm": 0.3494018316268921,
"learning_rate": 5.997638724911453e-05,
"loss": 0.7363,
"step": 599
},
{
"epoch": 0.704225352112676,
"grad_norm": 0.34416642785072327,
"learning_rate": 5.9740259740259744e-05,
"loss": 0.7322,
"step": 600
},
{
"epoch": 0.7053990610328639,
"grad_norm": 0.3523387312889099,
"learning_rate": 5.950413223140496e-05,
"loss": 0.6986,
"step": 601
},
{
"epoch": 0.7065727699530516,
"grad_norm": 0.33000361919403076,
"learning_rate": 5.926800472255017e-05,
"loss": 0.7535,
"step": 602
},
{
"epoch": 0.7077464788732394,
"grad_norm": 0.33932214975357056,
"learning_rate": 5.90318772136954e-05,
"loss": 0.7051,
"step": 603
},
{
"epoch": 0.7089201877934272,
"grad_norm": 0.3373797833919525,
"learning_rate": 5.8795749704840616e-05,
"loss": 0.7022,
"step": 604
},
{
"epoch": 0.710093896713615,
"grad_norm": 0.35239875316619873,
"learning_rate": 5.855962219598583e-05,
"loss": 0.7893,
"step": 605
},
{
"epoch": 0.7112676056338029,
"grad_norm": 0.36973506212234497,
"learning_rate": 5.832349468713105e-05,
"loss": 0.7157,
"step": 606
},
{
"epoch": 0.7124413145539906,
"grad_norm": 0.3447434604167938,
"learning_rate": 5.8087367178276277e-05,
"loss": 0.7306,
"step": 607
},
{
"epoch": 0.7136150234741784,
"grad_norm": 0.36380118131637573,
"learning_rate": 5.785123966942149e-05,
"loss": 0.7238,
"step": 608
},
{
"epoch": 0.7147887323943662,
"grad_norm": 0.33784252405166626,
"learning_rate": 5.7615112160566706e-05,
"loss": 0.6792,
"step": 609
},
{
"epoch": 0.715962441314554,
"grad_norm": 0.34995025396347046,
"learning_rate": 5.737898465171193e-05,
"loss": 0.7158,
"step": 610
},
{
"epoch": 0.7171361502347418,
"grad_norm": 0.3586655259132385,
"learning_rate": 5.714285714285714e-05,
"loss": 0.7345,
"step": 611
},
{
"epoch": 0.7183098591549296,
"grad_norm": 0.3490711450576782,
"learning_rate": 5.6906729634002366e-05,
"loss": 0.759,
"step": 612
},
{
"epoch": 0.7194835680751174,
"grad_norm": 0.3405636250972748,
"learning_rate": 5.6670602125147584e-05,
"loss": 0.7069,
"step": 613
},
{
"epoch": 0.7206572769953051,
"grad_norm": 0.3362460136413574,
"learning_rate": 5.6434474616292796e-05,
"loss": 0.7413,
"step": 614
},
{
"epoch": 0.721830985915493,
"grad_norm": 0.3571033775806427,
"learning_rate": 5.619834710743802e-05,
"loss": 0.7138,
"step": 615
},
{
"epoch": 0.7230046948356808,
"grad_norm": 0.33801379799842834,
"learning_rate": 5.5962219598583245e-05,
"loss": 0.7004,
"step": 616
},
{
"epoch": 0.7241784037558685,
"grad_norm": 0.350063294172287,
"learning_rate": 5.5726092089728456e-05,
"loss": 0.7342,
"step": 617
},
{
"epoch": 0.7253521126760564,
"grad_norm": 0.3471220135688782,
"learning_rate": 5.5489964580873674e-05,
"loss": 0.7591,
"step": 618
},
{
"epoch": 0.7265258215962441,
"grad_norm": 0.3600592613220215,
"learning_rate": 5.5253837072018886e-05,
"loss": 0.7427,
"step": 619
},
{
"epoch": 0.7276995305164319,
"grad_norm": 0.34294822812080383,
"learning_rate": 5.501770956316411e-05,
"loss": 0.7085,
"step": 620
},
{
"epoch": 0.7288732394366197,
"grad_norm": 0.3481101989746094,
"learning_rate": 5.4781582054309335e-05,
"loss": 0.7465,
"step": 621
},
{
"epoch": 0.7300469483568075,
"grad_norm": 0.3402861952781677,
"learning_rate": 5.4545454545454546e-05,
"loss": 0.7613,
"step": 622
},
{
"epoch": 0.7312206572769953,
"grad_norm": 0.3475019335746765,
"learning_rate": 5.4309327036599764e-05,
"loss": 0.775,
"step": 623
},
{
"epoch": 0.7323943661971831,
"grad_norm": 0.34003034234046936,
"learning_rate": 5.407319952774499e-05,
"loss": 0.6817,
"step": 624
},
{
"epoch": 0.7335680751173709,
"grad_norm": 0.33620044589042664,
"learning_rate": 5.38370720188902e-05,
"loss": 0.7392,
"step": 625
},
{
"epoch": 0.7347417840375586,
"grad_norm": 0.34645119309425354,
"learning_rate": 5.3600944510035425e-05,
"loss": 0.717,
"step": 626
},
{
"epoch": 0.7359154929577465,
"grad_norm": 0.3485560417175293,
"learning_rate": 5.336481700118064e-05,
"loss": 0.7361,
"step": 627
},
{
"epoch": 0.7370892018779343,
"grad_norm": 0.36997392773628235,
"learning_rate": 5.3128689492325854e-05,
"loss": 0.7264,
"step": 628
},
{
"epoch": 0.7382629107981221,
"grad_norm": 0.3379404842853546,
"learning_rate": 5.289256198347108e-05,
"loss": 0.7303,
"step": 629
},
{
"epoch": 0.7394366197183099,
"grad_norm": 0.3385223150253296,
"learning_rate": 5.2656434474616304e-05,
"loss": 0.7174,
"step": 630
},
{
"epoch": 0.7406103286384976,
"grad_norm": 0.38303306698799133,
"learning_rate": 5.2420306965761515e-05,
"loss": 0.7539,
"step": 631
},
{
"epoch": 0.7417840375586855,
"grad_norm": 0.3544706404209137,
"learning_rate": 5.218417945690673e-05,
"loss": 0.7108,
"step": 632
},
{
"epoch": 0.7429577464788732,
"grad_norm": 0.35137131810188293,
"learning_rate": 5.1948051948051944e-05,
"loss": 0.7184,
"step": 633
},
{
"epoch": 0.744131455399061,
"grad_norm": 0.35326629877090454,
"learning_rate": 5.171192443919717e-05,
"loss": 0.7114,
"step": 634
},
{
"epoch": 0.7453051643192489,
"grad_norm": 0.35051414370536804,
"learning_rate": 5.1475796930342393e-05,
"loss": 0.6966,
"step": 635
},
{
"epoch": 0.7464788732394366,
"grad_norm": 0.37491628527641296,
"learning_rate": 5.1239669421487605e-05,
"loss": 0.7061,
"step": 636
},
{
"epoch": 0.7476525821596244,
"grad_norm": 0.37242433428764343,
"learning_rate": 5.100354191263282e-05,
"loss": 0.6904,
"step": 637
},
{
"epoch": 0.7488262910798122,
"grad_norm": 0.376429945230484,
"learning_rate": 5.076741440377805e-05,
"loss": 0.7203,
"step": 638
},
{
"epoch": 0.75,
"grad_norm": 0.34106218814849854,
"learning_rate": 5.053128689492326e-05,
"loss": 0.6878,
"step": 639
},
{
"epoch": 0.7511737089201878,
"grad_norm": 0.37987956404685974,
"learning_rate": 5.029515938606848e-05,
"loss": 0.7835,
"step": 640
},
{
"epoch": 0.7523474178403756,
"grad_norm": 0.355932354927063,
"learning_rate": 5.00590318772137e-05,
"loss": 0.7382,
"step": 641
},
{
"epoch": 0.7535211267605634,
"grad_norm": 0.33495378494262695,
"learning_rate": 4.982290436835891e-05,
"loss": 0.7244,
"step": 642
},
{
"epoch": 0.7546948356807511,
"grad_norm": 0.36573663353919983,
"learning_rate": 4.958677685950414e-05,
"loss": 0.7339,
"step": 643
},
{
"epoch": 0.755868544600939,
"grad_norm": 0.34233418107032776,
"learning_rate": 4.9350649350649355e-05,
"loss": 0.7303,
"step": 644
},
{
"epoch": 0.7570422535211268,
"grad_norm": 0.36358365416526794,
"learning_rate": 4.9114521841794566e-05,
"loss": 0.7169,
"step": 645
},
{
"epoch": 0.7582159624413145,
"grad_norm": 0.3423750400543213,
"learning_rate": 4.887839433293979e-05,
"loss": 0.7413,
"step": 646
},
{
"epoch": 0.7593896713615024,
"grad_norm": 0.34080007672309875,
"learning_rate": 4.864226682408501e-05,
"loss": 0.7319,
"step": 647
},
{
"epoch": 0.7605633802816901,
"grad_norm": 0.35408544540405273,
"learning_rate": 4.840613931523023e-05,
"loss": 0.6895,
"step": 648
},
{
"epoch": 0.7617370892018779,
"grad_norm": 0.34515753388404846,
"learning_rate": 4.8170011806375445e-05,
"loss": 0.7181,
"step": 649
},
{
"epoch": 0.7629107981220657,
"grad_norm": 0.3446560502052307,
"learning_rate": 4.793388429752066e-05,
"loss": 0.7156,
"step": 650
},
{
"epoch": 0.7640845070422535,
"grad_norm": 0.3451150357723236,
"learning_rate": 4.769775678866588e-05,
"loss": 0.7232,
"step": 651
},
{
"epoch": 0.7652582159624414,
"grad_norm": 0.357740193605423,
"learning_rate": 4.74616292798111e-05,
"loss": 0.6872,
"step": 652
},
{
"epoch": 0.7664319248826291,
"grad_norm": 0.3685015141963959,
"learning_rate": 4.7225501770956324e-05,
"loss": 0.735,
"step": 653
},
{
"epoch": 0.7676056338028169,
"grad_norm": 0.3503192961215973,
"learning_rate": 4.6989374262101535e-05,
"loss": 0.7336,
"step": 654
},
{
"epoch": 0.7687793427230047,
"grad_norm": 0.33453887701034546,
"learning_rate": 4.675324675324675e-05,
"loss": 0.7101,
"step": 655
},
{
"epoch": 0.7699530516431925,
"grad_norm": 0.3708442747592926,
"learning_rate": 4.651711924439197e-05,
"loss": 0.7153,
"step": 656
},
{
"epoch": 0.7711267605633803,
"grad_norm": 0.3736172318458557,
"learning_rate": 4.6280991735537196e-05,
"loss": 0.7071,
"step": 657
},
{
"epoch": 0.7723004694835681,
"grad_norm": 0.35988256335258484,
"learning_rate": 4.6044864226682414e-05,
"loss": 0.7285,
"step": 658
},
{
"epoch": 0.7734741784037559,
"grad_norm": 0.34314337372779846,
"learning_rate": 4.5808736717827625e-05,
"loss": 0.7137,
"step": 659
},
{
"epoch": 0.7746478873239436,
"grad_norm": 0.3723309338092804,
"learning_rate": 4.557260920897285e-05,
"loss": 0.7391,
"step": 660
},
{
"epoch": 0.7758215962441315,
"grad_norm": 0.3581268787384033,
"learning_rate": 4.533648170011807e-05,
"loss": 0.7157,
"step": 661
},
{
"epoch": 0.7769953051643192,
"grad_norm": 0.36784443259239197,
"learning_rate": 4.5100354191263286e-05,
"loss": 0.6865,
"step": 662
},
{
"epoch": 0.778169014084507,
"grad_norm": 0.36377546191215515,
"learning_rate": 4.48642266824085e-05,
"loss": 0.7437,
"step": 663
},
{
"epoch": 0.7793427230046949,
"grad_norm": 0.349101722240448,
"learning_rate": 4.462809917355372e-05,
"loss": 0.7226,
"step": 664
},
{
"epoch": 0.7805164319248826,
"grad_norm": 0.36608216166496277,
"learning_rate": 4.439197166469894e-05,
"loss": 0.7543,
"step": 665
},
{
"epoch": 0.7816901408450704,
"grad_norm": 0.3495696783065796,
"learning_rate": 4.415584415584416e-05,
"loss": 0.708,
"step": 666
},
{
"epoch": 0.7828638497652582,
"grad_norm": 0.3664140999317169,
"learning_rate": 4.3919716646989375e-05,
"loss": 0.7225,
"step": 667
},
{
"epoch": 0.784037558685446,
"grad_norm": 0.3560849726200104,
"learning_rate": 4.368358913813459e-05,
"loss": 0.6972,
"step": 668
},
{
"epoch": 0.7852112676056338,
"grad_norm": 0.3571857511997223,
"learning_rate": 4.344746162927981e-05,
"loss": 0.694,
"step": 669
},
{
"epoch": 0.7863849765258216,
"grad_norm": 0.37072160840034485,
"learning_rate": 4.321133412042503e-05,
"loss": 0.7202,
"step": 670
},
{
"epoch": 0.7875586854460094,
"grad_norm": 0.354948490858078,
"learning_rate": 4.2975206611570254e-05,
"loss": 0.7481,
"step": 671
},
{
"epoch": 0.7887323943661971,
"grad_norm": 0.3736347258090973,
"learning_rate": 4.2739079102715465e-05,
"loss": 0.7261,
"step": 672
},
{
"epoch": 0.789906103286385,
"grad_norm": 0.3690294623374939,
"learning_rate": 4.250295159386068e-05,
"loss": 0.7529,
"step": 673
},
{
"epoch": 0.7910798122065728,
"grad_norm": 0.354192316532135,
"learning_rate": 4.226682408500591e-05,
"loss": 0.7176,
"step": 674
},
{
"epoch": 0.7922535211267606,
"grad_norm": 0.355185866355896,
"learning_rate": 4.2030696576151126e-05,
"loss": 0.7099,
"step": 675
},
{
"epoch": 0.7934272300469484,
"grad_norm": 0.3503565490245819,
"learning_rate": 4.1794569067296344e-05,
"loss": 0.7072,
"step": 676
},
{
"epoch": 0.7946009389671361,
"grad_norm": 0.3727845549583435,
"learning_rate": 4.155844155844156e-05,
"loss": 0.7334,
"step": 677
},
{
"epoch": 0.795774647887324,
"grad_norm": 0.33894312381744385,
"learning_rate": 4.132231404958678e-05,
"loss": 0.6946,
"step": 678
},
{
"epoch": 0.7969483568075117,
"grad_norm": 0.3385523855686188,
"learning_rate": 4.1086186540732e-05,
"loss": 0.7096,
"step": 679
},
{
"epoch": 0.7981220657276995,
"grad_norm": 0.3488437235355377,
"learning_rate": 4.0850059031877216e-05,
"loss": 0.6942,
"step": 680
},
{
"epoch": 0.7992957746478874,
"grad_norm": 0.34666576981544495,
"learning_rate": 4.0613931523022434e-05,
"loss": 0.7329,
"step": 681
},
{
"epoch": 0.8004694835680751,
"grad_norm": 0.3557136356830597,
"learning_rate": 4.037780401416765e-05,
"loss": 0.7655,
"step": 682
},
{
"epoch": 0.8016431924882629,
"grad_norm": 0.3647683262825012,
"learning_rate": 4.014167650531287e-05,
"loss": 0.7578,
"step": 683
},
{
"epoch": 0.8028169014084507,
"grad_norm": 0.3452191650867462,
"learning_rate": 3.9905548996458095e-05,
"loss": 0.7145,
"step": 684
},
{
"epoch": 0.8039906103286385,
"grad_norm": 0.3540481925010681,
"learning_rate": 3.9669421487603306e-05,
"loss": 0.7347,
"step": 685
},
{
"epoch": 0.8051643192488263,
"grad_norm": 0.3536418378353119,
"learning_rate": 3.9433293978748524e-05,
"loss": 0.7103,
"step": 686
},
{
"epoch": 0.8063380281690141,
"grad_norm": 0.34728798270225525,
"learning_rate": 3.919716646989374e-05,
"loss": 0.7376,
"step": 687
},
{
"epoch": 0.8075117370892019,
"grad_norm": 0.354643851518631,
"learning_rate": 3.8961038961038966e-05,
"loss": 0.7223,
"step": 688
},
{
"epoch": 0.8086854460093896,
"grad_norm": 0.3438583016395569,
"learning_rate": 3.8724911452184184e-05,
"loss": 0.6906,
"step": 689
},
{
"epoch": 0.8098591549295775,
"grad_norm": 0.34713107347488403,
"learning_rate": 3.8488783943329396e-05,
"loss": 0.7361,
"step": 690
},
{
"epoch": 0.8110328638497653,
"grad_norm": 0.3483150005340576,
"learning_rate": 3.825265643447462e-05,
"loss": 0.7016,
"step": 691
},
{
"epoch": 0.812206572769953,
"grad_norm": 0.34848445653915405,
"learning_rate": 3.801652892561984e-05,
"loss": 0.6966,
"step": 692
},
{
"epoch": 0.8133802816901409,
"grad_norm": 0.34223318099975586,
"learning_rate": 3.7780401416765056e-05,
"loss": 0.7088,
"step": 693
},
{
"epoch": 0.8145539906103286,
"grad_norm": 0.33693239092826843,
"learning_rate": 3.7544273907910274e-05,
"loss": 0.7108,
"step": 694
},
{
"epoch": 0.8157276995305164,
"grad_norm": 0.34613272547721863,
"learning_rate": 3.730814639905549e-05,
"loss": 0.7075,
"step": 695
},
{
"epoch": 0.8169014084507042,
"grad_norm": 0.3430733382701874,
"learning_rate": 3.707201889020071e-05,
"loss": 0.7246,
"step": 696
},
{
"epoch": 0.818075117370892,
"grad_norm": 0.35237351059913635,
"learning_rate": 3.683589138134593e-05,
"loss": 0.6918,
"step": 697
},
{
"epoch": 0.8192488262910798,
"grad_norm": 0.3375650644302368,
"learning_rate": 3.659976387249115e-05,
"loss": 0.6978,
"step": 698
},
{
"epoch": 0.8204225352112676,
"grad_norm": 0.3585062026977539,
"learning_rate": 3.6363636363636364e-05,
"loss": 0.7241,
"step": 699
},
{
"epoch": 0.8215962441314554,
"grad_norm": 0.35660460591316223,
"learning_rate": 3.612750885478158e-05,
"loss": 0.6946,
"step": 700
},
{
"epoch": 0.8227699530516432,
"grad_norm": 0.3468845784664154,
"learning_rate": 3.58913813459268e-05,
"loss": 0.7535,
"step": 701
},
{
"epoch": 0.823943661971831,
"grad_norm": 0.365291029214859,
"learning_rate": 3.5655253837072025e-05,
"loss": 0.7438,
"step": 702
},
{
"epoch": 0.8251173708920188,
"grad_norm": 0.353506863117218,
"learning_rate": 3.5419126328217236e-05,
"loss": 0.7359,
"step": 703
},
{
"epoch": 0.8262910798122066,
"grad_norm": 0.381610244512558,
"learning_rate": 3.5182998819362454e-05,
"loss": 0.7821,
"step": 704
},
{
"epoch": 0.8274647887323944,
"grad_norm": 0.37710806727409363,
"learning_rate": 3.494687131050768e-05,
"loss": 0.7349,
"step": 705
},
{
"epoch": 0.8286384976525821,
"grad_norm": 0.361545592546463,
"learning_rate": 3.47107438016529e-05,
"loss": 0.7229,
"step": 706
},
{
"epoch": 0.82981220657277,
"grad_norm": 0.3615299463272095,
"learning_rate": 3.4474616292798115e-05,
"loss": 0.748,
"step": 707
},
{
"epoch": 0.8309859154929577,
"grad_norm": 0.3437252342700958,
"learning_rate": 3.4238488783943326e-05,
"loss": 0.7165,
"step": 708
},
{
"epoch": 0.8321596244131455,
"grad_norm": 0.35603129863739014,
"learning_rate": 3.400236127508855e-05,
"loss": 0.7373,
"step": 709
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.3586898446083069,
"learning_rate": 3.376623376623377e-05,
"loss": 0.7056,
"step": 710
},
{
"epoch": 0.8345070422535211,
"grad_norm": 0.3558507263660431,
"learning_rate": 3.353010625737899e-05,
"loss": 0.742,
"step": 711
},
{
"epoch": 0.8356807511737089,
"grad_norm": 0.3359735608100891,
"learning_rate": 3.3293978748524205e-05,
"loss": 0.6994,
"step": 712
},
{
"epoch": 0.8368544600938967,
"grad_norm": 0.34250345826148987,
"learning_rate": 3.305785123966942e-05,
"loss": 0.6762,
"step": 713
},
{
"epoch": 0.8380281690140845,
"grad_norm": 0.38417667150497437,
"learning_rate": 3.282172373081464e-05,
"loss": 0.7213,
"step": 714
},
{
"epoch": 0.8392018779342723,
"grad_norm": 0.3643978536128998,
"learning_rate": 3.258559622195986e-05,
"loss": 0.6884,
"step": 715
},
{
"epoch": 0.8403755868544601,
"grad_norm": 0.3544299602508545,
"learning_rate": 3.234946871310508e-05,
"loss": 0.712,
"step": 716
},
{
"epoch": 0.8415492957746479,
"grad_norm": 0.36903661489486694,
"learning_rate": 3.2113341204250294e-05,
"loss": 0.7227,
"step": 717
},
{
"epoch": 0.8427230046948356,
"grad_norm": 0.3557377755641937,
"learning_rate": 3.187721369539551e-05,
"loss": 0.6904,
"step": 718
},
{
"epoch": 0.8438967136150235,
"grad_norm": 0.36762547492980957,
"learning_rate": 3.164108618654074e-05,
"loss": 0.7469,
"step": 719
},
{
"epoch": 0.8450704225352113,
"grad_norm": 0.35805556178092957,
"learning_rate": 3.1404958677685955e-05,
"loss": 0.7443,
"step": 720
},
{
"epoch": 0.846244131455399,
"grad_norm": 0.38130536675453186,
"learning_rate": 3.1168831168831166e-05,
"loss": 0.7664,
"step": 721
},
{
"epoch": 0.8474178403755869,
"grad_norm": 0.3599521219730377,
"learning_rate": 3.0932703659976384e-05,
"loss": 0.7065,
"step": 722
},
{
"epoch": 0.8485915492957746,
"grad_norm": 0.3531062602996826,
"learning_rate": 3.069657615112161e-05,
"loss": 0.7451,
"step": 723
},
{
"epoch": 0.8497652582159625,
"grad_norm": 0.36916878819465637,
"learning_rate": 3.0460448642266827e-05,
"loss": 0.7077,
"step": 724
},
{
"epoch": 0.8509389671361502,
"grad_norm": 0.38139578700065613,
"learning_rate": 3.022432113341204e-05,
"loss": 0.7452,
"step": 725
},
{
"epoch": 0.852112676056338,
"grad_norm": 0.337944358587265,
"learning_rate": 2.9988193624557266e-05,
"loss": 0.6596,
"step": 726
},
{
"epoch": 0.8532863849765259,
"grad_norm": 0.36196213960647583,
"learning_rate": 2.975206611570248e-05,
"loss": 0.7081,
"step": 727
},
{
"epoch": 0.8544600938967136,
"grad_norm": 0.34913602471351624,
"learning_rate": 2.95159386068477e-05,
"loss": 0.6901,
"step": 728
},
{
"epoch": 0.8556338028169014,
"grad_norm": 0.343414843082428,
"learning_rate": 2.9279811097992914e-05,
"loss": 0.675,
"step": 729
},
{
"epoch": 0.8568075117370892,
"grad_norm": 0.3704102039337158,
"learning_rate": 2.9043683589138138e-05,
"loss": 0.7566,
"step": 730
},
{
"epoch": 0.857981220657277,
"grad_norm": 0.3464911878108978,
"learning_rate": 2.8807556080283353e-05,
"loss": 0.6872,
"step": 731
},
{
"epoch": 0.8591549295774648,
"grad_norm": 0.3615940511226654,
"learning_rate": 2.857142857142857e-05,
"loss": 0.7755,
"step": 732
},
{
"epoch": 0.8603286384976526,
"grad_norm": 0.35284191370010376,
"learning_rate": 2.8335301062573792e-05,
"loss": 0.7483,
"step": 733
},
{
"epoch": 0.8615023474178404,
"grad_norm": 0.3469059467315674,
"learning_rate": 2.809917355371901e-05,
"loss": 0.6902,
"step": 734
},
{
"epoch": 0.8626760563380281,
"grad_norm": 0.35148003697395325,
"learning_rate": 2.7863046044864228e-05,
"loss": 0.732,
"step": 735
},
{
"epoch": 0.863849765258216,
"grad_norm": 0.3533206880092621,
"learning_rate": 2.7626918536009443e-05,
"loss": 0.7287,
"step": 736
},
{
"epoch": 0.8650234741784038,
"grad_norm": 0.383095383644104,
"learning_rate": 2.7390791027154668e-05,
"loss": 0.8017,
"step": 737
},
{
"epoch": 0.8661971830985915,
"grad_norm": 0.3541397452354431,
"learning_rate": 2.7154663518299882e-05,
"loss": 0.7291,
"step": 738
},
{
"epoch": 0.8673708920187794,
"grad_norm": 0.35989582538604736,
"learning_rate": 2.69185360094451e-05,
"loss": 0.7211,
"step": 739
},
{
"epoch": 0.8685446009389671,
"grad_norm": 0.34245404601097107,
"learning_rate": 2.668240850059032e-05,
"loss": 0.7062,
"step": 740
},
{
"epoch": 0.8697183098591549,
"grad_norm": 0.3396112024784088,
"learning_rate": 2.644628099173554e-05,
"loss": 0.6946,
"step": 741
},
{
"epoch": 0.8708920187793427,
"grad_norm": 0.34901162981987,
"learning_rate": 2.6210153482880757e-05,
"loss": 0.7742,
"step": 742
},
{
"epoch": 0.8720657276995305,
"grad_norm": 0.3654363453388214,
"learning_rate": 2.5974025974025972e-05,
"loss": 0.7894,
"step": 743
},
{
"epoch": 0.8732394366197183,
"grad_norm": 0.3478833734989166,
"learning_rate": 2.5737898465171197e-05,
"loss": 0.6909,
"step": 744
},
{
"epoch": 0.8744131455399061,
"grad_norm": 0.3447161912918091,
"learning_rate": 2.550177095631641e-05,
"loss": 0.7166,
"step": 745
},
{
"epoch": 0.8755868544600939,
"grad_norm": 0.35436901450157166,
"learning_rate": 2.526564344746163e-05,
"loss": 0.6962,
"step": 746
},
{
"epoch": 0.8767605633802817,
"grad_norm": 0.3359661400318146,
"learning_rate": 2.502951593860685e-05,
"loss": 0.7345,
"step": 747
},
{
"epoch": 0.8779342723004695,
"grad_norm": 0.35876211524009705,
"learning_rate": 2.479338842975207e-05,
"loss": 0.6723,
"step": 748
},
{
"epoch": 0.8791079812206573,
"grad_norm": 0.35507625341415405,
"learning_rate": 2.4557260920897283e-05,
"loss": 0.6744,
"step": 749
},
{
"epoch": 0.8802816901408451,
"grad_norm": 0.3504907786846161,
"learning_rate": 2.4321133412042505e-05,
"loss": 0.7281,
"step": 750
},
{
"epoch": 0.8814553990610329,
"grad_norm": 0.3498130440711975,
"learning_rate": 2.4085005903187723e-05,
"loss": 0.7079,
"step": 751
},
{
"epoch": 0.8826291079812206,
"grad_norm": 0.36793026328086853,
"learning_rate": 2.384887839433294e-05,
"loss": 0.747,
"step": 752
},
{
"epoch": 0.8838028169014085,
"grad_norm": 0.3484232723712921,
"learning_rate": 2.3612750885478162e-05,
"loss": 0.7347,
"step": 753
},
{
"epoch": 0.8849765258215962,
"grad_norm": 0.34402692317962646,
"learning_rate": 2.3376623376623376e-05,
"loss": 0.6717,
"step": 754
},
{
"epoch": 0.886150234741784,
"grad_norm": 0.377380907535553,
"learning_rate": 2.3140495867768598e-05,
"loss": 0.7642,
"step": 755
},
{
"epoch": 0.8873239436619719,
"grad_norm": 0.361382395029068,
"learning_rate": 2.2904368358913812e-05,
"loss": 0.7081,
"step": 756
},
{
"epoch": 0.8884976525821596,
"grad_norm": 0.3643784821033478,
"learning_rate": 2.2668240850059034e-05,
"loss": 0.7219,
"step": 757
},
{
"epoch": 0.8896713615023474,
"grad_norm": 0.3974801301956177,
"learning_rate": 2.243211334120425e-05,
"loss": 0.712,
"step": 758
},
{
"epoch": 0.8908450704225352,
"grad_norm": 0.35573598742485046,
"learning_rate": 2.219598583234947e-05,
"loss": 0.7335,
"step": 759
},
{
"epoch": 0.892018779342723,
"grad_norm": 0.3532857596874237,
"learning_rate": 2.1959858323494688e-05,
"loss": 0.7013,
"step": 760
},
{
"epoch": 0.8931924882629108,
"grad_norm": 0.33362728357315063,
"learning_rate": 2.1723730814639906e-05,
"loss": 0.6739,
"step": 761
},
{
"epoch": 0.8943661971830986,
"grad_norm": 0.3325813412666321,
"learning_rate": 2.1487603305785127e-05,
"loss": 0.7099,
"step": 762
},
{
"epoch": 0.8955399061032864,
"grad_norm": 0.3451225459575653,
"learning_rate": 2.125147579693034e-05,
"loss": 0.6959,
"step": 763
},
{
"epoch": 0.8967136150234741,
"grad_norm": 0.3604796528816223,
"learning_rate": 2.1015348288075563e-05,
"loss": 0.737,
"step": 764
},
{
"epoch": 0.897887323943662,
"grad_norm": 0.34980282187461853,
"learning_rate": 2.077922077922078e-05,
"loss": 0.7206,
"step": 765
},
{
"epoch": 0.8990610328638498,
"grad_norm": 0.35130617022514343,
"learning_rate": 2.0543093270366e-05,
"loss": 0.7153,
"step": 766
},
{
"epoch": 0.9002347417840375,
"grad_norm": 0.34524810314178467,
"learning_rate": 2.0306965761511217e-05,
"loss": 0.7237,
"step": 767
},
{
"epoch": 0.9014084507042254,
"grad_norm": 0.35661572217941284,
"learning_rate": 2.0070838252656435e-05,
"loss": 0.6831,
"step": 768
},
{
"epoch": 0.9025821596244131,
"grad_norm": 0.35206255316734314,
"learning_rate": 1.9834710743801653e-05,
"loss": 0.7721,
"step": 769
},
{
"epoch": 0.903755868544601,
"grad_norm": 0.35439351201057434,
"learning_rate": 1.959858323494687e-05,
"loss": 0.7142,
"step": 770
},
{
"epoch": 0.9049295774647887,
"grad_norm": 0.33722493052482605,
"learning_rate": 1.9362455726092092e-05,
"loss": 0.6968,
"step": 771
},
{
"epoch": 0.9061032863849765,
"grad_norm": 0.3573172092437744,
"learning_rate": 1.912632821723731e-05,
"loss": 0.7301,
"step": 772
},
{
"epoch": 0.9072769953051644,
"grad_norm": 0.3347008526325226,
"learning_rate": 1.8890200708382528e-05,
"loss": 0.6721,
"step": 773
},
{
"epoch": 0.9084507042253521,
"grad_norm": 0.3563063144683838,
"learning_rate": 1.8654073199527746e-05,
"loss": 0.7233,
"step": 774
},
{
"epoch": 0.9096244131455399,
"grad_norm": 0.35159915685653687,
"learning_rate": 1.8417945690672964e-05,
"loss": 0.7184,
"step": 775
},
{
"epoch": 0.9107981220657277,
"grad_norm": 0.35826948285102844,
"learning_rate": 1.8181818181818182e-05,
"loss": 0.7301,
"step": 776
},
{
"epoch": 0.9119718309859155,
"grad_norm": 0.3533133268356323,
"learning_rate": 1.79456906729634e-05,
"loss": 0.7373,
"step": 777
},
{
"epoch": 0.9131455399061033,
"grad_norm": 0.3495820164680481,
"learning_rate": 1.7709563164108618e-05,
"loss": 0.7379,
"step": 778
},
{
"epoch": 0.9143192488262911,
"grad_norm": 0.33082085847854614,
"learning_rate": 1.747343565525384e-05,
"loss": 0.6789,
"step": 779
},
{
"epoch": 0.9154929577464789,
"grad_norm": 0.34669029712677,
"learning_rate": 1.7237308146399057e-05,
"loss": 0.6962,
"step": 780
},
{
"epoch": 0.9166666666666666,
"grad_norm": 0.3401969373226166,
"learning_rate": 1.7001180637544275e-05,
"loss": 0.717,
"step": 781
},
{
"epoch": 0.9178403755868545,
"grad_norm": 0.3488728702068329,
"learning_rate": 1.6765053128689493e-05,
"loss": 0.7087,
"step": 782
},
{
"epoch": 0.9190140845070423,
"grad_norm": 0.39244547486305237,
"learning_rate": 1.652892561983471e-05,
"loss": 0.7331,
"step": 783
},
{
"epoch": 0.92018779342723,
"grad_norm": 0.33185505867004395,
"learning_rate": 1.629279811097993e-05,
"loss": 0.6821,
"step": 784
},
{
"epoch": 0.9213615023474179,
"grad_norm": 0.34186288714408875,
"learning_rate": 1.6056670602125147e-05,
"loss": 0.6766,
"step": 785
},
{
"epoch": 0.9225352112676056,
"grad_norm": 0.34512627124786377,
"learning_rate": 1.582054309327037e-05,
"loss": 0.6837,
"step": 786
},
{
"epoch": 0.9237089201877934,
"grad_norm": 0.34042122960090637,
"learning_rate": 1.5584415584415583e-05,
"loss": 0.7266,
"step": 787
},
{
"epoch": 0.9248826291079812,
"grad_norm": 0.34173402190208435,
"learning_rate": 1.5348288075560805e-05,
"loss": 0.6998,
"step": 788
},
{
"epoch": 0.926056338028169,
"grad_norm": 0.34008073806762695,
"learning_rate": 1.511216056670602e-05,
"loss": 0.7211,
"step": 789
},
{
"epoch": 0.9272300469483568,
"grad_norm": 0.3400252163410187,
"learning_rate": 1.487603305785124e-05,
"loss": 0.6771,
"step": 790
},
{
"epoch": 0.9284037558685446,
"grad_norm": 0.3393029570579529,
"learning_rate": 1.4639905548996457e-05,
"loss": 0.7274,
"step": 791
},
{
"epoch": 0.9295774647887324,
"grad_norm": 0.3489772379398346,
"learning_rate": 1.4403778040141676e-05,
"loss": 0.7195,
"step": 792
},
{
"epoch": 0.9307511737089202,
"grad_norm": 0.3434072732925415,
"learning_rate": 1.4167650531286896e-05,
"loss": 0.6806,
"step": 793
},
{
"epoch": 0.931924882629108,
"grad_norm": 0.35593146085739136,
"learning_rate": 1.3931523022432114e-05,
"loss": 0.7026,
"step": 794
},
{
"epoch": 0.9330985915492958,
"grad_norm": 0.33654287457466125,
"learning_rate": 1.3695395513577334e-05,
"loss": 0.6655,
"step": 795
},
{
"epoch": 0.9342723004694836,
"grad_norm": 0.35049983859062195,
"learning_rate": 1.345926800472255e-05,
"loss": 0.686,
"step": 796
},
{
"epoch": 0.9354460093896714,
"grad_norm": 0.3442087471485138,
"learning_rate": 1.322314049586777e-05,
"loss": 0.7048,
"step": 797
},
{
"epoch": 0.9366197183098591,
"grad_norm": 0.3569439649581909,
"learning_rate": 1.2987012987012986e-05,
"loss": 0.7271,
"step": 798
},
{
"epoch": 0.937793427230047,
"grad_norm": 0.3418942391872406,
"learning_rate": 1.2750885478158206e-05,
"loss": 0.7132,
"step": 799
},
{
"epoch": 0.9389671361502347,
"grad_norm": 0.3399513363838196,
"learning_rate": 1.2514757969303425e-05,
"loss": 0.7046,
"step": 800
},
{
"epoch": 0.9401408450704225,
"grad_norm": 0.34055379033088684,
"learning_rate": 0.00010641553855208948,
"loss": 0.7293,
"step": 801
},
{
"epoch": 0.9413145539906104,
"grad_norm": 0.3299119770526886,
"learning_rate": 0.0001062978222483814,
"loss": 0.6779,
"step": 802
},
{
"epoch": 0.9424882629107981,
"grad_norm": 0.3833242356777191,
"learning_rate": 0.00010618010594467334,
"loss": 0.6909,
"step": 803
},
{
"epoch": 0.9436619718309859,
"grad_norm": 0.39958855509757996,
"learning_rate": 0.00010606238964096529,
"loss": 0.7307,
"step": 804
},
{
"epoch": 0.9448356807511737,
"grad_norm": 0.38618725538253784,
"learning_rate": 0.00010594467333725722,
"loss": 0.6984,
"step": 805
},
{
"epoch": 0.9460093896713615,
"grad_norm": 0.4084942936897278,
"learning_rate": 0.00010582695703354914,
"loss": 0.7456,
"step": 806
},
{
"epoch": 0.9471830985915493,
"grad_norm": 0.4109421372413635,
"learning_rate": 0.00010570924072984109,
"loss": 0.6991,
"step": 807
},
{
"epoch": 0.9483568075117371,
"grad_norm": 0.382415771484375,
"learning_rate": 0.00010559152442613303,
"loss": 0.726,
"step": 808
},
{
"epoch": 0.9495305164319249,
"grad_norm": 0.4036392867565155,
"learning_rate": 0.00010547380812242496,
"loss": 0.7264,
"step": 809
},
{
"epoch": 0.9507042253521126,
"grad_norm": 0.38903331756591797,
"learning_rate": 0.00010535609181871691,
"loss": 0.691,
"step": 810
},
{
"epoch": 0.9518779342723005,
"grad_norm": 0.3803318440914154,
"learning_rate": 0.00010523837551500883,
"loss": 0.7271,
"step": 811
},
{
"epoch": 0.9530516431924883,
"grad_norm": 0.3850460350513458,
"learning_rate": 0.00010512065921130076,
"loss": 0.7111,
"step": 812
},
{
"epoch": 0.954225352112676,
"grad_norm": 0.4110994040966034,
"learning_rate": 0.00010500294290759271,
"loss": 0.7282,
"step": 813
},
{
"epoch": 0.9553990610328639,
"grad_norm": 0.3853722810745239,
"learning_rate": 0.00010488522660388465,
"loss": 0.7194,
"step": 814
},
{
"epoch": 0.9565727699530516,
"grad_norm": 0.37440797686576843,
"learning_rate": 0.00010476751030017658,
"loss": 0.7116,
"step": 815
},
{
"epoch": 0.9577464788732394,
"grad_norm": 0.42637899518013,
"learning_rate": 0.00010464979399646853,
"loss": 0.7189,
"step": 816
},
{
"epoch": 0.9589201877934272,
"grad_norm": 0.4067356288433075,
"learning_rate": 0.00010453207769276045,
"loss": 0.7509,
"step": 817
},
{
"epoch": 0.960093896713615,
"grad_norm": 0.3854503929615021,
"learning_rate": 0.00010441436138905238,
"loss": 0.7426,
"step": 818
},
{
"epoch": 0.9612676056338029,
"grad_norm": 0.4298991858959198,
"learning_rate": 0.00010429664508534433,
"loss": 0.7528,
"step": 819
},
{
"epoch": 0.9624413145539906,
"grad_norm": 0.3748774826526642,
"learning_rate": 0.00010417892878163627,
"loss": 0.6512,
"step": 820
},
{
"epoch": 0.9636150234741784,
"grad_norm": 0.38448989391326904,
"learning_rate": 0.00010406121247792819,
"loss": 0.6929,
"step": 821
},
{
"epoch": 0.9647887323943662,
"grad_norm": 0.42416030168533325,
"learning_rate": 0.00010394349617422015,
"loss": 0.7312,
"step": 822
},
{
"epoch": 0.965962441314554,
"grad_norm": 0.3875625729560852,
"learning_rate": 0.00010382577987051207,
"loss": 0.7121,
"step": 823
},
{
"epoch": 0.9671361502347418,
"grad_norm": 0.4241638481616974,
"learning_rate": 0.000103708063566804,
"loss": 0.7248,
"step": 824
},
{
"epoch": 0.9683098591549296,
"grad_norm": 0.4026165306568146,
"learning_rate": 0.00010359034726309595,
"loss": 0.7224,
"step": 825
},
{
"epoch": 0.9694835680751174,
"grad_norm": 0.39895206689834595,
"learning_rate": 0.00010347263095938789,
"loss": 0.7193,
"step": 826
},
{
"epoch": 0.9706572769953051,
"grad_norm": 0.395463228225708,
"learning_rate": 0.00010335491465567981,
"loss": 0.7673,
"step": 827
},
{
"epoch": 0.971830985915493,
"grad_norm": 0.4351494312286377,
"learning_rate": 0.00010323719835197174,
"loss": 0.7684,
"step": 828
},
{
"epoch": 0.9730046948356808,
"grad_norm": 0.4378681182861328,
"learning_rate": 0.00010311948204826369,
"loss": 0.7277,
"step": 829
},
{
"epoch": 0.9741784037558685,
"grad_norm": 0.4214630722999573,
"learning_rate": 0.00010300176574455563,
"loss": 0.7107,
"step": 830
},
{
"epoch": 0.9753521126760564,
"grad_norm": 0.41999107599258423,
"learning_rate": 0.00010288404944084755,
"loss": 0.7328,
"step": 831
},
{
"epoch": 0.9765258215962441,
"grad_norm": 0.49026909470558167,
"learning_rate": 0.00010276633313713951,
"loss": 0.7345,
"step": 832
},
{
"epoch": 0.9776995305164319,
"grad_norm": 0.4068211317062378,
"learning_rate": 0.00010264861683343143,
"loss": 0.701,
"step": 833
},
{
"epoch": 0.9788732394366197,
"grad_norm": 0.42514288425445557,
"learning_rate": 0.00010253090052972336,
"loss": 0.729,
"step": 834
},
{
"epoch": 0.9800469483568075,
"grad_norm": 0.4883005619049072,
"learning_rate": 0.00010241318422601531,
"loss": 0.7183,
"step": 835
},
{
"epoch": 0.9812206572769953,
"grad_norm": 0.38146787881851196,
"learning_rate": 0.00010229546792230725,
"loss": 0.6977,
"step": 836
},
{
"epoch": 0.9823943661971831,
"grad_norm": 0.3898909389972687,
"learning_rate": 0.00010217775161859917,
"loss": 0.7131,
"step": 837
},
{
"epoch": 0.9835680751173709,
"grad_norm": 0.39693424105644226,
"learning_rate": 0.00010206003531489112,
"loss": 0.7184,
"step": 838
},
{
"epoch": 0.9847417840375586,
"grad_norm": 0.3968975841999054,
"learning_rate": 0.00010194231901118305,
"loss": 0.7536,
"step": 839
},
{
"epoch": 0.9859154929577465,
"grad_norm": 0.4030087888240814,
"learning_rate": 0.00010182460270747499,
"loss": 0.7156,
"step": 840
},
{
"epoch": 0.9870892018779343,
"grad_norm": 0.37477344274520874,
"learning_rate": 0.00010170688640376693,
"loss": 0.6815,
"step": 841
},
{
"epoch": 0.9882629107981221,
"grad_norm": 0.40929409861564636,
"learning_rate": 0.00010158917010005887,
"loss": 0.6827,
"step": 842
},
{
"epoch": 0.9894366197183099,
"grad_norm": 0.36350882053375244,
"learning_rate": 0.00010147145379635079,
"loss": 0.6927,
"step": 843
},
{
"epoch": 0.9906103286384976,
"grad_norm": 0.3828059434890747,
"learning_rate": 0.00010135373749264274,
"loss": 0.7254,
"step": 844
},
{
"epoch": 0.9917840375586855,
"grad_norm": 0.4095743000507355,
"learning_rate": 0.00010123602118893467,
"loss": 0.719,
"step": 845
},
{
"epoch": 0.9929577464788732,
"grad_norm": 0.37418296933174133,
"learning_rate": 0.0001011183048852266,
"loss": 0.682,
"step": 846
},
{
"epoch": 0.994131455399061,
"grad_norm": 0.39427751302719116,
"learning_rate": 0.00010100058858151855,
"loss": 0.7742,
"step": 847
},
{
"epoch": 0.9953051643192489,
"grad_norm": 0.3696395754814148,
"learning_rate": 0.00010088287227781048,
"loss": 0.7377,
"step": 848
},
{
"epoch": 0.9964788732394366,
"grad_norm": 0.36249879002571106,
"learning_rate": 0.00010076515597410241,
"loss": 0.7237,
"step": 849
},
{
"epoch": 0.9976525821596244,
"grad_norm": 0.3712272047996521,
"learning_rate": 0.00010064743967039436,
"loss": 0.6737,
"step": 850
},
{
"epoch": 0.9988262910798122,
"grad_norm": 0.37550613284111023,
"learning_rate": 0.00010052972336668629,
"loss": 0.7147,
"step": 851
},
{
"epoch": 1.0,
"grad_norm": 0.405351459980011,
"learning_rate": 0.00010041200706297821,
"loss": 0.7364,
"step": 852
},
{
"epoch": 1.0011737089201878,
"grad_norm": 0.39747750759124756,
"learning_rate": 0.00010029429075927018,
"loss": 0.6934,
"step": 853
},
{
"epoch": 1.0023474178403755,
"grad_norm": 0.3695623576641083,
"learning_rate": 0.0001001765744555621,
"loss": 0.6971,
"step": 854
},
{
"epoch": 1.0035211267605635,
"grad_norm": 0.3880208134651184,
"learning_rate": 0.00010005885815185403,
"loss": 0.7219,
"step": 855
},
{
"epoch": 1.0046948356807512,
"grad_norm": 0.40131011605262756,
"learning_rate": 9.994114184814597e-05,
"loss": 0.6925,
"step": 856
},
{
"epoch": 1.005868544600939,
"grad_norm": 0.38630256056785583,
"learning_rate": 9.982342554443791e-05,
"loss": 0.7412,
"step": 857
},
{
"epoch": 1.0070422535211268,
"grad_norm": 0.39141979813575745,
"learning_rate": 9.970570924072985e-05,
"loss": 0.7089,
"step": 858
},
{
"epoch": 1.0082159624413145,
"grad_norm": 0.3811167776584625,
"learning_rate": 9.958799293702178e-05,
"loss": 0.6979,
"step": 859
},
{
"epoch": 1.0093896713615023,
"grad_norm": 0.38177528977394104,
"learning_rate": 9.947027663331372e-05,
"loss": 0.7181,
"step": 860
},
{
"epoch": 1.0105633802816902,
"grad_norm": 0.36225804686546326,
"learning_rate": 9.935256032960567e-05,
"loss": 0.6495,
"step": 861
},
{
"epoch": 1.011737089201878,
"grad_norm": 0.3796376585960388,
"learning_rate": 9.923484402589759e-05,
"loss": 0.6661,
"step": 862
},
{
"epoch": 1.0129107981220657,
"grad_norm": 0.3896029591560364,
"learning_rate": 9.911712772218953e-05,
"loss": 0.6705,
"step": 863
},
{
"epoch": 1.0140845070422535,
"grad_norm": 0.35688912868499756,
"learning_rate": 9.899941141848147e-05,
"loss": 0.6835,
"step": 864
},
{
"epoch": 1.0152582159624413,
"grad_norm": 0.3919657766819,
"learning_rate": 9.88816951147734e-05,
"loss": 0.6771,
"step": 865
},
{
"epoch": 1.016431924882629,
"grad_norm": 0.390311062335968,
"learning_rate": 9.876397881106534e-05,
"loss": 0.7208,
"step": 866
},
{
"epoch": 1.017605633802817,
"grad_norm": 0.3857402205467224,
"learning_rate": 9.864626250735727e-05,
"loss": 0.7321,
"step": 867
},
{
"epoch": 1.0187793427230047,
"grad_norm": 0.3688738942146301,
"learning_rate": 9.852854620364921e-05,
"loss": 0.6853,
"step": 868
},
{
"epoch": 1.0199530516431925,
"grad_norm": 0.3814820647239685,
"learning_rate": 9.841082989994114e-05,
"loss": 0.664,
"step": 869
},
{
"epoch": 1.0211267605633803,
"grad_norm": 0.3849344253540039,
"learning_rate": 9.829311359623309e-05,
"loss": 0.6844,
"step": 870
},
{
"epoch": 1.022300469483568,
"grad_norm": 0.36203038692474365,
"learning_rate": 9.817539729252502e-05,
"loss": 0.7201,
"step": 871
},
{
"epoch": 1.0234741784037558,
"grad_norm": 0.36614471673965454,
"learning_rate": 9.805768098881696e-05,
"loss": 0.659,
"step": 872
},
{
"epoch": 1.0246478873239437,
"grad_norm": 0.3908173143863678,
"learning_rate": 9.79399646851089e-05,
"loss": 0.6638,
"step": 873
},
{
"epoch": 1.0258215962441315,
"grad_norm": 0.35966452956199646,
"learning_rate": 9.782224838140083e-05,
"loss": 0.7187,
"step": 874
},
{
"epoch": 1.0269953051643192,
"grad_norm": 0.40878093242645264,
"learning_rate": 9.770453207769276e-05,
"loss": 0.691,
"step": 875
},
{
"epoch": 1.028169014084507,
"grad_norm": 0.38903382420539856,
"learning_rate": 9.75868157739847e-05,
"loss": 0.718,
"step": 876
},
{
"epoch": 1.0293427230046948,
"grad_norm": 0.3865324556827545,
"learning_rate": 9.746909947027663e-05,
"loss": 0.7331,
"step": 877
},
{
"epoch": 1.0305164319248827,
"grad_norm": 0.37417513132095337,
"learning_rate": 9.735138316656858e-05,
"loss": 0.677,
"step": 878
},
{
"epoch": 1.0316901408450705,
"grad_norm": 0.38043439388275146,
"learning_rate": 9.72336668628605e-05,
"loss": 0.6932,
"step": 879
},
{
"epoch": 1.0328638497652582,
"grad_norm": 0.37418729066848755,
"learning_rate": 9.711595055915245e-05,
"loss": 0.7119,
"step": 880
},
{
"epoch": 1.034037558685446,
"grad_norm": 0.4013047218322754,
"learning_rate": 9.699823425544438e-05,
"loss": 0.7041,
"step": 881
},
{
"epoch": 1.0352112676056338,
"grad_norm": 0.38462570309638977,
"learning_rate": 9.688051795173632e-05,
"loss": 0.6861,
"step": 882
},
{
"epoch": 1.0363849765258215,
"grad_norm": 0.3900148868560791,
"learning_rate": 9.676280164802825e-05,
"loss": 0.6382,
"step": 883
},
{
"epoch": 1.0375586854460095,
"grad_norm": 0.3882652819156647,
"learning_rate": 9.66450853443202e-05,
"loss": 0.6948,
"step": 884
},
{
"epoch": 1.0387323943661972,
"grad_norm": 0.36546608805656433,
"learning_rate": 9.652736904061212e-05,
"loss": 0.7064,
"step": 885
},
{
"epoch": 1.039906103286385,
"grad_norm": 0.3788559138774872,
"learning_rate": 9.640965273690407e-05,
"loss": 0.7129,
"step": 886
},
{
"epoch": 1.0410798122065728,
"grad_norm": 0.3979467451572418,
"learning_rate": 9.6291936433196e-05,
"loss": 0.7196,
"step": 887
},
{
"epoch": 1.0422535211267605,
"grad_norm": 0.3777488172054291,
"learning_rate": 9.617422012948794e-05,
"loss": 0.6922,
"step": 888
},
{
"epoch": 1.0434272300469483,
"grad_norm": 0.39730504155158997,
"learning_rate": 9.605650382577987e-05,
"loss": 0.6529,
"step": 889
},
{
"epoch": 1.0446009389671362,
"grad_norm": 0.39619576930999756,
"learning_rate": 9.593878752207182e-05,
"loss": 0.6505,
"step": 890
},
{
"epoch": 1.045774647887324,
"grad_norm": 0.3763888478279114,
"learning_rate": 9.582107121836374e-05,
"loss": 0.638,
"step": 891
},
{
"epoch": 1.0469483568075117,
"grad_norm": 0.3947450518608093,
"learning_rate": 9.570335491465569e-05,
"loss": 0.7099,
"step": 892
},
{
"epoch": 1.0481220657276995,
"grad_norm": 0.43239885568618774,
"learning_rate": 9.558563861094763e-05,
"loss": 0.7112,
"step": 893
},
{
"epoch": 1.0492957746478873,
"grad_norm": 0.37725165486335754,
"learning_rate": 9.546792230723956e-05,
"loss": 0.6775,
"step": 894
},
{
"epoch": 1.050469483568075,
"grad_norm": 0.3807140290737152,
"learning_rate": 9.53502060035315e-05,
"loss": 0.7201,
"step": 895
},
{
"epoch": 1.051643192488263,
"grad_norm": 0.40270236134529114,
"learning_rate": 9.523248969982343e-05,
"loss": 0.6908,
"step": 896
},
{
"epoch": 1.0528169014084507,
"grad_norm": 0.38907137513160706,
"learning_rate": 9.511477339611536e-05,
"loss": 0.7274,
"step": 897
},
{
"epoch": 1.0539906103286385,
"grad_norm": 0.35074397921562195,
"learning_rate": 9.49970570924073e-05,
"loss": 0.6765,
"step": 898
},
{
"epoch": 1.0551643192488263,
"grad_norm": 0.37548649311065674,
"learning_rate": 9.487934078869925e-05,
"loss": 0.7258,
"step": 899
},
{
"epoch": 1.056338028169014,
"grad_norm": 0.3947518467903137,
"learning_rate": 9.476162448499118e-05,
"loss": 0.7142,
"step": 900
},
{
"epoch": 1.057511737089202,
"grad_norm": 0.36888387799263,
"learning_rate": 9.464390818128312e-05,
"loss": 0.664,
"step": 901
},
{
"epoch": 1.0586854460093897,
"grad_norm": 0.3735831379890442,
"learning_rate": 9.452619187757505e-05,
"loss": 0.6914,
"step": 902
},
{
"epoch": 1.0598591549295775,
"grad_norm": 0.3840358257293701,
"learning_rate": 9.440847557386698e-05,
"loss": 0.663,
"step": 903
},
{
"epoch": 1.0610328638497653,
"grad_norm": 0.408840537071228,
"learning_rate": 9.429075927015892e-05,
"loss": 0.7225,
"step": 904
},
{
"epoch": 1.062206572769953,
"grad_norm": 0.36408165097236633,
"learning_rate": 9.417304296645085e-05,
"loss": 0.6744,
"step": 905
},
{
"epoch": 1.0633802816901408,
"grad_norm": 0.4005196690559387,
"learning_rate": 9.405532666274279e-05,
"loss": 0.7285,
"step": 906
},
{
"epoch": 1.0645539906103287,
"grad_norm": 0.3824830949306488,
"learning_rate": 9.393761035903474e-05,
"loss": 0.6978,
"step": 907
},
{
"epoch": 1.0657276995305165,
"grad_norm": 0.38410818576812744,
"learning_rate": 9.381989405532666e-05,
"loss": 0.6725,
"step": 908
},
{
"epoch": 1.0669014084507042,
"grad_norm": 0.37026217579841614,
"learning_rate": 9.37021777516186e-05,
"loss": 0.6908,
"step": 909
},
{
"epoch": 1.068075117370892,
"grad_norm": 0.37652963399887085,
"learning_rate": 9.358446144791054e-05,
"loss": 0.6674,
"step": 910
},
{
"epoch": 1.0692488262910798,
"grad_norm": 0.40584585070610046,
"learning_rate": 9.346674514420247e-05,
"loss": 0.7087,
"step": 911
},
{
"epoch": 1.0704225352112675,
"grad_norm": 0.3777616620063782,
"learning_rate": 9.334902884049441e-05,
"loss": 0.6633,
"step": 912
},
{
"epoch": 1.0715962441314555,
"grad_norm": 0.35584181547164917,
"learning_rate": 9.323131253678636e-05,
"loss": 0.6484,
"step": 913
},
{
"epoch": 1.0727699530516432,
"grad_norm": 0.40920573472976685,
"learning_rate": 9.311359623307828e-05,
"loss": 0.6781,
"step": 914
},
{
"epoch": 1.073943661971831,
"grad_norm": 0.37617766857147217,
"learning_rate": 9.299587992937023e-05,
"loss": 0.6785,
"step": 915
},
{
"epoch": 1.0751173708920188,
"grad_norm": 0.36854755878448486,
"learning_rate": 9.287816362566216e-05,
"loss": 0.6805,
"step": 916
},
{
"epoch": 1.0762910798122065,
"grad_norm": 0.3820021152496338,
"learning_rate": 9.27604473219541e-05,
"loss": 0.7413,
"step": 917
},
{
"epoch": 1.0774647887323943,
"grad_norm": 0.3654205799102783,
"learning_rate": 9.264273101824603e-05,
"loss": 0.6996,
"step": 918
},
{
"epoch": 1.0786384976525822,
"grad_norm": 0.36847448348999023,
"learning_rate": 9.252501471453798e-05,
"loss": 0.6593,
"step": 919
},
{
"epoch": 1.07981220657277,
"grad_norm": 0.4072454571723938,
"learning_rate": 9.24072984108299e-05,
"loss": 0.7062,
"step": 920
},
{
"epoch": 1.0809859154929577,
"grad_norm": 0.37201663851737976,
"learning_rate": 9.228958210712185e-05,
"loss": 0.7188,
"step": 921
},
{
"epoch": 1.0821596244131455,
"grad_norm": 0.40708494186401367,
"learning_rate": 9.217186580341378e-05,
"loss": 0.6984,
"step": 922
},
{
"epoch": 1.0833333333333333,
"grad_norm": 0.37668758630752563,
"learning_rate": 9.205414949970572e-05,
"loss": 0.6856,
"step": 923
},
{
"epoch": 1.084507042253521,
"grad_norm": 0.41518712043762207,
"learning_rate": 9.193643319599765e-05,
"loss": 0.7093,
"step": 924
},
{
"epoch": 1.085680751173709,
"grad_norm": 0.3661474883556366,
"learning_rate": 9.181871689228958e-05,
"loss": 0.6765,
"step": 925
},
{
"epoch": 1.0868544600938967,
"grad_norm": 0.3910673260688782,
"learning_rate": 9.170100058858152e-05,
"loss": 0.6778,
"step": 926
},
{
"epoch": 1.0880281690140845,
"grad_norm": 0.3851100206375122,
"learning_rate": 9.158328428487345e-05,
"loss": 0.7188,
"step": 927
},
{
"epoch": 1.0892018779342723,
"grad_norm": 0.36254799365997314,
"learning_rate": 9.14655679811654e-05,
"loss": 0.7182,
"step": 928
},
{
"epoch": 1.09037558685446,
"grad_norm": 0.39364567399024963,
"learning_rate": 9.134785167745734e-05,
"loss": 0.7208,
"step": 929
},
{
"epoch": 1.091549295774648,
"grad_norm": 0.3755466639995575,
"learning_rate": 9.123013537374927e-05,
"loss": 0.6771,
"step": 930
},
{
"epoch": 1.0927230046948357,
"grad_norm": 0.361087828874588,
"learning_rate": 9.11124190700412e-05,
"loss": 0.6541,
"step": 931
},
{
"epoch": 1.0938967136150235,
"grad_norm": 0.37327754497528076,
"learning_rate": 9.099470276633314e-05,
"loss": 0.698,
"step": 932
},
{
"epoch": 1.0950704225352113,
"grad_norm": 0.38413748145103455,
"learning_rate": 9.087698646262507e-05,
"loss": 0.6933,
"step": 933
},
{
"epoch": 1.096244131455399,
"grad_norm": 0.4182147681713104,
"learning_rate": 9.075927015891701e-05,
"loss": 0.6776,
"step": 934
},
{
"epoch": 1.0974178403755868,
"grad_norm": 0.3987724483013153,
"learning_rate": 9.064155385520894e-05,
"loss": 0.694,
"step": 935
},
{
"epoch": 1.0985915492957747,
"grad_norm": 0.37629225850105286,
"learning_rate": 9.052383755150089e-05,
"loss": 0.6565,
"step": 936
},
{
"epoch": 1.0997652582159625,
"grad_norm": 0.38973352313041687,
"learning_rate": 9.040612124779281e-05,
"loss": 0.6739,
"step": 937
},
{
"epoch": 1.1009389671361502,
"grad_norm": 0.3845914900302887,
"learning_rate": 9.028840494408476e-05,
"loss": 0.6788,
"step": 938
},
{
"epoch": 1.102112676056338,
"grad_norm": 0.3861023485660553,
"learning_rate": 9.01706886403767e-05,
"loss": 0.6763,
"step": 939
},
{
"epoch": 1.1032863849765258,
"grad_norm": 0.37565183639526367,
"learning_rate": 9.005297233666863e-05,
"loss": 0.6478,
"step": 940
},
{
"epoch": 1.1044600938967135,
"grad_norm": 0.4068315029144287,
"learning_rate": 8.993525603296056e-05,
"loss": 0.6752,
"step": 941
},
{
"epoch": 1.1056338028169015,
"grad_norm": 0.37796974182128906,
"learning_rate": 8.981753972925251e-05,
"loss": 0.7355,
"step": 942
},
{
"epoch": 1.1068075117370892,
"grad_norm": 0.4024117887020111,
"learning_rate": 8.969982342554443e-05,
"loss": 0.6648,
"step": 943
},
{
"epoch": 1.107981220657277,
"grad_norm": 0.404442697763443,
"learning_rate": 8.958210712183638e-05,
"loss": 0.7,
"step": 944
},
{
"epoch": 1.1091549295774648,
"grad_norm": 0.35948899388313293,
"learning_rate": 8.946439081812832e-05,
"loss": 0.6859,
"step": 945
},
{
"epoch": 1.1103286384976525,
"grad_norm": 0.4014012813568115,
"learning_rate": 8.934667451442025e-05,
"loss": 0.7294,
"step": 946
},
{
"epoch": 1.1115023474178405,
"grad_norm": 0.38261109590530396,
"learning_rate": 8.922895821071219e-05,
"loss": 0.6965,
"step": 947
},
{
"epoch": 1.1126760563380282,
"grad_norm": 0.39297208189964294,
"learning_rate": 8.911124190700413e-05,
"loss": 0.7153,
"step": 948
},
{
"epoch": 1.113849765258216,
"grad_norm": 0.3710176348686218,
"learning_rate": 8.899352560329605e-05,
"loss": 0.7085,
"step": 949
},
{
"epoch": 1.1150234741784038,
"grad_norm": 0.3750080168247223,
"learning_rate": 8.8875809299588e-05,
"loss": 0.6739,
"step": 950
},
{
"epoch": 1.1161971830985915,
"grad_norm": 0.3672105073928833,
"learning_rate": 8.875809299587994e-05,
"loss": 0.7097,
"step": 951
},
{
"epoch": 1.1173708920187793,
"grad_norm": 0.3663265109062195,
"learning_rate": 8.864037669217187e-05,
"loss": 0.6594,
"step": 952
},
{
"epoch": 1.1185446009389672,
"grad_norm": 0.4023442268371582,
"learning_rate": 8.85226603884638e-05,
"loss": 0.7186,
"step": 953
},
{
"epoch": 1.119718309859155,
"grad_norm": 0.36602139472961426,
"learning_rate": 8.840494408475574e-05,
"loss": 0.67,
"step": 954
},
{
"epoch": 1.1208920187793427,
"grad_norm": 0.36866381764411926,
"learning_rate": 8.828722778104768e-05,
"loss": 0.6954,
"step": 955
},
{
"epoch": 1.1220657276995305,
"grad_norm": 0.38905832171440125,
"learning_rate": 8.816951147733961e-05,
"loss": 0.7214,
"step": 956
},
{
"epoch": 1.1232394366197183,
"grad_norm": 0.3806670010089874,
"learning_rate": 8.805179517363156e-05,
"loss": 0.6679,
"step": 957
},
{
"epoch": 1.124413145539906,
"grad_norm": 0.3796343505382538,
"learning_rate": 8.793407886992349e-05,
"loss": 0.6334,
"step": 958
},
{
"epoch": 1.125586854460094,
"grad_norm": 0.4143288731575012,
"learning_rate": 8.781636256621543e-05,
"loss": 0.7484,
"step": 959
},
{
"epoch": 1.1267605633802817,
"grad_norm": 0.3692832887172699,
"learning_rate": 8.769864626250736e-05,
"loss": 0.6581,
"step": 960
},
{
"epoch": 1.1279342723004695,
"grad_norm": 0.39971667528152466,
"learning_rate": 8.75809299587993e-05,
"loss": 0.7252,
"step": 961
},
{
"epoch": 1.1291079812206573,
"grad_norm": 0.391924113035202,
"learning_rate": 8.746321365509123e-05,
"loss": 0.673,
"step": 962
},
{
"epoch": 1.130281690140845,
"grad_norm": 0.39626866579055786,
"learning_rate": 8.734549735138317e-05,
"loss": 0.7161,
"step": 963
},
{
"epoch": 1.131455399061033,
"grad_norm": 0.3812800347805023,
"learning_rate": 8.72277810476751e-05,
"loss": 0.6735,
"step": 964
},
{
"epoch": 1.1326291079812207,
"grad_norm": 0.36054447293281555,
"learning_rate": 8.711006474396705e-05,
"loss": 0.6861,
"step": 965
},
{
"epoch": 1.1338028169014085,
"grad_norm": 0.41179588437080383,
"learning_rate": 8.699234844025897e-05,
"loss": 0.7151,
"step": 966
},
{
"epoch": 1.1349765258215962,
"grad_norm": 0.3688051998615265,
"learning_rate": 8.687463213655092e-05,
"loss": 0.6608,
"step": 967
},
{
"epoch": 1.136150234741784,
"grad_norm": 0.3877013325691223,
"learning_rate": 8.675691583284285e-05,
"loss": 0.6826,
"step": 968
},
{
"epoch": 1.1373239436619718,
"grad_norm": 0.38986387848854065,
"learning_rate": 8.663919952913479e-05,
"loss": 0.6915,
"step": 969
},
{
"epoch": 1.1384976525821595,
"grad_norm": 0.41986656188964844,
"learning_rate": 8.652148322542672e-05,
"loss": 0.7471,
"step": 970
},
{
"epoch": 1.1396713615023475,
"grad_norm": 0.3977747857570648,
"learning_rate": 8.640376692171867e-05,
"loss": 0.6844,
"step": 971
},
{
"epoch": 1.1408450704225352,
"grad_norm": 0.3956218361854553,
"learning_rate": 8.628605061801059e-05,
"loss": 0.6586,
"step": 972
},
{
"epoch": 1.142018779342723,
"grad_norm": 0.3789028227329254,
"learning_rate": 8.616833431430254e-05,
"loss": 0.7415,
"step": 973
},
{
"epoch": 1.1431924882629108,
"grad_norm": 0.3878764808177948,
"learning_rate": 8.605061801059447e-05,
"loss": 0.6559,
"step": 974
},
{
"epoch": 1.1443661971830985,
"grad_norm": 0.37901559472084045,
"learning_rate": 8.593290170688641e-05,
"loss": 0.6685,
"step": 975
},
{
"epoch": 1.1455399061032865,
"grad_norm": 0.40399041771888733,
"learning_rate": 8.581518540317834e-05,
"loss": 0.6602,
"step": 976
},
{
"epoch": 1.1467136150234742,
"grad_norm": 0.38144391775131226,
"learning_rate": 8.569746909947029e-05,
"loss": 0.6683,
"step": 977
},
{
"epoch": 1.147887323943662,
"grad_norm": 0.3610433042049408,
"learning_rate": 8.557975279576221e-05,
"loss": 0.6579,
"step": 978
},
{
"epoch": 1.1490610328638498,
"grad_norm": 0.42147722840309143,
"learning_rate": 8.546203649205416e-05,
"loss": 0.6997,
"step": 979
},
{
"epoch": 1.1502347417840375,
"grad_norm": 0.3799455761909485,
"learning_rate": 8.53443201883461e-05,
"loss": 0.7096,
"step": 980
},
{
"epoch": 1.1514084507042253,
"grad_norm": 0.4173739552497864,
"learning_rate": 8.522660388463803e-05,
"loss": 0.6708,
"step": 981
},
{
"epoch": 1.1525821596244132,
"grad_norm": 0.3997640013694763,
"learning_rate": 8.510888758092996e-05,
"loss": 0.6514,
"step": 982
},
{
"epoch": 1.153755868544601,
"grad_norm": 0.3758656978607178,
"learning_rate": 8.49911712772219e-05,
"loss": 0.6442,
"step": 983
},
{
"epoch": 1.1549295774647887,
"grad_norm": 0.37429675459861755,
"learning_rate": 8.487345497351383e-05,
"loss": 0.6619,
"step": 984
},
{
"epoch": 1.1561032863849765,
"grad_norm": 0.3747265934944153,
"learning_rate": 8.475573866980577e-05,
"loss": 0.7107,
"step": 985
},
{
"epoch": 1.1572769953051643,
"grad_norm": 0.37782514095306396,
"learning_rate": 8.463802236609771e-05,
"loss": 0.7241,
"step": 986
},
{
"epoch": 1.158450704225352,
"grad_norm": 0.3703122138977051,
"learning_rate": 8.452030606238965e-05,
"loss": 0.6952,
"step": 987
},
{
"epoch": 1.15962441314554,
"grad_norm": 0.37990477681159973,
"learning_rate": 8.440258975868158e-05,
"loss": 0.7364,
"step": 988
},
{
"epoch": 1.1607981220657277,
"grad_norm": 0.42046844959259033,
"learning_rate": 8.428487345497352e-05,
"loss": 0.695,
"step": 989
},
{
"epoch": 1.1619718309859155,
"grad_norm": 0.3745966851711273,
"learning_rate": 8.416715715126545e-05,
"loss": 0.6875,
"step": 990
},
{
"epoch": 1.1631455399061033,
"grad_norm": 0.3496320843696594,
"learning_rate": 8.404944084755739e-05,
"loss": 0.6826,
"step": 991
},
{
"epoch": 1.164319248826291,
"grad_norm": 0.39181873202323914,
"learning_rate": 8.393172454384934e-05,
"loss": 0.6937,
"step": 992
},
{
"epoch": 1.165492957746479,
"grad_norm": 0.3910543620586395,
"learning_rate": 8.381400824014126e-05,
"loss": 0.749,
"step": 993
},
{
"epoch": 1.1666666666666667,
"grad_norm": 0.3770748972892761,
"learning_rate": 8.36962919364332e-05,
"loss": 0.6743,
"step": 994
},
{
"epoch": 1.1678403755868545,
"grad_norm": 0.3675018846988678,
"learning_rate": 8.357857563272513e-05,
"loss": 0.6499,
"step": 995
},
{
"epoch": 1.1690140845070423,
"grad_norm": 0.36867639422416687,
"learning_rate": 8.346085932901707e-05,
"loss": 0.6642,
"step": 996
},
{
"epoch": 1.17018779342723,
"grad_norm": 0.3860320746898651,
"learning_rate": 8.334314302530901e-05,
"loss": 0.6947,
"step": 997
},
{
"epoch": 1.1713615023474178,
"grad_norm": 0.36680731177330017,
"learning_rate": 8.322542672160094e-05,
"loss": 0.7111,
"step": 998
},
{
"epoch": 1.1725352112676055,
"grad_norm": 0.38997524976730347,
"learning_rate": 8.310771041789288e-05,
"loss": 0.6842,
"step": 999
},
{
"epoch": 1.1737089201877935,
"grad_norm": 0.3883102834224701,
"learning_rate": 8.298999411418483e-05,
"loss": 0.6655,
"step": 1000
}
],
"logging_steps": 1,
"max_steps": 1704,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"total_flos": 1.4844237184940114e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}