PEFT
TensorBoard
Safetensors
llama
alignment-handbook
Generated from Trainer
xu3kev's picture
Upload folder using huggingface_hub
844184f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.99812382739212,
"eval_steps": 500,
"global_step": 266,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00375234521575985,
"grad_norm": 1.1422045632346107,
"learning_rate": 2.5e-06,
"loss": 1.4765,
"step": 1
},
{
"epoch": 0.0075046904315197,
"grad_norm": 1.10606272348653,
"learning_rate": 5e-06,
"loss": 1.4679,
"step": 2
},
{
"epoch": 0.01125703564727955,
"grad_norm": 1.124285036588602,
"learning_rate": 7.5e-06,
"loss": 1.4926,
"step": 3
},
{
"epoch": 0.0150093808630394,
"grad_norm": 1.175650360755083,
"learning_rate": 1e-05,
"loss": 1.4946,
"step": 4
},
{
"epoch": 0.01876172607879925,
"grad_norm": 1.1176055565878193,
"learning_rate": 1.25e-05,
"loss": 1.4803,
"step": 5
},
{
"epoch": 0.0225140712945591,
"grad_norm": 1.080965163228283,
"learning_rate": 1.5e-05,
"loss": 1.4069,
"step": 6
},
{
"epoch": 0.02626641651031895,
"grad_norm": 1.073199125902437,
"learning_rate": 1.75e-05,
"loss": 1.4175,
"step": 7
},
{
"epoch": 0.0300187617260788,
"grad_norm": 0.9895651667655394,
"learning_rate": 2e-05,
"loss": 1.3952,
"step": 8
},
{
"epoch": 0.03377110694183865,
"grad_norm": 0.8484409515335725,
"learning_rate": 2.25e-05,
"loss": 1.3084,
"step": 9
},
{
"epoch": 0.0375234521575985,
"grad_norm": 0.656224580389129,
"learning_rate": 2.5e-05,
"loss": 1.2224,
"step": 10
},
{
"epoch": 0.04127579737335835,
"grad_norm": 0.6681802871972625,
"learning_rate": 2.7500000000000004e-05,
"loss": 1.2279,
"step": 11
},
{
"epoch": 0.0450281425891182,
"grad_norm": 0.6445930931164492,
"learning_rate": 3e-05,
"loss": 1.1869,
"step": 12
},
{
"epoch": 0.04878048780487805,
"grad_norm": 0.6774830464098534,
"learning_rate": 3.2500000000000004e-05,
"loss": 1.1345,
"step": 13
},
{
"epoch": 0.0525328330206379,
"grad_norm": 0.7129957171173121,
"learning_rate": 3.5e-05,
"loss": 1.0342,
"step": 14
},
{
"epoch": 0.05628517823639775,
"grad_norm": 0.6988046692034513,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.9683,
"step": 15
},
{
"epoch": 0.0600375234521576,
"grad_norm": 0.7305746200421179,
"learning_rate": 4e-05,
"loss": 0.8998,
"step": 16
},
{
"epoch": 0.06378986866791744,
"grad_norm": 0.6303366234907746,
"learning_rate": 4.25e-05,
"loss": 0.8585,
"step": 17
},
{
"epoch": 0.0675422138836773,
"grad_norm": 0.6262466336688131,
"learning_rate": 4.5e-05,
"loss": 0.7913,
"step": 18
},
{
"epoch": 0.07129455909943715,
"grad_norm": 0.5934168135285741,
"learning_rate": 4.75e-05,
"loss": 0.7358,
"step": 19
},
{
"epoch": 0.075046904315197,
"grad_norm": 0.5003901957180881,
"learning_rate": 5e-05,
"loss": 0.6762,
"step": 20
},
{
"epoch": 0.07879924953095685,
"grad_norm": 0.44247641980699626,
"learning_rate": 5.25e-05,
"loss": 0.6412,
"step": 21
},
{
"epoch": 0.0825515947467167,
"grad_norm": 0.33108999413889184,
"learning_rate": 5.500000000000001e-05,
"loss": 0.6021,
"step": 22
},
{
"epoch": 0.08630393996247655,
"grad_norm": 0.30987297699741684,
"learning_rate": 5.7499999999999995e-05,
"loss": 0.5678,
"step": 23
},
{
"epoch": 0.0900562851782364,
"grad_norm": 0.2879383883871797,
"learning_rate": 6e-05,
"loss": 0.5653,
"step": 24
},
{
"epoch": 0.09380863039399624,
"grad_norm": 0.4921785771111378,
"learning_rate": 6.25e-05,
"loss": 0.5397,
"step": 25
},
{
"epoch": 0.0975609756097561,
"grad_norm": 0.23455468567206647,
"learning_rate": 6.500000000000001e-05,
"loss": 0.5392,
"step": 26
},
{
"epoch": 0.10131332082551595,
"grad_norm": 0.21551936996375468,
"learning_rate": 6.750000000000001e-05,
"loss": 0.5423,
"step": 27
},
{
"epoch": 0.1050656660412758,
"grad_norm": 0.2138475404490417,
"learning_rate": 7e-05,
"loss": 0.5072,
"step": 28
},
{
"epoch": 0.10881801125703565,
"grad_norm": 0.1981260579789532,
"learning_rate": 7.25e-05,
"loss": 0.4927,
"step": 29
},
{
"epoch": 0.1125703564727955,
"grad_norm": 0.19766175304738637,
"learning_rate": 7.500000000000001e-05,
"loss": 0.4992,
"step": 30
},
{
"epoch": 0.11632270168855535,
"grad_norm": 0.16180823154197033,
"learning_rate": 7.75e-05,
"loss": 0.5078,
"step": 31
},
{
"epoch": 0.1200750469043152,
"grad_norm": 0.15792678361397225,
"learning_rate": 8e-05,
"loss": 0.4834,
"step": 32
},
{
"epoch": 0.12382739212007504,
"grad_norm": 0.17901823211719936,
"learning_rate": 8.25e-05,
"loss": 0.5038,
"step": 33
},
{
"epoch": 0.1275797373358349,
"grad_norm": 0.15291985686600748,
"learning_rate": 8.5e-05,
"loss": 0.463,
"step": 34
},
{
"epoch": 0.13133208255159476,
"grad_norm": 0.1402347205035838,
"learning_rate": 8.75e-05,
"loss": 0.4678,
"step": 35
},
{
"epoch": 0.1350844277673546,
"grad_norm": 0.1292157193781673,
"learning_rate": 9e-05,
"loss": 0.48,
"step": 36
},
{
"epoch": 0.13883677298311445,
"grad_norm": 0.12200374600393228,
"learning_rate": 9.250000000000001e-05,
"loss": 0.4678,
"step": 37
},
{
"epoch": 0.1425891181988743,
"grad_norm": 0.12645974836123272,
"learning_rate": 9.5e-05,
"loss": 0.4783,
"step": 38
},
{
"epoch": 0.14634146341463414,
"grad_norm": 0.12515993032794973,
"learning_rate": 9.75e-05,
"loss": 0.4558,
"step": 39
},
{
"epoch": 0.150093808630394,
"grad_norm": 0.1257915818218713,
"learning_rate": 0.0001,
"loss": 0.4582,
"step": 40
},
{
"epoch": 0.15384615384615385,
"grad_norm": 0.11519718216680118,
"learning_rate": 0.0001025,
"loss": 0.4433,
"step": 41
},
{
"epoch": 0.1575984990619137,
"grad_norm": 0.11408287464445384,
"learning_rate": 0.000105,
"loss": 0.4566,
"step": 42
},
{
"epoch": 0.16135084427767354,
"grad_norm": 0.11355997287120467,
"learning_rate": 0.0001075,
"loss": 0.4601,
"step": 43
},
{
"epoch": 0.1651031894934334,
"grad_norm": 0.1236061343834286,
"learning_rate": 0.00011000000000000002,
"loss": 0.4279,
"step": 44
},
{
"epoch": 0.16885553470919323,
"grad_norm": 0.11292335583297317,
"learning_rate": 0.00011250000000000001,
"loss": 0.4242,
"step": 45
},
{
"epoch": 0.1726078799249531,
"grad_norm": 0.10830414207227934,
"learning_rate": 0.00011499999999999999,
"loss": 0.4392,
"step": 46
},
{
"epoch": 0.17636022514071295,
"grad_norm": 0.1130446420034329,
"learning_rate": 0.00011750000000000001,
"loss": 0.4175,
"step": 47
},
{
"epoch": 0.1801125703564728,
"grad_norm": 0.10972733489410498,
"learning_rate": 0.00012,
"loss": 0.4064,
"step": 48
},
{
"epoch": 0.18386491557223264,
"grad_norm": 0.11723858927517143,
"learning_rate": 0.00012250000000000002,
"loss": 0.4618,
"step": 49
},
{
"epoch": 0.18761726078799248,
"grad_norm": 0.12979793592348535,
"learning_rate": 0.000125,
"loss": 0.4413,
"step": 50
},
{
"epoch": 0.19136960600375236,
"grad_norm": 0.12190484063649769,
"learning_rate": 0.0001275,
"loss": 0.4212,
"step": 51
},
{
"epoch": 0.1951219512195122,
"grad_norm": 0.1200977200253699,
"learning_rate": 0.00013000000000000002,
"loss": 0.4236,
"step": 52
},
{
"epoch": 0.19887429643527205,
"grad_norm": 0.11576799257930886,
"learning_rate": 0.0001325,
"loss": 0.4328,
"step": 53
},
{
"epoch": 0.2026266416510319,
"grad_norm": 0.11804398873031127,
"learning_rate": 0.00013500000000000003,
"loss": 0.3906,
"step": 54
},
{
"epoch": 0.20637898686679174,
"grad_norm": 0.11890529087801377,
"learning_rate": 0.0001375,
"loss": 0.4092,
"step": 55
},
{
"epoch": 0.2101313320825516,
"grad_norm": 0.11537178670561035,
"learning_rate": 0.00014,
"loss": 0.4026,
"step": 56
},
{
"epoch": 0.21388367729831145,
"grad_norm": 0.11591938376106178,
"learning_rate": 0.00014250000000000002,
"loss": 0.3678,
"step": 57
},
{
"epoch": 0.2176360225140713,
"grad_norm": 0.12025566814049414,
"learning_rate": 0.000145,
"loss": 0.3791,
"step": 58
},
{
"epoch": 0.22138836772983114,
"grad_norm": 0.13088656560108905,
"learning_rate": 0.0001475,
"loss": 0.3906,
"step": 59
},
{
"epoch": 0.225140712945591,
"grad_norm": 0.12366551138693345,
"learning_rate": 0.00015000000000000001,
"loss": 0.3769,
"step": 60
},
{
"epoch": 0.22889305816135083,
"grad_norm": 0.12338960635908504,
"learning_rate": 0.0001525,
"loss": 0.3806,
"step": 61
},
{
"epoch": 0.2326454033771107,
"grad_norm": 0.12957742480845902,
"learning_rate": 0.000155,
"loss": 0.365,
"step": 62
},
{
"epoch": 0.23639774859287055,
"grad_norm": 0.1282017025800552,
"learning_rate": 0.0001575,
"loss": 0.3637,
"step": 63
},
{
"epoch": 0.2401500938086304,
"grad_norm": 0.12685377163368308,
"learning_rate": 0.00016,
"loss": 0.3813,
"step": 64
},
{
"epoch": 0.24390243902439024,
"grad_norm": 0.12515445789228635,
"learning_rate": 0.00016250000000000002,
"loss": 0.3665,
"step": 65
},
{
"epoch": 0.24765478424015008,
"grad_norm": 0.12780302020094111,
"learning_rate": 0.000165,
"loss": 0.372,
"step": 66
},
{
"epoch": 0.25140712945590993,
"grad_norm": 0.13509915457231544,
"learning_rate": 0.0001675,
"loss": 0.3964,
"step": 67
},
{
"epoch": 0.2551594746716698,
"grad_norm": 0.1324450895974203,
"learning_rate": 0.00017,
"loss": 0.3809,
"step": 68
},
{
"epoch": 0.2589118198874296,
"grad_norm": 0.14039471561000108,
"learning_rate": 0.00017250000000000002,
"loss": 0.3788,
"step": 69
},
{
"epoch": 0.2626641651031895,
"grad_norm": 0.13748884493823293,
"learning_rate": 0.000175,
"loss": 0.3477,
"step": 70
},
{
"epoch": 0.26641651031894936,
"grad_norm": 0.12981102084999996,
"learning_rate": 0.0001775,
"loss": 0.3803,
"step": 71
},
{
"epoch": 0.2701688555347092,
"grad_norm": 0.12375391443012415,
"learning_rate": 0.00018,
"loss": 0.3557,
"step": 72
},
{
"epoch": 0.27392120075046905,
"grad_norm": 0.12792478465226367,
"learning_rate": 0.0001825,
"loss": 0.351,
"step": 73
},
{
"epoch": 0.2776735459662289,
"grad_norm": 0.1281934594676182,
"learning_rate": 0.00018500000000000002,
"loss": 0.3662,
"step": 74
},
{
"epoch": 0.28142589118198874,
"grad_norm": 0.13402822886419208,
"learning_rate": 0.0001875,
"loss": 0.3491,
"step": 75
},
{
"epoch": 0.2851782363977486,
"grad_norm": 0.1292536897601892,
"learning_rate": 0.00019,
"loss": 0.3415,
"step": 76
},
{
"epoch": 0.28893058161350843,
"grad_norm": 0.16014546584829106,
"learning_rate": 0.00019250000000000002,
"loss": 0.3493,
"step": 77
},
{
"epoch": 0.2926829268292683,
"grad_norm": 0.1393384528675237,
"learning_rate": 0.000195,
"loss": 0.3509,
"step": 78
},
{
"epoch": 0.2964352720450281,
"grad_norm": 0.15248843449290234,
"learning_rate": 0.00019750000000000003,
"loss": 0.3328,
"step": 79
},
{
"epoch": 0.300187617260788,
"grad_norm": 0.1478683373584156,
"learning_rate": 0.0002,
"loss": 0.339,
"step": 80
},
{
"epoch": 0.30393996247654786,
"grad_norm": 0.1457678828189889,
"learning_rate": 0.00019999904276147618,
"loss": 0.3536,
"step": 81
},
{
"epoch": 0.3076923076923077,
"grad_norm": 0.15185000879528737,
"learning_rate": 0.00019999617106423082,
"loss": 0.3529,
"step": 82
},
{
"epoch": 0.31144465290806755,
"grad_norm": 0.15201189365883755,
"learning_rate": 0.0001999913849632419,
"loss": 0.3548,
"step": 83
},
{
"epoch": 0.3151969981238274,
"grad_norm": 0.14879326753679958,
"learning_rate": 0.00019998468455013823,
"loss": 0.3264,
"step": 84
},
{
"epoch": 0.31894934333958724,
"grad_norm": 0.14083771591440533,
"learning_rate": 0.00019997606995319768,
"loss": 0.3331,
"step": 85
},
{
"epoch": 0.3227016885553471,
"grad_norm": 0.1503929432468549,
"learning_rate": 0.00019996554133734474,
"loss": 0.3282,
"step": 86
},
{
"epoch": 0.32645403377110693,
"grad_norm": 0.14030605779758232,
"learning_rate": 0.00019995309890414732,
"loss": 0.3216,
"step": 87
},
{
"epoch": 0.3302063789868668,
"grad_norm": 0.13891895714301467,
"learning_rate": 0.000199938742891813,
"loss": 0.3049,
"step": 88
},
{
"epoch": 0.3339587242026266,
"grad_norm": 0.13742909920708118,
"learning_rate": 0.00019992247357518428,
"loss": 0.3252,
"step": 89
},
{
"epoch": 0.33771106941838647,
"grad_norm": 0.14398237502236147,
"learning_rate": 0.0001999042912657335,
"loss": 0.3226,
"step": 90
},
{
"epoch": 0.34146341463414637,
"grad_norm": 0.14292774523614082,
"learning_rate": 0.00019988419631155683,
"loss": 0.3323,
"step": 91
},
{
"epoch": 0.3452157598499062,
"grad_norm": 0.14529808441186043,
"learning_rate": 0.00019986218909736757,
"loss": 0.3621,
"step": 92
},
{
"epoch": 0.34896810506566606,
"grad_norm": 0.14363660567228131,
"learning_rate": 0.00019983827004448873,
"loss": 0.3325,
"step": 93
},
{
"epoch": 0.3527204502814259,
"grad_norm": 0.14053215950288314,
"learning_rate": 0.00019981243961084515,
"loss": 0.3317,
"step": 94
},
{
"epoch": 0.35647279549718575,
"grad_norm": 0.12839662363868307,
"learning_rate": 0.0001997846982909545,
"loss": 0.3017,
"step": 95
},
{
"epoch": 0.3602251407129456,
"grad_norm": 0.1421301998134749,
"learning_rate": 0.000199755046615918,
"loss": 0.3236,
"step": 96
},
{
"epoch": 0.36397748592870544,
"grad_norm": 0.1475029420066679,
"learning_rate": 0.00019972348515341016,
"loss": 0.3362,
"step": 97
},
{
"epoch": 0.3677298311444653,
"grad_norm": 0.13378279730516257,
"learning_rate": 0.00019969001450766794,
"loss": 0.3254,
"step": 98
},
{
"epoch": 0.3714821763602251,
"grad_norm": 0.1497261207938794,
"learning_rate": 0.0001996546353194792,
"loss": 0.3156,
"step": 99
},
{
"epoch": 0.37523452157598497,
"grad_norm": 0.1356839966194173,
"learning_rate": 0.00019961734826617035,
"loss": 0.3282,
"step": 100
},
{
"epoch": 0.3789868667917448,
"grad_norm": 0.12386942577985954,
"learning_rate": 0.0001995781540615934,
"loss": 0.3207,
"step": 101
},
{
"epoch": 0.3827392120075047,
"grad_norm": 0.16584604505517364,
"learning_rate": 0.0001995370534561125,
"loss": 0.3026,
"step": 102
},
{
"epoch": 0.38649155722326456,
"grad_norm": 0.1277560294599099,
"learning_rate": 0.0001994940472365893,
"loss": 0.322,
"step": 103
},
{
"epoch": 0.3902439024390244,
"grad_norm": 0.13567813426924816,
"learning_rate": 0.00019944913622636795,
"loss": 0.3232,
"step": 104
},
{
"epoch": 0.39399624765478425,
"grad_norm": 0.12123496832228846,
"learning_rate": 0.0001994023212852595,
"loss": 0.2972,
"step": 105
},
{
"epoch": 0.3977485928705441,
"grad_norm": 0.13879373741004694,
"learning_rate": 0.00019935360330952518,
"loss": 0.3005,
"step": 106
},
{
"epoch": 0.40150093808630394,
"grad_norm": 0.1274679949876301,
"learning_rate": 0.00019930298323185945,
"loss": 0.3119,
"step": 107
},
{
"epoch": 0.4052532833020638,
"grad_norm": 0.13101222758435194,
"learning_rate": 0.00019925046202137216,
"loss": 0.2939,
"step": 108
},
{
"epoch": 0.4090056285178236,
"grad_norm": 0.12738472548497895,
"learning_rate": 0.00019919604068356978,
"loss": 0.3093,
"step": 109
},
{
"epoch": 0.41275797373358347,
"grad_norm": 0.1490015817444115,
"learning_rate": 0.00019913972026033632,
"loss": 0.2844,
"step": 110
},
{
"epoch": 0.4165103189493433,
"grad_norm": 0.1470790264142207,
"learning_rate": 0.00019908150182991339,
"loss": 0.2872,
"step": 111
},
{
"epoch": 0.4202626641651032,
"grad_norm": 0.12721396486874495,
"learning_rate": 0.00019902138650687942,
"loss": 0.3043,
"step": 112
},
{
"epoch": 0.42401500938086306,
"grad_norm": 0.13891744298891914,
"learning_rate": 0.00019895937544212858,
"loss": 0.3009,
"step": 113
},
{
"epoch": 0.4277673545966229,
"grad_norm": 0.134346074178801,
"learning_rate": 0.00019889546982284834,
"loss": 0.3013,
"step": 114
},
{
"epoch": 0.43151969981238275,
"grad_norm": 0.1379066741076229,
"learning_rate": 0.00019882967087249718,
"loss": 0.3052,
"step": 115
},
{
"epoch": 0.4352720450281426,
"grad_norm": 0.12972548899740632,
"learning_rate": 0.0001987619798507809,
"loss": 0.3124,
"step": 116
},
{
"epoch": 0.43902439024390244,
"grad_norm": 0.12813310196115213,
"learning_rate": 0.0001986923980536286,
"loss": 0.2893,
"step": 117
},
{
"epoch": 0.4427767354596623,
"grad_norm": 0.13797054317394944,
"learning_rate": 0.00019862092681316776,
"loss": 0.3016,
"step": 118
},
{
"epoch": 0.44652908067542213,
"grad_norm": 0.13780600670778337,
"learning_rate": 0.0001985475674976989,
"loss": 0.3158,
"step": 119
},
{
"epoch": 0.450281425891182,
"grad_norm": 0.13926178383999727,
"learning_rate": 0.0001984723215116693,
"loss": 0.2801,
"step": 120
},
{
"epoch": 0.4540337711069418,
"grad_norm": 0.1369353496922525,
"learning_rate": 0.00019839519029564605,
"loss": 0.305,
"step": 121
},
{
"epoch": 0.45778611632270166,
"grad_norm": 0.13937382639705567,
"learning_rate": 0.00019831617532628862,
"loss": 0.3176,
"step": 122
},
{
"epoch": 0.46153846153846156,
"grad_norm": 0.14086276027188518,
"learning_rate": 0.00019823527811632042,
"loss": 0.2879,
"step": 123
},
{
"epoch": 0.4652908067542214,
"grad_norm": 0.13282215800163436,
"learning_rate": 0.00019815250021449997,
"loss": 0.2996,
"step": 124
},
{
"epoch": 0.46904315196998125,
"grad_norm": 0.12757163326850707,
"learning_rate": 0.00019806784320559127,
"loss": 0.3006,
"step": 125
},
{
"epoch": 0.4727954971857411,
"grad_norm": 0.14854709123219104,
"learning_rate": 0.00019798130871033322,
"loss": 0.301,
"step": 126
},
{
"epoch": 0.47654784240150094,
"grad_norm": 0.13087500973091548,
"learning_rate": 0.00019789289838540897,
"loss": 0.2902,
"step": 127
},
{
"epoch": 0.4803001876172608,
"grad_norm": 0.1433475392806627,
"learning_rate": 0.00019780261392341383,
"loss": 0.2926,
"step": 128
},
{
"epoch": 0.48405253283302063,
"grad_norm": 0.1341283559656879,
"learning_rate": 0.0001977104570528231,
"loss": 0.2602,
"step": 129
},
{
"epoch": 0.4878048780487805,
"grad_norm": 0.1607197394251248,
"learning_rate": 0.00019761642953795895,
"loss": 0.2984,
"step": 130
},
{
"epoch": 0.4915572232645403,
"grad_norm": 0.11856150621760517,
"learning_rate": 0.0001975205331789566,
"loss": 0.2988,
"step": 131
},
{
"epoch": 0.49530956848030017,
"grad_norm": 0.14014139613661877,
"learning_rate": 0.00019742276981172976,
"loss": 0.291,
"step": 132
},
{
"epoch": 0.49906191369606,
"grad_norm": 0.12881861735846314,
"learning_rate": 0.00019732314130793568,
"loss": 0.2971,
"step": 133
},
{
"epoch": 0.5028142589118199,
"grad_norm": 0.11788683351931176,
"learning_rate": 0.00019722164957493922,
"loss": 0.2766,
"step": 134
},
{
"epoch": 0.5065666041275797,
"grad_norm": 0.13746078706666037,
"learning_rate": 0.0001971182965557763,
"loss": 0.2886,
"step": 135
},
{
"epoch": 0.5103189493433395,
"grad_norm": 0.12745519285890888,
"learning_rate": 0.00019701308422911672,
"loss": 0.2963,
"step": 136
},
{
"epoch": 0.5140712945590994,
"grad_norm": 0.11835270726835292,
"learning_rate": 0.0001969060146092264,
"loss": 0.2995,
"step": 137
},
{
"epoch": 0.5178236397748592,
"grad_norm": 0.14011034379489426,
"learning_rate": 0.0001967970897459286,
"loss": 0.2881,
"step": 138
},
{
"epoch": 0.5215759849906192,
"grad_norm": 0.13060776440495228,
"learning_rate": 0.0001966863117245648,
"loss": 0.2765,
"step": 139
},
{
"epoch": 0.525328330206379,
"grad_norm": 0.14161693580554588,
"learning_rate": 0.00019657368266595476,
"loss": 0.281,
"step": 140
},
{
"epoch": 0.5290806754221389,
"grad_norm": 0.12125364150709082,
"learning_rate": 0.00019645920472635608,
"loss": 0.2732,
"step": 141
},
{
"epoch": 0.5328330206378987,
"grad_norm": 0.1334127552945295,
"learning_rate": 0.00019634288009742255,
"loss": 0.2523,
"step": 142
},
{
"epoch": 0.5365853658536586,
"grad_norm": 0.12113573146827264,
"learning_rate": 0.0001962247110061625,
"loss": 0.2775,
"step": 143
},
{
"epoch": 0.5403377110694184,
"grad_norm": 0.12331032028922699,
"learning_rate": 0.00019610469971489608,
"loss": 0.2687,
"step": 144
},
{
"epoch": 0.5440900562851783,
"grad_norm": 0.13237586077608754,
"learning_rate": 0.00019598284852121188,
"loss": 0.2774,
"step": 145
},
{
"epoch": 0.5478424015009381,
"grad_norm": 0.12199880756983131,
"learning_rate": 0.0001958591597579231,
"loss": 0.2815,
"step": 146
},
{
"epoch": 0.551594746716698,
"grad_norm": 0.11915746795874955,
"learning_rate": 0.00019573363579302266,
"loss": 0.2558,
"step": 147
},
{
"epoch": 0.5553470919324578,
"grad_norm": 0.11644382804351376,
"learning_rate": 0.00019560627902963807,
"loss": 0.2951,
"step": 148
},
{
"epoch": 0.5590994371482176,
"grad_norm": 0.1317161794959933,
"learning_rate": 0.00019547709190598534,
"loss": 0.2629,
"step": 149
},
{
"epoch": 0.5628517823639775,
"grad_norm": 0.13859313218362884,
"learning_rate": 0.00019534607689532233,
"loss": 0.2884,
"step": 150
},
{
"epoch": 0.5666041275797373,
"grad_norm": 0.1643061756146766,
"learning_rate": 0.00019521323650590133,
"loss": 0.2932,
"step": 151
},
{
"epoch": 0.5703564727954972,
"grad_norm": 0.12366306539172685,
"learning_rate": 0.00019507857328092108,
"loss": 0.2861,
"step": 152
},
{
"epoch": 0.574108818011257,
"grad_norm": 0.12624207186548378,
"learning_rate": 0.00019494208979847812,
"loss": 0.2796,
"step": 153
},
{
"epoch": 0.5778611632270169,
"grad_norm": 0.12237336350000451,
"learning_rate": 0.00019480378867151746,
"loss": 0.273,
"step": 154
},
{
"epoch": 0.5816135084427767,
"grad_norm": 0.12323433685041912,
"learning_rate": 0.00019466367254778233,
"loss": 0.2747,
"step": 155
},
{
"epoch": 0.5853658536585366,
"grad_norm": 0.12577598956544817,
"learning_rate": 0.0001945217441097638,
"loss": 0.2634,
"step": 156
},
{
"epoch": 0.5891181988742964,
"grad_norm": 0.12244570380339517,
"learning_rate": 0.00019437800607464932,
"loss": 0.2701,
"step": 157
},
{
"epoch": 0.5928705440900562,
"grad_norm": 0.12004670825182381,
"learning_rate": 0.00019423246119427043,
"loss": 0.2781,
"step": 158
},
{
"epoch": 0.5966228893058161,
"grad_norm": 0.13091796767694497,
"learning_rate": 0.00019408511225505056,
"loss": 0.2646,
"step": 159
},
{
"epoch": 0.600375234521576,
"grad_norm": 0.11771920694416416,
"learning_rate": 0.00019393596207795136,
"loss": 0.2795,
"step": 160
},
{
"epoch": 0.6041275797373359,
"grad_norm": 0.12447218651645564,
"learning_rate": 0.00019378501351841865,
"loss": 0.2767,
"step": 161
},
{
"epoch": 0.6078799249530957,
"grad_norm": 0.11854916742534294,
"learning_rate": 0.000193632269466328,
"loss": 0.2595,
"step": 162
},
{
"epoch": 0.6116322701688556,
"grad_norm": 0.11517649062994549,
"learning_rate": 0.0001934777328459292,
"loss": 0.2611,
"step": 163
},
{
"epoch": 0.6153846153846154,
"grad_norm": 0.12291906434338017,
"learning_rate": 0.00019332140661579042,
"loss": 0.2569,
"step": 164
},
{
"epoch": 0.6191369606003753,
"grad_norm": 0.12768661337225065,
"learning_rate": 0.00019316329376874145,
"loss": 0.2802,
"step": 165
},
{
"epoch": 0.6228893058161351,
"grad_norm": 0.12224468589372722,
"learning_rate": 0.00019300339733181642,
"loss": 0.2742,
"step": 166
},
{
"epoch": 0.626641651031895,
"grad_norm": 0.11873375913983374,
"learning_rate": 0.00019284172036619594,
"loss": 0.2496,
"step": 167
},
{
"epoch": 0.6303939962476548,
"grad_norm": 0.1094029489278503,
"learning_rate": 0.0001926782659671484,
"loss": 0.2834,
"step": 168
},
{
"epoch": 0.6341463414634146,
"grad_norm": 0.11667364916992014,
"learning_rate": 0.00019251303726397078,
"loss": 0.2749,
"step": 169
},
{
"epoch": 0.6378986866791745,
"grad_norm": 0.10721206701910313,
"learning_rate": 0.00019234603741992862,
"loss": 0.2833,
"step": 170
},
{
"epoch": 0.6416510318949343,
"grad_norm": 0.11114975628124507,
"learning_rate": 0.00019217726963219567,
"loss": 0.2412,
"step": 171
},
{
"epoch": 0.6454033771106942,
"grad_norm": 0.11052789377191914,
"learning_rate": 0.00019200673713179245,
"loss": 0.2629,
"step": 172
},
{
"epoch": 0.649155722326454,
"grad_norm": 0.1254877320751365,
"learning_rate": 0.00019183444318352457,
"loss": 0.2676,
"step": 173
},
{
"epoch": 0.6529080675422139,
"grad_norm": 0.11436464042758997,
"learning_rate": 0.0001916603910859201,
"loss": 0.2786,
"step": 174
},
{
"epoch": 0.6566604127579737,
"grad_norm": 0.12040982753537727,
"learning_rate": 0.00019148458417116645,
"loss": 0.255,
"step": 175
},
{
"epoch": 0.6604127579737336,
"grad_norm": 0.1215472428194096,
"learning_rate": 0.00019130702580504676,
"loss": 0.2933,
"step": 176
},
{
"epoch": 0.6641651031894934,
"grad_norm": 0.11127574852727158,
"learning_rate": 0.0001911277193868751,
"loss": 0.2638,
"step": 177
},
{
"epoch": 0.6679174484052532,
"grad_norm": 0.11297276732299613,
"learning_rate": 0.00019094666834943179,
"loss": 0.2553,
"step": 178
},
{
"epoch": 0.6716697936210131,
"grad_norm": 0.11230362581933455,
"learning_rate": 0.00019076387615889727,
"loss": 0.2656,
"step": 179
},
{
"epoch": 0.6754221388367729,
"grad_norm": 0.11339982024848368,
"learning_rate": 0.00019057934631478617,
"loss": 0.2608,
"step": 180
},
{
"epoch": 0.6791744840525328,
"grad_norm": 0.1157018708653507,
"learning_rate": 0.00019039308234987992,
"loss": 0.2661,
"step": 181
},
{
"epoch": 0.6829268292682927,
"grad_norm": 0.12120354653706046,
"learning_rate": 0.00019020508783015942,
"loss": 0.2655,
"step": 182
},
{
"epoch": 0.6866791744840526,
"grad_norm": 0.11650498536100079,
"learning_rate": 0.00019001536635473664,
"loss": 0.2617,
"step": 183
},
{
"epoch": 0.6904315196998124,
"grad_norm": 0.11284326019455035,
"learning_rate": 0.0001898239215557856,
"loss": 0.2604,
"step": 184
},
{
"epoch": 0.6941838649155723,
"grad_norm": 0.11137366023131207,
"learning_rate": 0.0001896307570984731,
"loss": 0.2695,
"step": 185
},
{
"epoch": 0.6979362101313321,
"grad_norm": 0.10909150712308537,
"learning_rate": 0.00018943587668088832,
"loss": 0.261,
"step": 186
},
{
"epoch": 0.701688555347092,
"grad_norm": 0.11533104627662898,
"learning_rate": 0.00018923928403397208,
"loss": 0.2662,
"step": 187
},
{
"epoch": 0.7054409005628518,
"grad_norm": 0.11085301527387796,
"learning_rate": 0.00018904098292144554,
"loss": 0.26,
"step": 188
},
{
"epoch": 0.7091932457786116,
"grad_norm": 0.1040125545017247,
"learning_rate": 0.00018884097713973798,
"loss": 0.2641,
"step": 189
},
{
"epoch": 0.7129455909943715,
"grad_norm": 0.10775777270108124,
"learning_rate": 0.00018863927051791416,
"loss": 0.2553,
"step": 190
},
{
"epoch": 0.7166979362101313,
"grad_norm": 0.11556746781951048,
"learning_rate": 0.00018843586691760108,
"loss": 0.2817,
"step": 191
},
{
"epoch": 0.7204502814258912,
"grad_norm": 0.11370972134361729,
"learning_rate": 0.00018823077023291397,
"loss": 0.2715,
"step": 192
},
{
"epoch": 0.724202626641651,
"grad_norm": 0.10785721109445355,
"learning_rate": 0.00018802398439038176,
"loss": 0.2604,
"step": 193
},
{
"epoch": 0.7279549718574109,
"grad_norm": 0.10825278350141479,
"learning_rate": 0.00018781551334887201,
"loss": 0.2498,
"step": 194
},
{
"epoch": 0.7317073170731707,
"grad_norm": 0.09965163182891702,
"learning_rate": 0.0001876053610995149,
"loss": 0.2504,
"step": 195
},
{
"epoch": 0.7354596622889306,
"grad_norm": 0.1026489808604617,
"learning_rate": 0.000187393531665627,
"loss": 0.2587,
"step": 196
},
{
"epoch": 0.7392120075046904,
"grad_norm": 0.10399821510438714,
"learning_rate": 0.00018718002910263426,
"loss": 0.273,
"step": 197
},
{
"epoch": 0.7429643527204502,
"grad_norm": 0.10994775687961979,
"learning_rate": 0.0001869648574979942,
"loss": 0.2659,
"step": 198
},
{
"epoch": 0.7467166979362101,
"grad_norm": 0.10593465784705908,
"learning_rate": 0.00018674802097111784,
"loss": 0.26,
"step": 199
},
{
"epoch": 0.7504690431519699,
"grad_norm": 0.11280493763136354,
"learning_rate": 0.0001865295236732907,
"loss": 0.2677,
"step": 200
},
{
"epoch": 0.7542213883677298,
"grad_norm": 0.10536591132251391,
"learning_rate": 0.00018630936978759338,
"loss": 0.2513,
"step": 201
},
{
"epoch": 0.7579737335834896,
"grad_norm": 0.10796354732338231,
"learning_rate": 0.00018608756352882152,
"loss": 0.2757,
"step": 202
},
{
"epoch": 0.7617260787992496,
"grad_norm": 0.10552783825603758,
"learning_rate": 0.00018586410914340497,
"loss": 0.2552,
"step": 203
},
{
"epoch": 0.7654784240150094,
"grad_norm": 0.10937928150050989,
"learning_rate": 0.00018563901090932672,
"loss": 0.2675,
"step": 204
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.11537632950908651,
"learning_rate": 0.00018541227313604078,
"loss": 0.2402,
"step": 205
},
{
"epoch": 0.7729831144465291,
"grad_norm": 0.11524821367403956,
"learning_rate": 0.0001851839001643898,
"loss": 0.2628,
"step": 206
},
{
"epoch": 0.776735459662289,
"grad_norm": 0.10266098148088061,
"learning_rate": 0.00018495389636652185,
"loss": 0.2484,
"step": 207
},
{
"epoch": 0.7804878048780488,
"grad_norm": 0.10807777719284456,
"learning_rate": 0.0001847222661458069,
"loss": 0.2648,
"step": 208
},
{
"epoch": 0.7842401500938087,
"grad_norm": 0.10744597380010515,
"learning_rate": 0.00018448901393675233,
"loss": 0.2575,
"step": 209
},
{
"epoch": 0.7879924953095685,
"grad_norm": 0.10942201726245399,
"learning_rate": 0.00018425414420491815,
"loss": 0.266,
"step": 210
},
{
"epoch": 0.7917448405253283,
"grad_norm": 0.10660876081865972,
"learning_rate": 0.00018401766144683147,
"loss": 0.2438,
"step": 211
},
{
"epoch": 0.7954971857410882,
"grad_norm": 0.11694393967537217,
"learning_rate": 0.0001837795701899004,
"loss": 0.2787,
"step": 212
},
{
"epoch": 0.799249530956848,
"grad_norm": 0.11981272200535166,
"learning_rate": 0.00018353987499232746,
"loss": 0.264,
"step": 213
},
{
"epoch": 0.8030018761726079,
"grad_norm": 0.10661350248202765,
"learning_rate": 0.00018329858044302213,
"loss": 0.2467,
"step": 214
},
{
"epoch": 0.8067542213883677,
"grad_norm": 0.10372037225439175,
"learning_rate": 0.0001830556911615132,
"loss": 0.2718,
"step": 215
},
{
"epoch": 0.8105065666041276,
"grad_norm": 0.10573394846211595,
"learning_rate": 0.00018281121179786024,
"loss": 0.2414,
"step": 216
},
{
"epoch": 0.8142589118198874,
"grad_norm": 0.10765219346551154,
"learning_rate": 0.0001825651470325645,
"loss": 0.2516,
"step": 217
},
{
"epoch": 0.8180112570356473,
"grad_norm": 0.09961054466797757,
"learning_rate": 0.0001823175015764795,
"loss": 0.2337,
"step": 218
},
{
"epoch": 0.8217636022514071,
"grad_norm": 0.10573680507484315,
"learning_rate": 0.00018206828017072057,
"loss": 0.2443,
"step": 219
},
{
"epoch": 0.8255159474671669,
"grad_norm": 0.10617911818381037,
"learning_rate": 0.00018181748758657438,
"loss": 0.2409,
"step": 220
},
{
"epoch": 0.8292682926829268,
"grad_norm": 0.10190011860666479,
"learning_rate": 0.0001815651286254074,
"loss": 0.2699,
"step": 221
},
{
"epoch": 0.8330206378986866,
"grad_norm": 0.10217498312134918,
"learning_rate": 0.000181311208118574,
"loss": 0.261,
"step": 222
},
{
"epoch": 0.8367729831144465,
"grad_norm": 0.10290805625127751,
"learning_rate": 0.000181055730927324,
"loss": 0.2544,
"step": 223
},
{
"epoch": 0.8405253283302064,
"grad_norm": 0.10273441373621256,
"learning_rate": 0.00018079870194270958,
"loss": 0.2394,
"step": 224
},
{
"epoch": 0.8442776735459663,
"grad_norm": 0.09880435844395785,
"learning_rate": 0.00018054012608549166,
"loss": 0.263,
"step": 225
},
{
"epoch": 0.8480300187617261,
"grad_norm": 0.10357276059735837,
"learning_rate": 0.0001802800083060457,
"loss": 0.2853,
"step": 226
},
{
"epoch": 0.851782363977486,
"grad_norm": 0.10804308023574893,
"learning_rate": 0.00018001835358426687,
"loss": 0.2595,
"step": 227
},
{
"epoch": 0.8555347091932458,
"grad_norm": 0.09776326620940605,
"learning_rate": 0.00017975516692947475,
"loss": 0.253,
"step": 228
},
{
"epoch": 0.8592870544090057,
"grad_norm": 0.0995125991589646,
"learning_rate": 0.00017949045338031745,
"loss": 0.2536,
"step": 229
},
{
"epoch": 0.8630393996247655,
"grad_norm": 0.10281461790899643,
"learning_rate": 0.00017922421800467512,
"loss": 0.2592,
"step": 230
},
{
"epoch": 0.8667917448405253,
"grad_norm": 0.11374858278223317,
"learning_rate": 0.0001789564658995629,
"loss": 0.2694,
"step": 231
},
{
"epoch": 0.8705440900562852,
"grad_norm": 0.10048956101218906,
"learning_rate": 0.00017868720219103344,
"loss": 0.2563,
"step": 232
},
{
"epoch": 0.874296435272045,
"grad_norm": 0.11978050473597157,
"learning_rate": 0.00017841643203407852,
"loss": 0.2671,
"step": 233
},
{
"epoch": 0.8780487804878049,
"grad_norm": 0.1022948197426214,
"learning_rate": 0.00017814416061253077,
"loss": 0.2442,
"step": 234
},
{
"epoch": 0.8818011257035647,
"grad_norm": 0.10648409702487768,
"learning_rate": 0.000177870393138964,
"loss": 0.2172,
"step": 235
},
{
"epoch": 0.8855534709193246,
"grad_norm": 0.09682467776295996,
"learning_rate": 0.00017759513485459367,
"loss": 0.2503,
"step": 236
},
{
"epoch": 0.8893058161350844,
"grad_norm": 0.10093582432576866,
"learning_rate": 0.00017731839102917644,
"loss": 0.2526,
"step": 237
},
{
"epoch": 0.8930581613508443,
"grad_norm": 0.10283968277186326,
"learning_rate": 0.00017704016696090937,
"loss": 0.2467,
"step": 238
},
{
"epoch": 0.8968105065666041,
"grad_norm": 0.1016691703162235,
"learning_rate": 0.00017676046797632835,
"loss": 0.2458,
"step": 239
},
{
"epoch": 0.900562851782364,
"grad_norm": 0.09871178549145665,
"learning_rate": 0.00017647929943020625,
"loss": 0.2387,
"step": 240
},
{
"epoch": 0.9043151969981238,
"grad_norm": 0.11005062968397657,
"learning_rate": 0.00017619666670545033,
"loss": 0.2485,
"step": 241
},
{
"epoch": 0.9080675422138836,
"grad_norm": 0.10636010374538316,
"learning_rate": 0.00017591257521299932,
"loss": 0.2344,
"step": 242
},
{
"epoch": 0.9118198874296435,
"grad_norm": 0.10269265934208162,
"learning_rate": 0.00017562703039171955,
"loss": 0.2449,
"step": 243
},
{
"epoch": 0.9155722326454033,
"grad_norm": 0.1123496871025115,
"learning_rate": 0.0001753400377083011,
"loss": 0.2472,
"step": 244
},
{
"epoch": 0.9193245778611632,
"grad_norm": 0.10731321325088286,
"learning_rate": 0.00017505160265715304,
"loss": 0.2257,
"step": 245
},
{
"epoch": 0.9230769230769231,
"grad_norm": 0.10122280465712044,
"learning_rate": 0.0001747617307602982,
"loss": 0.2673,
"step": 246
},
{
"epoch": 0.926829268292683,
"grad_norm": 0.10287633377626088,
"learning_rate": 0.00017447042756726754,
"loss": 0.2623,
"step": 247
},
{
"epoch": 0.9305816135084428,
"grad_norm": 0.11180813962431274,
"learning_rate": 0.0001741776986549938,
"loss": 0.2588,
"step": 248
},
{
"epoch": 0.9343339587242027,
"grad_norm": 0.10342918680770019,
"learning_rate": 0.00017388354962770487,
"loss": 0.2365,
"step": 249
},
{
"epoch": 0.9380863039399625,
"grad_norm": 0.10248241650027715,
"learning_rate": 0.0001735879861168163,
"loss": 0.2453,
"step": 250
},
{
"epoch": 0.9418386491557224,
"grad_norm": 0.11730400265701718,
"learning_rate": 0.00017329101378082374,
"loss": 0.2486,
"step": 251
},
{
"epoch": 0.9455909943714822,
"grad_norm": 0.09685186553299667,
"learning_rate": 0.0001729926383051943,
"loss": 0.2572,
"step": 252
},
{
"epoch": 0.949343339587242,
"grad_norm": 0.12090818479499119,
"learning_rate": 0.00017269286540225805,
"loss": 0.2248,
"step": 253
},
{
"epoch": 0.9530956848030019,
"grad_norm": 0.10260399450357141,
"learning_rate": 0.0001723917008110984,
"loss": 0.2527,
"step": 254
},
{
"epoch": 0.9568480300187617,
"grad_norm": 0.10114612523395812,
"learning_rate": 0.0001720891502974423,
"loss": 0.2602,
"step": 255
},
{
"epoch": 0.9606003752345216,
"grad_norm": 0.11613810011247953,
"learning_rate": 0.00017178521965354992,
"loss": 0.2535,
"step": 256
},
{
"epoch": 0.9643527204502814,
"grad_norm": 0.10548781228478918,
"learning_rate": 0.00017147991469810368,
"loss": 0.2616,
"step": 257
},
{
"epoch": 0.9681050656660413,
"grad_norm": 0.10337010169414873,
"learning_rate": 0.00017117324127609686,
"loss": 0.2506,
"step": 258
},
{
"epoch": 0.9718574108818011,
"grad_norm": 0.1022753450493229,
"learning_rate": 0.00017086520525872172,
"loss": 0.2536,
"step": 259
},
{
"epoch": 0.975609756097561,
"grad_norm": 0.10274802198295474,
"learning_rate": 0.00017055581254325715,
"loss": 0.2444,
"step": 260
},
{
"epoch": 0.9793621013133208,
"grad_norm": 0.10073944882387982,
"learning_rate": 0.00017024506905295565,
"loss": 0.2583,
"step": 261
},
{
"epoch": 0.9831144465290806,
"grad_norm": 0.10220040335882648,
"learning_rate": 0.00016993298073693003,
"loss": 0.2431,
"step": 262
},
{
"epoch": 0.9868667917448405,
"grad_norm": 0.1060948209024435,
"learning_rate": 0.00016961955357003947,
"loss": 0.262,
"step": 263
},
{
"epoch": 0.9906191369606003,
"grad_norm": 0.10004277645336798,
"learning_rate": 0.0001693047935527751,
"loss": 0.234,
"step": 264
},
{
"epoch": 0.9943714821763602,
"grad_norm": 0.1000376814502259,
"learning_rate": 0.00016898870671114527,
"loss": 0.2566,
"step": 265
},
{
"epoch": 0.99812382739212,
"grad_norm": 0.09911659249018077,
"learning_rate": 0.00016867129909655998,
"loss": 0.2657,
"step": 266
},
{
"epoch": 0.99812382739212,
"eval_loss": 0.25076788663864136,
"eval_runtime": 54.8199,
"eval_samples_per_second": 32.725,
"eval_steps_per_second": 1.04,
"step": 266
}
],
"logging_steps": 1,
"max_steps": 798,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.692263947344282e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}