{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9945945945945946,
"eval_steps": 500,
"global_step": 115,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008648648648648649,
"grad_norm": 2.249741315841675,
"learning_rate": 0.0001,
"loss": 1.8319,
"step": 1
},
{
"epoch": 0.017297297297297298,
"grad_norm": 2.1813502311706543,
"learning_rate": 0.0002,
"loss": 1.4027,
"step": 2
},
{
"epoch": 0.025945945945945945,
"grad_norm": 0.8601759672164917,
"learning_rate": 0.00019823008849557524,
"loss": 1.1102,
"step": 3
},
{
"epoch": 0.034594594594594595,
"grad_norm": 1.7297605276107788,
"learning_rate": 0.00019646017699115044,
"loss": 1.3774,
"step": 4
},
{
"epoch": 0.043243243243243246,
"grad_norm": 1.0936262607574463,
"learning_rate": 0.00019469026548672567,
"loss": 0.895,
"step": 5
},
{
"epoch": 0.05189189189189189,
"grad_norm": 0.6946480870246887,
"learning_rate": 0.00019292035398230087,
"loss": 0.7451,
"step": 6
},
{
"epoch": 0.06054054054054054,
"grad_norm": 0.45863592624664307,
"learning_rate": 0.00019115044247787613,
"loss": 0.876,
"step": 7
},
{
"epoch": 0.06918918918918919,
"grad_norm": 0.5447478890419006,
"learning_rate": 0.00018938053097345133,
"loss": 0.7719,
"step": 8
},
{
"epoch": 0.07783783783783783,
"grad_norm": 0.45514124631881714,
"learning_rate": 0.00018761061946902656,
"loss": 0.5759,
"step": 9
},
{
"epoch": 0.08648648648648649,
"grad_norm": 0.4590395987033844,
"learning_rate": 0.0001858407079646018,
"loss": 0.5838,
"step": 10
},
{
"epoch": 0.09513513513513513,
"grad_norm": 0.5425634384155273,
"learning_rate": 0.000184070796460177,
"loss": 0.6641,
"step": 11
},
{
"epoch": 0.10378378378378378,
"grad_norm": 1.0379027128219604,
"learning_rate": 0.00018230088495575222,
"loss": 0.9623,
"step": 12
},
{
"epoch": 0.11243243243243244,
"grad_norm": 0.5286022424697876,
"learning_rate": 0.00018053097345132742,
"loss": 0.4761,
"step": 13
},
{
"epoch": 0.12108108108108108,
"grad_norm": 0.6451830267906189,
"learning_rate": 0.00017876106194690265,
"loss": 0.547,
"step": 14
},
{
"epoch": 0.12972972972972974,
"grad_norm": 0.6369953751564026,
"learning_rate": 0.0001769911504424779,
"loss": 0.5872,
"step": 15
},
{
"epoch": 0.13837837837837838,
"grad_norm": 0.4720052182674408,
"learning_rate": 0.0001752212389380531,
"loss": 0.3248,
"step": 16
},
{
"epoch": 0.14702702702702702,
"grad_norm": 0.5918360352516174,
"learning_rate": 0.00017345132743362834,
"loss": 0.6277,
"step": 17
},
{
"epoch": 0.15567567567567567,
"grad_norm": 0.5242601037025452,
"learning_rate": 0.00017168141592920354,
"loss": 0.5645,
"step": 18
},
{
"epoch": 0.1643243243243243,
"grad_norm": 0.474292129278183,
"learning_rate": 0.00016991150442477877,
"loss": 0.2115,
"step": 19
},
{
"epoch": 0.17297297297297298,
"grad_norm": 0.6523647904396057,
"learning_rate": 0.000168141592920354,
"loss": 0.5803,
"step": 20
},
{
"epoch": 0.18162162162162163,
"grad_norm": 0.521297812461853,
"learning_rate": 0.0001663716814159292,
"loss": 0.4483,
"step": 21
},
{
"epoch": 0.19027027027027027,
"grad_norm": 0.5689568519592285,
"learning_rate": 0.00016460176991150443,
"loss": 0.6231,
"step": 22
},
{
"epoch": 0.1989189189189189,
"grad_norm": 0.4570567011833191,
"learning_rate": 0.00016283185840707966,
"loss": 0.2368,
"step": 23
},
{
"epoch": 0.20756756756756756,
"grad_norm": 0.414307564496994,
"learning_rate": 0.0001610619469026549,
"loss": 0.4674,
"step": 24
},
{
"epoch": 0.21621621621621623,
"grad_norm": 0.5027227997779846,
"learning_rate": 0.0001592920353982301,
"loss": 0.3558,
"step": 25
},
{
"epoch": 0.22486486486486487,
"grad_norm": 0.4441507160663605,
"learning_rate": 0.00015752212389380532,
"loss": 0.437,
"step": 26
},
{
"epoch": 0.23351351351351352,
"grad_norm": 0.4098701477050781,
"learning_rate": 0.00015575221238938055,
"loss": 0.3553,
"step": 27
},
{
"epoch": 0.24216216216216216,
"grad_norm": 0.3602244257926941,
"learning_rate": 0.00015398230088495575,
"loss": 0.3689,
"step": 28
},
{
"epoch": 0.2508108108108108,
"grad_norm": 0.4340718984603882,
"learning_rate": 0.00015221238938053098,
"loss": 0.318,
"step": 29
},
{
"epoch": 0.2594594594594595,
"grad_norm": 0.44470590353012085,
"learning_rate": 0.00015044247787610618,
"loss": 0.4992,
"step": 30
},
{
"epoch": 0.2681081081081081,
"grad_norm": 0.43699413537979126,
"learning_rate": 0.00014867256637168144,
"loss": 0.3362,
"step": 31
},
{
"epoch": 0.27675675675675676,
"grad_norm": 0.4950752258300781,
"learning_rate": 0.00014690265486725664,
"loss": 0.4464,
"step": 32
},
{
"epoch": 0.28540540540540543,
"grad_norm": 0.4312315881252289,
"learning_rate": 0.00014513274336283187,
"loss": 0.4786,
"step": 33
},
{
"epoch": 0.29405405405405405,
"grad_norm": 0.45234543085098267,
"learning_rate": 0.0001433628318584071,
"loss": 0.5572,
"step": 34
},
{
"epoch": 0.3027027027027027,
"grad_norm": 0.4373219311237335,
"learning_rate": 0.0001415929203539823,
"loss": 0.3873,
"step": 35
},
{
"epoch": 0.31135135135135134,
"grad_norm": 0.35862988233566284,
"learning_rate": 0.00013982300884955753,
"loss": 0.2902,
"step": 36
},
{
"epoch": 0.32,
"grad_norm": 0.41014787554740906,
"learning_rate": 0.00013805309734513276,
"loss": 0.3806,
"step": 37
},
{
"epoch": 0.3286486486486486,
"grad_norm": 0.4181463420391083,
"learning_rate": 0.00013628318584070796,
"loss": 0.3036,
"step": 38
},
{
"epoch": 0.3372972972972973,
"grad_norm": 0.3663095235824585,
"learning_rate": 0.00013451327433628321,
"loss": 0.1979,
"step": 39
},
{
"epoch": 0.34594594594594597,
"grad_norm": 0.46295005083084106,
"learning_rate": 0.00013274336283185842,
"loss": 0.4204,
"step": 40
},
{
"epoch": 0.3545945945945946,
"grad_norm": 0.39596325159072876,
"learning_rate": 0.00013097345132743365,
"loss": 0.3512,
"step": 41
},
{
"epoch": 0.36324324324324325,
"grad_norm": 0.7628335952758789,
"learning_rate": 0.00012920353982300885,
"loss": 0.4965,
"step": 42
},
{
"epoch": 0.37189189189189187,
"grad_norm": 0.5216770172119141,
"learning_rate": 0.00012743362831858408,
"loss": 0.4658,
"step": 43
},
{
"epoch": 0.38054054054054054,
"grad_norm": 0.38578447699546814,
"learning_rate": 0.0001256637168141593,
"loss": 0.2661,
"step": 44
},
{
"epoch": 0.3891891891891892,
"grad_norm": 0.2811882197856903,
"learning_rate": 0.0001238938053097345,
"loss": 0.1545,
"step": 45
},
{
"epoch": 0.3978378378378378,
"grad_norm": 0.3812131881713867,
"learning_rate": 0.00012212389380530974,
"loss": 0.3295,
"step": 46
},
{
"epoch": 0.4064864864864865,
"grad_norm": 0.3791070878505707,
"learning_rate": 0.00012035398230088497,
"loss": 0.2472,
"step": 47
},
{
"epoch": 0.4151351351351351,
"grad_norm": 0.38515138626098633,
"learning_rate": 0.0001185840707964602,
"loss": 0.4042,
"step": 48
},
{
"epoch": 0.4237837837837838,
"grad_norm": 0.5093116164207458,
"learning_rate": 0.00011681415929203541,
"loss": 0.8376,
"step": 49
},
{
"epoch": 0.43243243243243246,
"grad_norm": 0.2971178889274597,
"learning_rate": 0.00011504424778761063,
"loss": 0.4082,
"step": 50
},
{
"epoch": 0.4410810810810811,
"grad_norm": 0.30018818378448486,
"learning_rate": 0.00011327433628318584,
"loss": 0.129,
"step": 51
},
{
"epoch": 0.44972972972972974,
"grad_norm": 0.4631483256816864,
"learning_rate": 0.00011150442477876106,
"loss": 0.3752,
"step": 52
},
{
"epoch": 0.45837837837837836,
"grad_norm": 0.3890452980995178,
"learning_rate": 0.00010973451327433629,
"loss": 0.4054,
"step": 53
},
{
"epoch": 0.46702702702702703,
"grad_norm": 0.3566686511039734,
"learning_rate": 0.0001079646017699115,
"loss": 0.2452,
"step": 54
},
{
"epoch": 0.4756756756756757,
"grad_norm": 0.4903372526168823,
"learning_rate": 0.00010619469026548674,
"loss": 0.4505,
"step": 55
},
{
"epoch": 0.4843243243243243,
"grad_norm": 0.3836239278316498,
"learning_rate": 0.00010442477876106196,
"loss": 0.3952,
"step": 56
},
{
"epoch": 0.492972972972973,
"grad_norm": 0.42047417163848877,
"learning_rate": 0.00010265486725663717,
"loss": 0.5074,
"step": 57
},
{
"epoch": 0.5016216216216216,
"grad_norm": 0.24409635365009308,
"learning_rate": 0.00010088495575221239,
"loss": 0.1389,
"step": 58
},
{
"epoch": 0.5102702702702703,
"grad_norm": 0.3819220960140228,
"learning_rate": 9.911504424778762e-05,
"loss": 0.3945,
"step": 59
},
{
"epoch": 0.518918918918919,
"grad_norm": 0.31148406863212585,
"learning_rate": 9.734513274336283e-05,
"loss": 0.5203,
"step": 60
},
{
"epoch": 0.5275675675675676,
"grad_norm": 0.3157011866569519,
"learning_rate": 9.557522123893806e-05,
"loss": 0.262,
"step": 61
},
{
"epoch": 0.5362162162162162,
"grad_norm": 0.40180379152297974,
"learning_rate": 9.380530973451328e-05,
"loss": 0.2404,
"step": 62
},
{
"epoch": 0.5448648648648649,
"grad_norm": 0.4064180552959442,
"learning_rate": 9.20353982300885e-05,
"loss": 0.6118,
"step": 63
},
{
"epoch": 0.5535135135135135,
"grad_norm": 0.3912467956542969,
"learning_rate": 9.026548672566371e-05,
"loss": 0.271,
"step": 64
},
{
"epoch": 0.5621621621621622,
"grad_norm": 0.31059980392456055,
"learning_rate": 8.849557522123895e-05,
"loss": 0.2373,
"step": 65
},
{
"epoch": 0.5708108108108109,
"grad_norm": 0.30928152799606323,
"learning_rate": 8.672566371681417e-05,
"loss": 0.4169,
"step": 66
},
{
"epoch": 0.5794594594594594,
"grad_norm": 0.40631791949272156,
"learning_rate": 8.495575221238938e-05,
"loss": 0.4175,
"step": 67
},
{
"epoch": 0.5881081081081081,
"grad_norm": 0.40440961718559265,
"learning_rate": 8.31858407079646e-05,
"loss": 0.3269,
"step": 68
},
{
"epoch": 0.5967567567567568,
"grad_norm": 0.4534294009208679,
"learning_rate": 8.141592920353983e-05,
"loss": 0.2242,
"step": 69
},
{
"epoch": 0.6054054054054054,
"grad_norm": 0.41317978501319885,
"learning_rate": 7.964601769911504e-05,
"loss": 0.2633,
"step": 70
},
{
"epoch": 0.614054054054054,
"grad_norm": 0.272535115480423,
"learning_rate": 7.787610619469027e-05,
"loss": 0.1455,
"step": 71
},
{
"epoch": 0.6227027027027027,
"grad_norm": 0.4280416667461395,
"learning_rate": 7.610619469026549e-05,
"loss": 0.5289,
"step": 72
},
{
"epoch": 0.6313513513513513,
"grad_norm": 0.4870530664920807,
"learning_rate": 7.433628318584072e-05,
"loss": 0.5633,
"step": 73
},
{
"epoch": 0.64,
"grad_norm": 0.38074707984924316,
"learning_rate": 7.256637168141593e-05,
"loss": 0.4738,
"step": 74
},
{
"epoch": 0.6486486486486487,
"grad_norm": 0.32775411009788513,
"learning_rate": 7.079646017699115e-05,
"loss": 0.2764,
"step": 75
},
{
"epoch": 0.6572972972972972,
"grad_norm": 0.3663316071033478,
"learning_rate": 6.902654867256638e-05,
"loss": 0.4794,
"step": 76
},
{
"epoch": 0.6659459459459459,
"grad_norm": 0.36854031682014465,
"learning_rate": 6.725663716814161e-05,
"loss": 0.1809,
"step": 77
},
{
"epoch": 0.6745945945945946,
"grad_norm": 0.37296342849731445,
"learning_rate": 6.548672566371682e-05,
"loss": 0.4067,
"step": 78
},
{
"epoch": 0.6832432432432433,
"grad_norm": 0.4202044606208801,
"learning_rate": 6.371681415929204e-05,
"loss": 0.2752,
"step": 79
},
{
"epoch": 0.6918918918918919,
"grad_norm": 0.29250282049179077,
"learning_rate": 6.194690265486725e-05,
"loss": 0.1461,
"step": 80
},
{
"epoch": 0.7005405405405405,
"grad_norm": 0.37763354182243347,
"learning_rate": 6.017699115044248e-05,
"loss": 0.2817,
"step": 81
},
{
"epoch": 0.7091891891891892,
"grad_norm": 0.30031171441078186,
"learning_rate": 5.8407079646017705e-05,
"loss": 0.1572,
"step": 82
},
{
"epoch": 0.7178378378378378,
"grad_norm": 0.4519175887107849,
"learning_rate": 5.663716814159292e-05,
"loss": 0.3046,
"step": 83
},
{
"epoch": 0.7264864864864865,
"grad_norm": 0.3103352189064026,
"learning_rate": 5.486725663716814e-05,
"loss": 0.1347,
"step": 84
},
{
"epoch": 0.7351351351351352,
"grad_norm": 0.7960600852966309,
"learning_rate": 5.309734513274337e-05,
"loss": 0.3168,
"step": 85
},
{
"epoch": 0.7437837837837837,
"grad_norm": 0.3281419277191162,
"learning_rate": 5.132743362831859e-05,
"loss": 0.2045,
"step": 86
},
{
"epoch": 0.7524324324324324,
"grad_norm": 0.35785752534866333,
"learning_rate": 4.955752212389381e-05,
"loss": 0.4077,
"step": 87
},
{
"epoch": 0.7610810810810811,
"grad_norm": 0.37461650371551514,
"learning_rate": 4.778761061946903e-05,
"loss": 0.3227,
"step": 88
},
{
"epoch": 0.7697297297297298,
"grad_norm": 0.3365744352340698,
"learning_rate": 4.601769911504425e-05,
"loss": 0.2306,
"step": 89
},
{
"epoch": 0.7783783783783784,
"grad_norm": 0.29543980956077576,
"learning_rate": 4.4247787610619477e-05,
"loss": 0.3661,
"step": 90
},
{
"epoch": 0.787027027027027,
"grad_norm": 0.3135324716567993,
"learning_rate": 4.247787610619469e-05,
"loss": 0.2503,
"step": 91
},
{
"epoch": 0.7956756756756757,
"grad_norm": 0.23556429147720337,
"learning_rate": 4.0707964601769914e-05,
"loss": 0.1044,
"step": 92
},
{
"epoch": 0.8043243243243243,
"grad_norm": 0.2718769907951355,
"learning_rate": 3.893805309734514e-05,
"loss": 0.1471,
"step": 93
},
{
"epoch": 0.812972972972973,
"grad_norm": 0.25528448820114136,
"learning_rate": 3.716814159292036e-05,
"loss": 0.1126,
"step": 94
},
{
"epoch": 0.8216216216216217,
"grad_norm": 0.514164388179779,
"learning_rate": 3.5398230088495574e-05,
"loss": 0.3423,
"step": 95
},
{
"epoch": 0.8302702702702702,
"grad_norm": 0.33162716031074524,
"learning_rate": 3.3628318584070804e-05,
"loss": 0.3637,
"step": 96
},
{
"epoch": 0.8389189189189189,
"grad_norm": 0.25161704421043396,
"learning_rate": 3.185840707964602e-05,
"loss": 0.1284,
"step": 97
},
{
"epoch": 0.8475675675675676,
"grad_norm": 0.32825589179992676,
"learning_rate": 3.008849557522124e-05,
"loss": 0.2171,
"step": 98
},
{
"epoch": 0.8562162162162162,
"grad_norm": 0.23435255885124207,
"learning_rate": 2.831858407079646e-05,
"loss": 0.16,
"step": 99
},
{
"epoch": 0.8648648648648649,
"grad_norm": 0.2661581337451935,
"learning_rate": 2.6548672566371686e-05,
"loss": 0.2421,
"step": 100
},
{
"epoch": 0.8735135135135135,
"grad_norm": 0.2724602222442627,
"learning_rate": 2.4778761061946905e-05,
"loss": 0.1246,
"step": 101
},
{
"epoch": 0.8821621621621621,
"grad_norm": 0.47894561290740967,
"learning_rate": 2.3008849557522124e-05,
"loss": 0.4472,
"step": 102
},
{
"epoch": 0.8908108108108108,
"grad_norm": 0.3064163327217102,
"learning_rate": 2.1238938053097346e-05,
"loss": 0.2987,
"step": 103
},
{
"epoch": 0.8994594594594595,
"grad_norm": 0.4226900637149811,
"learning_rate": 1.946902654867257e-05,
"loss": 0.4185,
"step": 104
},
{
"epoch": 0.9081081081081082,
"grad_norm": 0.34745219349861145,
"learning_rate": 1.7699115044247787e-05,
"loss": 0.2572,
"step": 105
},
{
"epoch": 0.9167567567567567,
"grad_norm": 0.35236531496047974,
"learning_rate": 1.592920353982301e-05,
"loss": 0.3427,
"step": 106
},
{
"epoch": 0.9254054054054054,
"grad_norm": 0.37095391750335693,
"learning_rate": 1.415929203539823e-05,
"loss": 0.4018,
"step": 107
},
{
"epoch": 0.9340540540540541,
"grad_norm": 0.3331229090690613,
"learning_rate": 1.2389380530973452e-05,
"loss": 0.2038,
"step": 108
},
{
"epoch": 0.9427027027027027,
"grad_norm": 0.2652183175086975,
"learning_rate": 1.0619469026548673e-05,
"loss": 0.1072,
"step": 109
},
{
"epoch": 0.9513513513513514,
"grad_norm": 0.29123690724372864,
"learning_rate": 8.849557522123894e-06,
"loss": 0.1406,
"step": 110
},
{
"epoch": 0.96,
"grad_norm": 0.3317340612411499,
"learning_rate": 7.079646017699115e-06,
"loss": 0.2202,
"step": 111
},
{
"epoch": 0.9686486486486486,
"grad_norm": 0.47986647486686707,
"learning_rate": 5.3097345132743365e-06,
"loss": 0.3464,
"step": 112
},
{
"epoch": 0.9772972972972973,
"grad_norm": 0.2612822949886322,
"learning_rate": 3.5398230088495575e-06,
"loss": 0.1271,
"step": 113
},
{
"epoch": 0.985945945945946,
"grad_norm": 0.26845863461494446,
"learning_rate": 1.7699115044247788e-06,
"loss": 0.1044,
"step": 114
},
{
"epoch": 0.9945945945945946,
"grad_norm": 0.2526237368583679,
"learning_rate": 0.0,
"loss": 0.1158,
"step": 115
},
{
"epoch": 0.9945945945945946,
"step": 115,
"total_flos": 1.3431114641260646e+17,
"train_loss": 0.4029887131374815,
"train_runtime": 1125.7865,
"train_samples_per_second": 0.822,
"train_steps_per_second": 0.102
}
],
"logging_steps": 1,
"max_steps": 115,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.3431114641260646e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}