bert-base-uncased-mean-500 / trainer_state.json
sobamchan's picture
Upload folder using huggingface_hub
003e476 verified
raw
history blame
23.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.11470520761642579,
"eval_steps": 5,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0011470520761642578,
"eval_loss": 2.755366325378418,
"eval_runtime": 14.0088,
"eval_samples_per_second": 469.989,
"eval_steps_per_second": 3.712,
"step": 5
},
{
"epoch": 0.0022941041523285156,
"eval_loss": 2.750617027282715,
"eval_runtime": 14.0654,
"eval_samples_per_second": 468.099,
"eval_steps_per_second": 3.697,
"step": 10
},
{
"epoch": 0.0034411562284927736,
"eval_loss": 2.7424304485321045,
"eval_runtime": 14.1186,
"eval_samples_per_second": 466.336,
"eval_steps_per_second": 3.683,
"step": 15
},
{
"epoch": 0.004588208304657031,
"eval_loss": 2.730889081954956,
"eval_runtime": 14.2451,
"eval_samples_per_second": 462.193,
"eval_steps_per_second": 3.65,
"step": 20
},
{
"epoch": 0.005735260380821289,
"eval_loss": 2.7159576416015625,
"eval_runtime": 14.245,
"eval_samples_per_second": 462.199,
"eval_steps_per_second": 3.65,
"step": 25
},
{
"epoch": 0.006882312456985547,
"eval_loss": 2.6975343227386475,
"eval_runtime": 14.3448,
"eval_samples_per_second": 458.981,
"eval_steps_per_second": 3.625,
"step": 30
},
{
"epoch": 0.008029364533149804,
"eval_loss": 2.675693988800049,
"eval_runtime": 16.965,
"eval_samples_per_second": 388.094,
"eval_steps_per_second": 3.065,
"step": 35
},
{
"epoch": 0.009176416609314062,
"eval_loss": 2.650242328643799,
"eval_runtime": 14.2414,
"eval_samples_per_second": 462.315,
"eval_steps_per_second": 3.651,
"step": 40
},
{
"epoch": 0.01032346868547832,
"eval_loss": 2.6213560104370117,
"eval_runtime": 14.4168,
"eval_samples_per_second": 456.689,
"eval_steps_per_second": 3.607,
"step": 45
},
{
"epoch": 0.011470520761642579,
"eval_loss": 2.5892951488494873,
"eval_runtime": 14.3332,
"eval_samples_per_second": 459.353,
"eval_steps_per_second": 3.628,
"step": 50
},
{
"epoch": 0.012617572837806837,
"eval_loss": 2.5538384914398193,
"eval_runtime": 14.4067,
"eval_samples_per_second": 457.01,
"eval_steps_per_second": 3.609,
"step": 55
},
{
"epoch": 0.013764624913971095,
"eval_loss": 2.51452374458313,
"eval_runtime": 14.4948,
"eval_samples_per_second": 454.233,
"eval_steps_per_second": 3.588,
"step": 60
},
{
"epoch": 0.014911676990135353,
"eval_loss": 2.4726295471191406,
"eval_runtime": 14.3162,
"eval_samples_per_second": 459.898,
"eval_steps_per_second": 3.632,
"step": 65
},
{
"epoch": 0.01605872906629961,
"eval_loss": 2.428196668624878,
"eval_runtime": 14.3204,
"eval_samples_per_second": 459.763,
"eval_steps_per_second": 3.631,
"step": 70
},
{
"epoch": 0.017205781142463867,
"eval_loss": 2.379453659057617,
"eval_runtime": 14.3796,
"eval_samples_per_second": 457.872,
"eval_steps_per_second": 3.616,
"step": 75
},
{
"epoch": 0.018352833218628125,
"eval_loss": 2.327221393585205,
"eval_runtime": 14.3415,
"eval_samples_per_second": 459.089,
"eval_steps_per_second": 3.626,
"step": 80
},
{
"epoch": 0.019499885294792383,
"eval_loss": 2.2712388038635254,
"eval_runtime": 14.462,
"eval_samples_per_second": 455.263,
"eval_steps_per_second": 3.596,
"step": 85
},
{
"epoch": 0.02064693737095664,
"eval_loss": 2.2120068073272705,
"eval_runtime": 14.334,
"eval_samples_per_second": 459.326,
"eval_steps_per_second": 3.628,
"step": 90
},
{
"epoch": 0.0217939894471209,
"eval_loss": 2.1501331329345703,
"eval_runtime": 14.3506,
"eval_samples_per_second": 458.796,
"eval_steps_per_second": 3.624,
"step": 95
},
{
"epoch": 0.022941041523285157,
"grad_norm": 19.895355224609375,
"learning_rate": 7.645259938837921e-07,
"loss": 3.6197,
"step": 100
},
{
"epoch": 0.022941041523285157,
"eval_loss": 2.086596727371216,
"eval_runtime": 16.6465,
"eval_samples_per_second": 395.519,
"eval_steps_per_second": 3.124,
"step": 100
},
{
"epoch": 0.024088093599449415,
"eval_loss": 2.0223236083984375,
"eval_runtime": 14.471,
"eval_samples_per_second": 454.979,
"eval_steps_per_second": 3.593,
"step": 105
},
{
"epoch": 0.025235145675613673,
"eval_loss": 1.9571231603622437,
"eval_runtime": 14.3783,
"eval_samples_per_second": 457.913,
"eval_steps_per_second": 3.617,
"step": 110
},
{
"epoch": 0.02638219775177793,
"eval_loss": 1.890655517578125,
"eval_runtime": 14.288,
"eval_samples_per_second": 460.805,
"eval_steps_per_second": 3.639,
"step": 115
},
{
"epoch": 0.02752924982794219,
"eval_loss": 1.823920488357544,
"eval_runtime": 14.3764,
"eval_samples_per_second": 457.972,
"eval_steps_per_second": 3.617,
"step": 120
},
{
"epoch": 0.028676301904106447,
"eval_loss": 1.758331298828125,
"eval_runtime": 14.2666,
"eval_samples_per_second": 461.499,
"eval_steps_per_second": 3.645,
"step": 125
},
{
"epoch": 0.029823353980270705,
"eval_loss": 1.6937522888183594,
"eval_runtime": 14.2522,
"eval_samples_per_second": 461.963,
"eval_steps_per_second": 3.649,
"step": 130
},
{
"epoch": 0.030970406056434963,
"eval_loss": 1.6316019296646118,
"eval_runtime": 16.3104,
"eval_samples_per_second": 403.67,
"eval_steps_per_second": 3.188,
"step": 135
},
{
"epoch": 0.03211745813259922,
"eval_loss": 1.571895718574524,
"eval_runtime": 14.2422,
"eval_samples_per_second": 462.288,
"eval_steps_per_second": 3.651,
"step": 140
},
{
"epoch": 0.033264510208763476,
"eval_loss": 1.5148202180862427,
"eval_runtime": 14.3624,
"eval_samples_per_second": 458.418,
"eval_steps_per_second": 3.621,
"step": 145
},
{
"epoch": 0.034411562284927734,
"eval_loss": 1.459762454032898,
"eval_runtime": 14.3035,
"eval_samples_per_second": 460.308,
"eval_steps_per_second": 3.635,
"step": 150
},
{
"epoch": 0.03555861436109199,
"eval_loss": 1.4081143140792847,
"eval_runtime": 14.2988,
"eval_samples_per_second": 460.46,
"eval_steps_per_second": 3.637,
"step": 155
},
{
"epoch": 0.03670566643725625,
"eval_loss": 1.3612124919891357,
"eval_runtime": 14.2569,
"eval_samples_per_second": 461.812,
"eval_steps_per_second": 3.647,
"step": 160
},
{
"epoch": 0.03785271851342051,
"eval_loss": 1.318212866783142,
"eval_runtime": 14.282,
"eval_samples_per_second": 460.999,
"eval_steps_per_second": 3.641,
"step": 165
},
{
"epoch": 0.038999770589584766,
"eval_loss": 1.2802687883377075,
"eval_runtime": 14.4058,
"eval_samples_per_second": 457.038,
"eval_steps_per_second": 3.61,
"step": 170
},
{
"epoch": 0.040146822665749024,
"eval_loss": 1.246294617652893,
"eval_runtime": 14.2804,
"eval_samples_per_second": 461.051,
"eval_steps_per_second": 3.641,
"step": 175
},
{
"epoch": 0.04129387474191328,
"eval_loss": 1.2160167694091797,
"eval_runtime": 14.3449,
"eval_samples_per_second": 458.977,
"eval_steps_per_second": 3.625,
"step": 180
},
{
"epoch": 0.04244092681807754,
"eval_loss": 1.189509630203247,
"eval_runtime": 14.2361,
"eval_samples_per_second": 462.486,
"eval_steps_per_second": 3.653,
"step": 185
},
{
"epoch": 0.0435879788942418,
"eval_loss": 1.1653709411621094,
"eval_runtime": 14.3748,
"eval_samples_per_second": 458.025,
"eval_steps_per_second": 3.617,
"step": 190
},
{
"epoch": 0.044735030970406056,
"eval_loss": 1.143513798713684,
"eval_runtime": 14.25,
"eval_samples_per_second": 462.035,
"eval_steps_per_second": 3.649,
"step": 195
},
{
"epoch": 0.045882083046570314,
"grad_norm": 8.192963600158691,
"learning_rate": 1.5290519877675841e-06,
"loss": 2.292,
"step": 200
},
{
"epoch": 0.045882083046570314,
"eval_loss": 1.1239805221557617,
"eval_runtime": 16.3012,
"eval_samples_per_second": 403.896,
"eval_steps_per_second": 3.19,
"step": 200
},
{
"epoch": 0.04702913512273457,
"eval_loss": 1.1064891815185547,
"eval_runtime": 14.2673,
"eval_samples_per_second": 461.476,
"eval_steps_per_second": 3.645,
"step": 205
},
{
"epoch": 0.04817618719889883,
"eval_loss": 1.0907284021377563,
"eval_runtime": 14.2601,
"eval_samples_per_second": 461.707,
"eval_steps_per_second": 3.647,
"step": 210
},
{
"epoch": 0.04932323927506309,
"eval_loss": 1.076059341430664,
"eval_runtime": 14.2845,
"eval_samples_per_second": 460.919,
"eval_steps_per_second": 3.64,
"step": 215
},
{
"epoch": 0.050470291351227346,
"eval_loss": 1.062280535697937,
"eval_runtime": 14.217,
"eval_samples_per_second": 463.109,
"eval_steps_per_second": 3.658,
"step": 220
},
{
"epoch": 0.051617343427391604,
"eval_loss": 1.0492550134658813,
"eval_runtime": 14.2233,
"eval_samples_per_second": 462.901,
"eval_steps_per_second": 3.656,
"step": 225
},
{
"epoch": 0.05276439550355586,
"eval_loss": 1.0374095439910889,
"eval_runtime": 14.5693,
"eval_samples_per_second": 451.909,
"eval_steps_per_second": 3.569,
"step": 230
},
{
"epoch": 0.05391144757972012,
"eval_loss": 1.0259910821914673,
"eval_runtime": 15.9876,
"eval_samples_per_second": 411.819,
"eval_steps_per_second": 3.253,
"step": 235
},
{
"epoch": 0.05505849965588438,
"eval_loss": 1.0147359371185303,
"eval_runtime": 14.2101,
"eval_samples_per_second": 463.332,
"eval_steps_per_second": 3.659,
"step": 240
},
{
"epoch": 0.056205551732048636,
"eval_loss": 1.0042893886566162,
"eval_runtime": 14.2547,
"eval_samples_per_second": 461.882,
"eval_steps_per_second": 3.648,
"step": 245
},
{
"epoch": 0.057352603808212894,
"eval_loss": 0.9941452741622925,
"eval_runtime": 14.2125,
"eval_samples_per_second": 463.253,
"eval_steps_per_second": 3.659,
"step": 250
},
{
"epoch": 0.05849965588437715,
"eval_loss": 0.9848644733428955,
"eval_runtime": 14.2628,
"eval_samples_per_second": 461.621,
"eval_steps_per_second": 3.646,
"step": 255
},
{
"epoch": 0.05964670796054141,
"eval_loss": 0.9763049483299255,
"eval_runtime": 14.2014,
"eval_samples_per_second": 463.617,
"eval_steps_per_second": 3.662,
"step": 260
},
{
"epoch": 0.06079376003670567,
"eval_loss": 0.9682185649871826,
"eval_runtime": 16.4598,
"eval_samples_per_second": 400.004,
"eval_steps_per_second": 3.159,
"step": 265
},
{
"epoch": 0.06194081211286993,
"eval_loss": 0.9602033495903015,
"eval_runtime": 14.3432,
"eval_samples_per_second": 459.031,
"eval_steps_per_second": 3.625,
"step": 270
},
{
"epoch": 0.06308786418903418,
"eval_loss": 0.952538251876831,
"eval_runtime": 14.2143,
"eval_samples_per_second": 463.195,
"eval_steps_per_second": 3.658,
"step": 275
},
{
"epoch": 0.06423491626519844,
"eval_loss": 0.9450673460960388,
"eval_runtime": 14.2195,
"eval_samples_per_second": 463.025,
"eval_steps_per_second": 3.657,
"step": 280
},
{
"epoch": 0.0653819683413627,
"eval_loss": 0.937529981136322,
"eval_runtime": 14.2726,
"eval_samples_per_second": 461.303,
"eval_steps_per_second": 3.643,
"step": 285
},
{
"epoch": 0.06652902041752695,
"eval_loss": 0.930277407169342,
"eval_runtime": 14.5409,
"eval_samples_per_second": 452.791,
"eval_steps_per_second": 3.576,
"step": 290
},
{
"epoch": 0.06767607249369122,
"eval_loss": 0.9230740666389465,
"eval_runtime": 14.2552,
"eval_samples_per_second": 461.868,
"eval_steps_per_second": 3.648,
"step": 295
},
{
"epoch": 0.06882312456985547,
"grad_norm": 7.243612766265869,
"learning_rate": 2.2935779816513764e-06,
"loss": 1.5711,
"step": 300
},
{
"epoch": 0.06882312456985547,
"eval_loss": 0.915981650352478,
"eval_runtime": 16.3997,
"eval_samples_per_second": 401.471,
"eval_steps_per_second": 3.171,
"step": 300
},
{
"epoch": 0.06997017664601973,
"eval_loss": 0.9087598323822021,
"eval_runtime": 14.1993,
"eval_samples_per_second": 463.685,
"eval_steps_per_second": 3.662,
"step": 305
},
{
"epoch": 0.07111722872218398,
"eval_loss": 0.9022247791290283,
"eval_runtime": 14.3064,
"eval_samples_per_second": 460.214,
"eval_steps_per_second": 3.635,
"step": 310
},
{
"epoch": 0.07226428079834825,
"eval_loss": 0.8950537443161011,
"eval_runtime": 14.2087,
"eval_samples_per_second": 463.377,
"eval_steps_per_second": 3.66,
"step": 315
},
{
"epoch": 0.0734113328745125,
"eval_loss": 0.8874984383583069,
"eval_runtime": 14.2295,
"eval_samples_per_second": 462.699,
"eval_steps_per_second": 3.654,
"step": 320
},
{
"epoch": 0.07455838495067676,
"eval_loss": 0.8809635043144226,
"eval_runtime": 14.2266,
"eval_samples_per_second": 462.796,
"eval_steps_per_second": 3.655,
"step": 325
},
{
"epoch": 0.07570543702684102,
"eval_loss": 0.8745627403259277,
"eval_runtime": 14.2942,
"eval_samples_per_second": 460.608,
"eval_steps_per_second": 3.638,
"step": 330
},
{
"epoch": 0.07685248910300528,
"eval_loss": 0.8683872818946838,
"eval_runtime": 14.1413,
"eval_samples_per_second": 465.586,
"eval_steps_per_second": 3.677,
"step": 335
},
{
"epoch": 0.07799954117916953,
"eval_loss": 0.8624699115753174,
"eval_runtime": 14.1694,
"eval_samples_per_second": 464.662,
"eval_steps_per_second": 3.67,
"step": 340
},
{
"epoch": 0.0791465932553338,
"eval_loss": 0.8569262027740479,
"eval_runtime": 14.2132,
"eval_samples_per_second": 463.231,
"eval_steps_per_second": 3.659,
"step": 345
},
{
"epoch": 0.08029364533149805,
"eval_loss": 0.8515614867210388,
"eval_runtime": 14.3011,
"eval_samples_per_second": 460.383,
"eval_steps_per_second": 3.636,
"step": 350
},
{
"epoch": 0.08144069740766231,
"eval_loss": 0.8465690612792969,
"eval_runtime": 14.2483,
"eval_samples_per_second": 462.09,
"eval_steps_per_second": 3.65,
"step": 355
},
{
"epoch": 0.08258774948382656,
"eval_loss": 0.8418980836868286,
"eval_runtime": 14.1978,
"eval_samples_per_second": 463.735,
"eval_steps_per_second": 3.663,
"step": 360
},
{
"epoch": 0.08373480155999083,
"eval_loss": 0.836972177028656,
"eval_runtime": 16.4774,
"eval_samples_per_second": 399.577,
"eval_steps_per_second": 3.156,
"step": 365
},
{
"epoch": 0.08488185363615508,
"eval_loss": 0.8320812582969666,
"eval_runtime": 14.3098,
"eval_samples_per_second": 460.103,
"eval_steps_per_second": 3.634,
"step": 370
},
{
"epoch": 0.08602890571231935,
"eval_loss": 0.8273819088935852,
"eval_runtime": 14.2755,
"eval_samples_per_second": 461.209,
"eval_steps_per_second": 3.643,
"step": 375
},
{
"epoch": 0.0871759577884836,
"eval_loss": 0.8223117589950562,
"eval_runtime": 14.1567,
"eval_samples_per_second": 465.081,
"eval_steps_per_second": 3.673,
"step": 380
},
{
"epoch": 0.08832300986464786,
"eval_loss": 0.8169983625411987,
"eval_runtime": 14.1912,
"eval_samples_per_second": 463.948,
"eval_steps_per_second": 3.664,
"step": 385
},
{
"epoch": 0.08947006194081211,
"eval_loss": 0.811504602432251,
"eval_runtime": 14.1791,
"eval_samples_per_second": 464.347,
"eval_steps_per_second": 3.667,
"step": 390
},
{
"epoch": 0.09061711401697638,
"eval_loss": 0.8054670095443726,
"eval_runtime": 14.2016,
"eval_samples_per_second": 463.611,
"eval_steps_per_second": 3.662,
"step": 395
},
{
"epoch": 0.09176416609314063,
"grad_norm": 5.852241516113281,
"learning_rate": 3.0581039755351682e-06,
"loss": 1.3859,
"step": 400
},
{
"epoch": 0.09176416609314063,
"eval_loss": 0.8003625273704529,
"eval_runtime": 16.3667,
"eval_samples_per_second": 402.281,
"eval_steps_per_second": 3.177,
"step": 400
},
{
"epoch": 0.0929112181693049,
"eval_loss": 0.7954948544502258,
"eval_runtime": 14.1443,
"eval_samples_per_second": 465.489,
"eval_steps_per_second": 3.676,
"step": 405
},
{
"epoch": 0.09405827024546914,
"eval_loss": 0.7906287312507629,
"eval_runtime": 14.2485,
"eval_samples_per_second": 462.082,
"eval_steps_per_second": 3.649,
"step": 410
},
{
"epoch": 0.0952053223216334,
"eval_loss": 0.7856321930885315,
"eval_runtime": 14.2745,
"eval_samples_per_second": 461.242,
"eval_steps_per_second": 3.643,
"step": 415
},
{
"epoch": 0.09635237439779766,
"eval_loss": 0.7809005379676819,
"eval_runtime": 14.2039,
"eval_samples_per_second": 463.536,
"eval_steps_per_second": 3.661,
"step": 420
},
{
"epoch": 0.09749942647396191,
"eval_loss": 0.7759343981742859,
"eval_runtime": 14.2261,
"eval_samples_per_second": 462.813,
"eval_steps_per_second": 3.655,
"step": 425
},
{
"epoch": 0.09864647855012618,
"eval_loss": 0.7706864476203918,
"eval_runtime": 16.5161,
"eval_samples_per_second": 398.642,
"eval_steps_per_second": 3.148,
"step": 430
},
{
"epoch": 0.09979353062629043,
"eval_loss": 0.765408992767334,
"eval_runtime": 14.193,
"eval_samples_per_second": 463.89,
"eval_steps_per_second": 3.664,
"step": 435
},
{
"epoch": 0.10094058270245469,
"eval_loss": 0.7602970600128174,
"eval_runtime": 14.2387,
"eval_samples_per_second": 462.402,
"eval_steps_per_second": 3.652,
"step": 440
},
{
"epoch": 0.10208763477861894,
"eval_loss": 0.7555930018424988,
"eval_runtime": 14.1713,
"eval_samples_per_second": 464.601,
"eval_steps_per_second": 3.669,
"step": 445
},
{
"epoch": 0.10323468685478321,
"eval_loss": 0.7515881061553955,
"eval_runtime": 14.2211,
"eval_samples_per_second": 462.973,
"eval_steps_per_second": 3.657,
"step": 450
},
{
"epoch": 0.10438173893094746,
"eval_loss": 0.7474184036254883,
"eval_runtime": 14.1814,
"eval_samples_per_second": 464.27,
"eval_steps_per_second": 3.667,
"step": 455
},
{
"epoch": 0.10552879100711172,
"eval_loss": 0.7430188059806824,
"eval_runtime": 14.2099,
"eval_samples_per_second": 463.34,
"eval_steps_per_second": 3.659,
"step": 460
},
{
"epoch": 0.10667584308327598,
"eval_loss": 0.7378737330436707,
"eval_runtime": 16.2296,
"eval_samples_per_second": 405.68,
"eval_steps_per_second": 3.204,
"step": 465
},
{
"epoch": 0.10782289515944024,
"eval_loss": 0.7332107424736023,
"eval_runtime": 14.1689,
"eval_samples_per_second": 464.68,
"eval_steps_per_second": 3.67,
"step": 470
},
{
"epoch": 0.10896994723560449,
"eval_loss": 0.7287834286689758,
"eval_runtime": 14.4646,
"eval_samples_per_second": 455.18,
"eval_steps_per_second": 3.595,
"step": 475
},
{
"epoch": 0.11011699931176876,
"eval_loss": 0.7247486710548401,
"eval_runtime": 14.1683,
"eval_samples_per_second": 464.699,
"eval_steps_per_second": 3.67,
"step": 480
},
{
"epoch": 0.11126405138793301,
"eval_loss": 0.7211238741874695,
"eval_runtime": 14.235,
"eval_samples_per_second": 462.523,
"eval_steps_per_second": 3.653,
"step": 485
},
{
"epoch": 0.11241110346409727,
"eval_loss": 0.7170650959014893,
"eval_runtime": 14.2367,
"eval_samples_per_second": 462.466,
"eval_steps_per_second": 3.653,
"step": 490
},
{
"epoch": 0.11355815554026152,
"eval_loss": 0.7122666835784912,
"eval_runtime": 14.2574,
"eval_samples_per_second": 461.796,
"eval_steps_per_second": 3.647,
"step": 495
},
{
"epoch": 0.11470520761642579,
"grad_norm": 5.7844438552856445,
"learning_rate": 3.8226299694189605e-06,
"loss": 1.2632,
"step": 500
},
{
"epoch": 0.11470520761642579,
"eval_loss": 0.7078971862792969,
"eval_runtime": 14.238,
"eval_samples_per_second": 462.423,
"eval_steps_per_second": 3.652,
"step": 500
}
],
"logging_steps": 100,
"max_steps": 13077,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}