|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9991245987744383, |
|
"eval_steps": 36, |
|
"global_step": 107, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.009337613072658301, |
|
"grad_norm": 1.3116651734261826, |
|
"learning_rate": 1.25e-06, |
|
"loss": 11.8241, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009337613072658301, |
|
"eval_loss": 11.651493072509766, |
|
"eval_runtime": 43.8088, |
|
"eval_samples_per_second": 6.3, |
|
"eval_steps_per_second": 0.799, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.018675226145316602, |
|
"grad_norm": 1.3220486767280257, |
|
"learning_rate": 2.5e-06, |
|
"loss": 11.8134, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.028012839217974907, |
|
"grad_norm": 1.323342086038301, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 11.7915, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.037350452290633204, |
|
"grad_norm": 1.32468270774218, |
|
"learning_rate": 5e-06, |
|
"loss": 11.7306, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04668806536329151, |
|
"grad_norm": 1.3580804351059697, |
|
"learning_rate": 6.25e-06, |
|
"loss": 11.4493, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05602567843594981, |
|
"grad_norm": 1.4657782226093532, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 10.5146, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.06536329150860812, |
|
"grad_norm": 1.4570176977534868, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 7.5687, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.07470090458126641, |
|
"grad_norm": 1.3101839933421826, |
|
"learning_rate": 1e-05, |
|
"loss": 6.6374, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.08403851765392471, |
|
"grad_norm": 0.39797650661474604, |
|
"learning_rate": 1.125e-05, |
|
"loss": 4.0048, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.09337613072658302, |
|
"grad_norm": 0.29538196193252036, |
|
"learning_rate": 1.25e-05, |
|
"loss": 3.7439, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.10271374379924132, |
|
"grad_norm": 0.21371312404721451, |
|
"learning_rate": 1.375e-05, |
|
"loss": 3.5406, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.11205135687189963, |
|
"grad_norm": 0.36485008261730323, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 3.3211, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.12138896994455792, |
|
"grad_norm": 0.2805852343578138, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"loss": 3.1236, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.13072658301721624, |
|
"grad_norm": 0.2021156853252266, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"loss": 2.8668, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.14006419608987453, |
|
"grad_norm": 0.15215118244217535, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 2.6099, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.14940180916253282, |
|
"grad_norm": 0.13687350701334702, |
|
"learning_rate": 2e-05, |
|
"loss": 2.3842, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.15873942223519114, |
|
"grad_norm": 0.13750612099868206, |
|
"learning_rate": 1.9999469523400122e-05, |
|
"loss": 2.1991, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.16807703530784943, |
|
"grad_norm": 0.12651196118653027, |
|
"learning_rate": 1.9997878149881576e-05, |
|
"loss": 2.0405, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.17741464838050774, |
|
"grad_norm": 0.12217963693020709, |
|
"learning_rate": 1.999522604828164e-05, |
|
"loss": 1.867, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.18675226145316604, |
|
"grad_norm": 0.11161741227643726, |
|
"learning_rate": 1.9991513499975883e-05, |
|
"loss": 1.7081, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19608987452582433, |
|
"grad_norm": 0.09431005577364608, |
|
"learning_rate": 1.9986740898848306e-05, |
|
"loss": 1.6022, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.20542748759848264, |
|
"grad_norm": 0.07544503987145361, |
|
"learning_rate": 1.9980908751249556e-05, |
|
"loss": 1.5179, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.21476510067114093, |
|
"grad_norm": 0.06853247429967851, |
|
"learning_rate": 1.997401767594319e-05, |
|
"loss": 1.4395, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.22410271374379925, |
|
"grad_norm": 0.05892769062015809, |
|
"learning_rate": 1.996606840404006e-05, |
|
"loss": 1.3577, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.23344032681645754, |
|
"grad_norm": 0.05337539608721018, |
|
"learning_rate": 1.9957061778920703e-05, |
|
"loss": 1.3032, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.24277793988911583, |
|
"grad_norm": 0.05279602192024426, |
|
"learning_rate": 1.9946998756145894e-05, |
|
"loss": 1.2436, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.2521155529617741, |
|
"grad_norm": 0.04928056172617358, |
|
"learning_rate": 1.9935880403355255e-05, |
|
"loss": 1.1964, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.26145316603443247, |
|
"grad_norm": 0.046581318628423386, |
|
"learning_rate": 1.9923707900153984e-05, |
|
"loss": 1.1161, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.27079077910709076, |
|
"grad_norm": 0.04485518736789617, |
|
"learning_rate": 1.9910482537987704e-05, |
|
"loss": 1.1, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.28012839217974905, |
|
"grad_norm": 0.03813950781953075, |
|
"learning_rate": 1.989620572000544e-05, |
|
"loss": 1.0508, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.28946600525240734, |
|
"grad_norm": 0.036622498559561, |
|
"learning_rate": 1.9880878960910772e-05, |
|
"loss": 1.0364, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.29880361832506563, |
|
"grad_norm": 0.03579080814636808, |
|
"learning_rate": 1.9864503886801108e-05, |
|
"loss": 1.0058, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.308141231397724, |
|
"grad_norm": 0.03146608814811976, |
|
"learning_rate": 1.9847082234995172e-05, |
|
"loss": 0.9954, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.31747884447038227, |
|
"grad_norm": 0.027371561053018377, |
|
"learning_rate": 1.982861585384869e-05, |
|
"loss": 0.9445, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.32681645754304056, |
|
"grad_norm": 0.024499632261339315, |
|
"learning_rate": 1.9809106702558277e-05, |
|
"loss": 0.938, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.33615407061569885, |
|
"grad_norm": 0.022391297579903467, |
|
"learning_rate": 1.978855685095358e-05, |
|
"loss": 0.9141, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.33615407061569885, |
|
"eval_loss": 0.8478350639343262, |
|
"eval_runtime": 43.6583, |
|
"eval_samples_per_second": 6.322, |
|
"eval_steps_per_second": 0.802, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.34549168368835714, |
|
"grad_norm": 0.021886974051633205, |
|
"learning_rate": 1.9766968479277684e-05, |
|
"loss": 0.8907, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.3548292967610155, |
|
"grad_norm": 0.023142495322344372, |
|
"learning_rate": 1.974434387795579e-05, |
|
"loss": 0.9146, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.3641669098336738, |
|
"grad_norm": 0.021168627691228573, |
|
"learning_rate": 1.972068544735221e-05, |
|
"loss": 0.8564, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.37350452290633207, |
|
"grad_norm": 0.019500940158346962, |
|
"learning_rate": 1.969599569751571e-05, |
|
"loss": 0.8691, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.38284213597899036, |
|
"grad_norm": 0.017342490295839202, |
|
"learning_rate": 1.9670277247913205e-05, |
|
"loss": 0.8169, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.39217974905164865, |
|
"grad_norm": 0.01834359475842193, |
|
"learning_rate": 1.964353282715183e-05, |
|
"loss": 0.8285, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.401517362124307, |
|
"grad_norm": 0.01743926340900274, |
|
"learning_rate": 1.961576527268946e-05, |
|
"loss": 0.8185, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.4108549751969653, |
|
"grad_norm": 0.015589382188249705, |
|
"learning_rate": 1.9586977530533677e-05, |
|
"loss": 0.7824, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.4201925882696236, |
|
"grad_norm": 0.015465707621862244, |
|
"learning_rate": 1.95571726549292e-05, |
|
"loss": 0.7994, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.42953020134228187, |
|
"grad_norm": 0.01348325969968343, |
|
"learning_rate": 1.9526353808033827e-05, |
|
"loss": 0.786, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.43886781441494016, |
|
"grad_norm": 0.01248344639342667, |
|
"learning_rate": 1.9494524259582994e-05, |
|
"loss": 0.7841, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.4482054274875985, |
|
"grad_norm": 0.012287865038500807, |
|
"learning_rate": 1.9461687386542826e-05, |
|
"loss": 0.7703, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.4575430405602568, |
|
"grad_norm": 0.013137558740636913, |
|
"learning_rate": 1.9427846672751873e-05, |
|
"loss": 0.7522, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.4668806536329151, |
|
"grad_norm": 0.012405785040338313, |
|
"learning_rate": 1.93930057085515e-05, |
|
"loss": 0.7513, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4762182667055734, |
|
"grad_norm": 0.012489962634362767, |
|
"learning_rate": 1.9357168190404937e-05, |
|
"loss": 0.743, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.48555587977823167, |
|
"grad_norm": 0.011604499619002773, |
|
"learning_rate": 1.932033792050515e-05, |
|
"loss": 0.7377, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.49489349285089, |
|
"grad_norm": 0.010226240702119492, |
|
"learning_rate": 1.928251880637141e-05, |
|
"loss": 0.723, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.5042311059235483, |
|
"grad_norm": 0.010262055155701614, |
|
"learning_rate": 1.924371486043473e-05, |
|
"loss": 0.7371, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.5135687189962066, |
|
"grad_norm": 0.009630481842938549, |
|
"learning_rate": 1.920393019961217e-05, |
|
"loss": 0.7295, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5229063320688649, |
|
"grad_norm": 0.0093680195601059, |
|
"learning_rate": 1.916316904487005e-05, |
|
"loss": 0.712, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.5322439451415232, |
|
"grad_norm": 0.009110104257000179, |
|
"learning_rate": 1.9121435720776122e-05, |
|
"loss": 0.6959, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.5415815582141815, |
|
"grad_norm": 0.008883791285743962, |
|
"learning_rate": 1.9078734655040763e-05, |
|
"loss": 0.7056, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.5509191712868398, |
|
"grad_norm": 0.008661290168572833, |
|
"learning_rate": 1.9035070378047204e-05, |
|
"loss": 0.7031, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.5602567843594981, |
|
"grad_norm": 0.008574085915641946, |
|
"learning_rate": 1.8990447522370886e-05, |
|
"loss": 0.6973, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5695943974321565, |
|
"grad_norm": 0.008014005266988974, |
|
"learning_rate": 1.8944870822287957e-05, |
|
"loss": 0.7075, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.5789320105048147, |
|
"grad_norm": 0.007587538749978625, |
|
"learning_rate": 1.8898345113273e-05, |
|
"loss": 0.6736, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.588269623577473, |
|
"grad_norm": 0.007458374217548868, |
|
"learning_rate": 1.8850875331485996e-05, |
|
"loss": 0.6811, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.5976072366501313, |
|
"grad_norm": 0.007383373412606801, |
|
"learning_rate": 1.8802466513248635e-05, |
|
"loss": 0.682, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.6069448497227896, |
|
"grad_norm": 0.007069434956330871, |
|
"learning_rate": 1.8753123794509974e-05, |
|
"loss": 0.6723, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.616282462795448, |
|
"grad_norm": 0.007152713173479847, |
|
"learning_rate": 1.8702852410301556e-05, |
|
"loss": 0.6663, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.6256200758681062, |
|
"grad_norm": 0.00764154365674685, |
|
"learning_rate": 1.865165769418196e-05, |
|
"loss": 0.6954, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.6349576889407645, |
|
"grad_norm": 0.007184416838244711, |
|
"learning_rate": 1.8599545077670983e-05, |
|
"loss": 0.6738, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.6442953020134228, |
|
"grad_norm": 0.006849637138186313, |
|
"learning_rate": 1.854652008967335e-05, |
|
"loss": 0.6679, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.6536329150860811, |
|
"grad_norm": 0.007061882929419886, |
|
"learning_rate": 1.8492588355892125e-05, |
|
"loss": 0.6765, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6629705281587395, |
|
"grad_norm": 0.006338175952472431, |
|
"learning_rate": 1.8437755598231857e-05, |
|
"loss": 0.6695, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.6723081412313977, |
|
"grad_norm": 0.0060579092781975295, |
|
"learning_rate": 1.8382027634191523e-05, |
|
"loss": 0.6484, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.6723081412313977, |
|
"eval_loss": 0.6214176416397095, |
|
"eval_runtime": 43.7617, |
|
"eval_samples_per_second": 6.307, |
|
"eval_steps_per_second": 0.8, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.681645754304056, |
|
"grad_norm": 0.006328927046317208, |
|
"learning_rate": 1.8325410376247295e-05, |
|
"loss": 0.6537, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.6909833673767143, |
|
"grad_norm": 0.006322879070385963, |
|
"learning_rate": 1.826790983122527e-05, |
|
"loss": 0.6792, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.7003209804493726, |
|
"grad_norm": 0.0059267041402792315, |
|
"learning_rate": 1.8209532099664177e-05, |
|
"loss": 0.6497, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.709658593522031, |
|
"grad_norm": 0.006265125029941557, |
|
"learning_rate": 1.8150283375168112e-05, |
|
"loss": 0.6375, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.7189962065946892, |
|
"grad_norm": 0.0057329086343299585, |
|
"learning_rate": 1.8090169943749477e-05, |
|
"loss": 0.6505, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.7283338196673476, |
|
"grad_norm": 0.006315125437389684, |
|
"learning_rate": 1.8029198183162e-05, |
|
"loss": 0.641, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.7376714327400058, |
|
"grad_norm": 0.00602643377500652, |
|
"learning_rate": 1.796737456222413e-05, |
|
"loss": 0.6702, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.7470090458126641, |
|
"grad_norm": 0.00618264758307804, |
|
"learning_rate": 1.7904705640132717e-05, |
|
"loss": 0.6414, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7563466588853225, |
|
"grad_norm": 0.0058530581998478, |
|
"learning_rate": 1.7841198065767107e-05, |
|
"loss": 0.65, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.7656842719579807, |
|
"grad_norm": 0.005741561897188635, |
|
"learning_rate": 1.7776858576983713e-05, |
|
"loss": 0.659, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.7750218850306391, |
|
"grad_norm": 0.005350843870415206, |
|
"learning_rate": 1.771169399990119e-05, |
|
"loss": 0.6464, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.7843594981032973, |
|
"grad_norm": 0.005628603669293866, |
|
"learning_rate": 1.7645711248176198e-05, |
|
"loss": 0.6347, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.7936971111759556, |
|
"grad_norm": 0.006024684364906471, |
|
"learning_rate": 1.7578917322269885e-05, |
|
"loss": 0.6133, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.803034724248614, |
|
"grad_norm": 0.005557283692065632, |
|
"learning_rate": 1.7511319308705198e-05, |
|
"loss": 0.6244, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.8123723373212722, |
|
"grad_norm": 0.00553799083367584, |
|
"learning_rate": 1.744292437931502e-05, |
|
"loss": 0.6528, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.8217099503939306, |
|
"grad_norm": 0.005384068308149398, |
|
"learning_rate": 1.7373739790481263e-05, |
|
"loss": 0.6337, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.8310475634665888, |
|
"grad_norm": 0.005255083208402985, |
|
"learning_rate": 1.7303772882365018e-05, |
|
"loss": 0.6063, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.8403851765392472, |
|
"grad_norm": 0.005656017120071371, |
|
"learning_rate": 1.723303107812779e-05, |
|
"loss": 0.6194, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8497227896119055, |
|
"grad_norm": 0.005727064114913842, |
|
"learning_rate": 1.7161521883143936e-05, |
|
"loss": 0.6359, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.8590604026845637, |
|
"grad_norm": 0.005332007932524027, |
|
"learning_rate": 1.7089252884204376e-05, |
|
"loss": 0.6188, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.8683980157572221, |
|
"grad_norm": 0.004895963878943312, |
|
"learning_rate": 1.701623174871168e-05, |
|
"loss": 0.6156, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.8777356288298803, |
|
"grad_norm": 0.0053198813356673084, |
|
"learning_rate": 1.6942466223866582e-05, |
|
"loss": 0.6165, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.8870732419025387, |
|
"grad_norm": 0.005384472223204587, |
|
"learning_rate": 1.6867964135846043e-05, |
|
"loss": 0.6182, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.896410854975197, |
|
"grad_norm": 0.005145515044871062, |
|
"learning_rate": 1.679273338897293e-05, |
|
"loss": 0.622, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.9057484680478552, |
|
"grad_norm": 0.005287999487944072, |
|
"learning_rate": 1.6716781964877413e-05, |
|
"loss": 0.6346, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.9150860811205136, |
|
"grad_norm": 0.005190281800590422, |
|
"learning_rate": 1.664011792165012e-05, |
|
"loss": 0.6219, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.9244236941931718, |
|
"grad_norm": 0.0051116136333854015, |
|
"learning_rate": 1.6562749392987255e-05, |
|
"loss": 0.6234, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.9337613072658302, |
|
"grad_norm": 0.005528069555146713, |
|
"learning_rate": 1.648468458732762e-05, |
|
"loss": 0.6238, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9430989203384885, |
|
"grad_norm": 0.00505378298539984, |
|
"learning_rate": 1.6405931786981753e-05, |
|
"loss": 0.611, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.9524365334111468, |
|
"grad_norm": 0.005251284958033568, |
|
"learning_rate": 1.6326499347253206e-05, |
|
"loss": 0.6016, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.9617741464838051, |
|
"grad_norm": 0.0052283843925876325, |
|
"learning_rate": 1.6246395695552086e-05, |
|
"loss": 0.6224, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.9711117595564633, |
|
"grad_norm": 0.005202512120139698, |
|
"learning_rate": 1.6165629330500952e-05, |
|
"loss": 0.6082, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.9804493726291217, |
|
"grad_norm": 0.004915735280290327, |
|
"learning_rate": 1.6084208821033152e-05, |
|
"loss": 0.5931, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.98978698570178, |
|
"grad_norm": 0.0050879698990566845, |
|
"learning_rate": 1.6002142805483686e-05, |
|
"loss": 0.5945, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.9991245987744383, |
|
"grad_norm": 0.005267364898779865, |
|
"learning_rate": 1.591943999067273e-05, |
|
"loss": 0.6108, |
|
"step": 107 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 321, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 107, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6387261951962513e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|