{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9971523493118177,
  "eval_steps": 500,
  "global_step": 526,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0037968675842429997,
      "grad_norm": 190.1852569580078,
      "learning_rate": 1.8518518518518518e-07,
      "loss": 2.1743,
      "step": 1
    },
    {
      "epoch": 0.007593735168485999,
      "grad_norm": 318.4629211425781,
      "learning_rate": 3.7037037037037036e-07,
      "loss": 2.1888,
      "step": 2
    },
    {
      "epoch": 0.011390602752728999,
      "grad_norm": 4.048186302185059,
      "learning_rate": 5.555555555555555e-07,
      "loss": 1.8374,
      "step": 3
    },
    {
      "epoch": 0.015187470336971999,
      "grad_norm": 149.984375,
      "learning_rate": 7.407407407407407e-07,
      "loss": 2.2082,
      "step": 4
    },
    {
      "epoch": 0.018984337921214997,
      "grad_norm": 140.36471557617188,
      "learning_rate": 9.259259259259259e-07,
      "loss": 2.5169,
      "step": 5
    },
    {
      "epoch": 0.022781205505457997,
      "grad_norm": 81.98410034179688,
      "learning_rate": 1.111111111111111e-06,
      "loss": 2.0747,
      "step": 6
    },
    {
      "epoch": 0.026578073089700997,
      "grad_norm": 3.9009580612182617,
      "learning_rate": 1.2962962962962962e-06,
      "loss": 1.8328,
      "step": 7
    },
    {
      "epoch": 0.030374940673943997,
      "grad_norm": 239.54661560058594,
      "learning_rate": 1.4814814814814815e-06,
      "loss": 2.1877,
      "step": 8
    },
    {
      "epoch": 0.034171808258187,
      "grad_norm": 181.9974822998047,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 1.9126,
      "step": 9
    },
    {
      "epoch": 0.037968675842429994,
      "grad_norm": 71.94468688964844,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 1.9289,
      "step": 10
    },
    {
      "epoch": 0.041765543426673,
      "grad_norm": 327.50091552734375,
      "learning_rate": 2.037037037037037e-06,
      "loss": 1.8236,
      "step": 11
    },
    {
      "epoch": 0.045562411010915994,
      "grad_norm": 3.223841667175293,
      "learning_rate": 2.222222222222222e-06,
      "loss": 1.7878,
      "step": 12
    },
    {
      "epoch": 0.04935927859515899,
      "grad_norm": 230.49221801757812,
      "learning_rate": 2.4074074074074075e-06,
      "loss": 1.6871,
      "step": 13
    },
    {
      "epoch": 0.053156146179401995,
      "grad_norm": 18558.3125,
      "learning_rate": 2.5925925925925925e-06,
      "loss": 1.6958,
      "step": 14
    },
    {
      "epoch": 0.05695301376364499,
      "grad_norm": 2.5916926860809326,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 1.7525,
      "step": 15
    },
    {
      "epoch": 0.060749881347887995,
      "grad_norm": 2.61940598487854,
      "learning_rate": 2.962962962962963e-06,
      "loss": 1.7431,
      "step": 16
    },
    {
      "epoch": 0.064546748932131,
      "grad_norm": 175.40200805664062,
      "learning_rate": 3.1481481481481483e-06,
      "loss": 1.6176,
      "step": 17
    },
    {
      "epoch": 0.068343616516374,
      "grad_norm": 2.723275899887085,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.7091,
      "step": 18
    },
    {
      "epoch": 0.07214048410061699,
      "grad_norm": 575.9302368164062,
      "learning_rate": 3.5185185185185187e-06,
      "loss": 1.4155,
      "step": 19
    },
    {
      "epoch": 0.07593735168485999,
      "grad_norm": 1.5204131603240967,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 1.668,
      "step": 20
    },
    {
      "epoch": 0.07973421926910298,
      "grad_norm": 172.04342651367188,
      "learning_rate": 3.88888888888889e-06,
      "loss": 1.5175,
      "step": 21
    },
    {
      "epoch": 0.083531086853346,
      "grad_norm": 36.41116714477539,
      "learning_rate": 4.074074074074074e-06,
      "loss": 1.3265,
      "step": 22
    },
    {
      "epoch": 0.08732795443758899,
      "grad_norm": 1.3005378246307373,
      "learning_rate": 4.2592592592592596e-06,
      "loss": 1.6097,
      "step": 23
    },
    {
      "epoch": 0.09112482202183199,
      "grad_norm": 23.33233642578125,
      "learning_rate": 4.444444444444444e-06,
      "loss": 1.4055,
      "step": 24
    },
    {
      "epoch": 0.09492168960607499,
      "grad_norm": 1.2947165966033936,
      "learning_rate": 4.62962962962963e-06,
      "loss": 1.552,
      "step": 25
    },
    {
      "epoch": 0.09871855719031798,
      "grad_norm": 62.264190673828125,
      "learning_rate": 4.814814814814815e-06,
      "loss": 1.198,
      "step": 26
    },
    {
      "epoch": 0.10251542477456099,
      "grad_norm": 16.471799850463867,
      "learning_rate": 5e-06,
      "loss": 1.3374,
      "step": 27
    },
    {
      "epoch": 0.10631229235880399,
      "grad_norm": 9.552218437194824,
      "learning_rate": 4.999950454155801e-06,
      "loss": 1.3352,
      "step": 28
    },
    {
      "epoch": 0.11010915994304699,
      "grad_norm": 1.3561033010482788,
      "learning_rate": 4.999801818587036e-06,
      "loss": 1.5152,
      "step": 29
    },
    {
      "epoch": 0.11390602752728998,
      "grad_norm": 3.8642690181732178,
      "learning_rate": 4.999554099185124e-06,
      "loss": 1.3093,
      "step": 30
    },
    {
      "epoch": 0.11770289511153298,
      "grad_norm": 0.9432722926139832,
      "learning_rate": 4.999207305768841e-06,
      "loss": 1.3048,
      "step": 31
    },
    {
      "epoch": 0.12149976269577599,
      "grad_norm": 0.9486730694770813,
      "learning_rate": 4.998761452083922e-06,
      "loss": 1.4368,
      "step": 32
    },
    {
      "epoch": 0.12529663028001897,
      "grad_norm": 0.7894783020019531,
      "learning_rate": 4.998216555802526e-06,
      "loss": 1.2765,
      "step": 33
    },
    {
      "epoch": 0.129093497864262,
      "grad_norm": 0.8032355308532715,
      "learning_rate": 4.997572638522531e-06,
      "loss": 1.425,
      "step": 34
    },
    {
      "epoch": 0.132890365448505,
      "grad_norm": 0.6962498426437378,
      "learning_rate": 4.996829725766676e-06,
      "loss": 1.4005,
      "step": 35
    },
    {
      "epoch": 0.136687233032748,
      "grad_norm": 0.6125718355178833,
      "learning_rate": 4.995987846981554e-06,
      "loss": 1.0826,
      "step": 36
    },
    {
      "epoch": 0.140484100616991,
      "grad_norm": 0.6794630885124207,
      "learning_rate": 4.995047035536439e-06,
      "loss": 1.0644,
      "step": 37
    },
    {
      "epoch": 0.14428096820123398,
      "grad_norm": 0.6797907948493958,
      "learning_rate": 4.9940073287219705e-06,
      "loss": 1.0821,
      "step": 38
    },
    {
      "epoch": 0.14807783578547698,
      "grad_norm": 0.6790012121200562,
      "learning_rate": 4.992868767748669e-06,
      "loss": 1.2284,
      "step": 39
    },
    {
      "epoch": 0.15187470336971998,
      "grad_norm": 0.6396064758300781,
      "learning_rate": 4.991631397745307e-06,
      "loss": 1.197,
      "step": 40
    },
    {
      "epoch": 0.15567157095396297,
      "grad_norm": 0.6216674447059631,
      "learning_rate": 4.990295267757117e-06,
      "loss": 1.3547,
      "step": 41
    },
    {
      "epoch": 0.15946843853820597,
      "grad_norm": 0.49298611283302307,
      "learning_rate": 4.98886043074385e-06,
      "loss": 0.8974,
      "step": 42
    },
    {
      "epoch": 0.16326530612244897,
      "grad_norm": 0.5648188591003418,
      "learning_rate": 4.987326943577675e-06,
      "loss": 1.1868,
      "step": 43
    },
    {
      "epoch": 0.167062173706692,
      "grad_norm": 0.5617629885673523,
      "learning_rate": 4.985694867040924e-06,
      "loss": 1.197,
      "step": 44
    },
    {
      "epoch": 0.170859041290935,
      "grad_norm": 0.5470258593559265,
      "learning_rate": 4.983964265823687e-06,
      "loss": 1.0242,
      "step": 45
    },
    {
      "epoch": 0.17465590887517798,
      "grad_norm": 0.5692163705825806,
      "learning_rate": 4.98213520852124e-06,
      "loss": 1.3244,
      "step": 46
    },
    {
      "epoch": 0.17845277645942098,
      "grad_norm": 0.4028306007385254,
      "learning_rate": 4.980207767631335e-06,
      "loss": 0.7001,
      "step": 47
    },
    {
      "epoch": 0.18224964404366398,
      "grad_norm": 0.49932053685188293,
      "learning_rate": 4.978182019551321e-06,
      "loss": 1.1597,
      "step": 48
    },
    {
      "epoch": 0.18604651162790697,
      "grad_norm": 0.5206290483474731,
      "learning_rate": 4.976058044575116e-06,
      "loss": 1.3185,
      "step": 49
    },
    {
      "epoch": 0.18984337921214997,
      "grad_norm": 0.534298300743103,
      "learning_rate": 4.973835926890027e-06,
      "loss": 1.3182,
      "step": 50
    },
    {
      "epoch": 0.19364024679639297,
      "grad_norm": 0.4649883210659027,
      "learning_rate": 4.9715157545734124e-06,
      "loss": 1.1734,
      "step": 51
    },
    {
      "epoch": 0.19743711438063596,
      "grad_norm": 0.45419999957084656,
      "learning_rate": 4.969097619589187e-06,
      "loss": 1.1433,
      "step": 52
    },
    {
      "epoch": 0.201233981964879,
      "grad_norm": 0.4962867498397827,
      "learning_rate": 4.9665816177841845e-06,
      "loss": 1.1749,
      "step": 53
    },
    {
      "epoch": 0.20503084954912199,
      "grad_norm": 0.4966491162776947,
      "learning_rate": 4.963967848884349e-06,
      "loss": 1.1595,
      "step": 54
    },
    {
      "epoch": 0.20882771713336498,
      "grad_norm": 0.4114384055137634,
      "learning_rate": 4.961256416490793e-06,
      "loss": 0.8368,
      "step": 55
    },
    {
      "epoch": 0.21262458471760798,
      "grad_norm": 0.438700407743454,
      "learning_rate": 4.9584474280756785e-06,
      "loss": 1.1412,
      "step": 56
    },
    {
      "epoch": 0.21642145230185098,
      "grad_norm": 0.45085862278938293,
      "learning_rate": 4.9555409949779695e-06,
      "loss": 1.1312,
      "step": 57
    },
    {
      "epoch": 0.22021831988609397,
      "grad_norm": 0.4020615220069885,
      "learning_rate": 4.952537232399012e-06,
      "loss": 0.9898,
      "step": 58
    },
    {
      "epoch": 0.22401518747033697,
      "grad_norm": 0.43643948435783386,
      "learning_rate": 4.9494362593979665e-06,
      "loss": 1.1352,
      "step": 59
    },
    {
      "epoch": 0.22781205505457996,
      "grad_norm": 0.40290120244026184,
      "learning_rate": 4.946238198887093e-06,
      "loss": 0.9822,
      "step": 60
    },
    {
      "epoch": 0.23160892263882296,
      "grad_norm": 0.4434157609939575,
      "learning_rate": 4.942943177626879e-06,
      "loss": 1.1303,
      "step": 61
    },
    {
      "epoch": 0.23540579022306596,
      "grad_norm": 0.48527973890304565,
      "learning_rate": 4.939551326221012e-06,
      "loss": 1.2812,
      "step": 62
    },
    {
      "epoch": 0.23920265780730898,
      "grad_norm": 0.4416787028312683,
      "learning_rate": 4.936062779111205e-06,
      "loss": 1.1381,
      "step": 63
    },
    {
      "epoch": 0.24299952539155198,
      "grad_norm": 0.4510328769683838,
      "learning_rate": 4.932477674571867e-06,
      "loss": 1.2696,
      "step": 64
    },
    {
      "epoch": 0.24679639297579498,
      "grad_norm": 0.38020116090774536,
      "learning_rate": 4.928796154704623e-06,
      "loss": 0.9767,
      "step": 65
    },
    {
      "epoch": 0.25059326056003794,
      "grad_norm": 0.4880014657974243,
      "learning_rate": 4.925018365432681e-06,
      "loss": 1.2679,
      "step": 66
    },
    {
      "epoch": 0.25439012814428097,
      "grad_norm": 0.4506332576274872,
      "learning_rate": 4.921144456495048e-06,
      "loss": 1.1344,
      "step": 67
    },
    {
      "epoch": 0.258186995728524,
      "grad_norm": 0.45424792170524597,
      "learning_rate": 4.9171745814405945e-06,
      "loss": 1.2797,
      "step": 68
    },
    {
      "epoch": 0.26198386331276696,
      "grad_norm": 0.4969100058078766,
      "learning_rate": 4.9131088976219695e-06,
      "loss": 1.2685,
      "step": 69
    },
    {
      "epoch": 0.26578073089701,
      "grad_norm": 0.4142298400402069,
      "learning_rate": 4.908947566189362e-06,
      "loss": 1.1168,
      "step": 70
    },
    {
      "epoch": 0.26957759848125296,
      "grad_norm": 0.4250011444091797,
      "learning_rate": 4.904690752084117e-06,
      "loss": 1.1161,
      "step": 71
    },
    {
      "epoch": 0.273374466065496,
      "grad_norm": 0.4570682644844055,
      "learning_rate": 4.900338624032191e-06,
      "loss": 1.2663,
      "step": 72
    },
    {
      "epoch": 0.27717133364973895,
      "grad_norm": 0.43025946617126465,
      "learning_rate": 4.895891354537472e-06,
      "loss": 1.2616,
      "step": 73
    },
    {
      "epoch": 0.280968201233982,
      "grad_norm": 0.42482641339302063,
      "learning_rate": 4.891349119874936e-06,
      "loss": 1.1215,
      "step": 74
    },
    {
      "epoch": 0.28476506881822494,
      "grad_norm": 0.4691850244998932,
      "learning_rate": 4.886712100083664e-06,
      "loss": 1.1117,
      "step": 75
    },
    {
      "epoch": 0.28856193640246797,
      "grad_norm": 0.4500940442085266,
      "learning_rate": 4.881980478959707e-06,
      "loss": 1.1082,
      "step": 76
    },
    {
      "epoch": 0.292358803986711,
      "grad_norm": 0.3645106256008148,
      "learning_rate": 4.877154444048792e-06,
      "loss": 0.9683,
      "step": 77
    },
    {
      "epoch": 0.29615567157095396,
      "grad_norm": 0.42706671357154846,
      "learning_rate": 4.872234186638898e-06,
      "loss": 1.106,
      "step": 78
    },
    {
      "epoch": 0.299952539155197,
      "grad_norm": 0.416423499584198,
      "learning_rate": 4.8672199017526725e-06,
      "loss": 1.1109,
      "step": 79
    },
    {
      "epoch": 0.30374940673943995,
      "grad_norm": 0.46140867471694946,
      "learning_rate": 4.862111788139697e-06,
      "loss": 1.2646,
      "step": 80
    },
    {
      "epoch": 0.307546274323683,
      "grad_norm": 0.4429333508014679,
      "learning_rate": 4.856910048268613e-06,
      "loss": 1.1331,
      "step": 81
    },
    {
      "epoch": 0.31134314190792595,
      "grad_norm": 0.42622944712638855,
      "learning_rate": 4.851614888319093e-06,
      "loss": 1.0966,
      "step": 82
    },
    {
      "epoch": 0.31514000949216897,
      "grad_norm": 0.5261926054954529,
      "learning_rate": 4.846226518173676e-06,
      "loss": 1.2528,
      "step": 83
    },
    {
      "epoch": 0.31893687707641194,
      "grad_norm": 0.4338201880455017,
      "learning_rate": 4.840745151409437e-06,
      "loss": 1.2542,
      "step": 84
    },
    {
      "epoch": 0.32273374466065496,
      "grad_norm": 0.44151878356933594,
      "learning_rate": 4.835171005289533e-06,
      "loss": 1.1135,
      "step": 85
    },
    {
      "epoch": 0.32653061224489793,
      "grad_norm": 0.4812638461589813,
      "learning_rate": 4.8295043007545836e-06,
      "loss": 1.2653,
      "step": 86
    },
    {
      "epoch": 0.33032747982914096,
      "grad_norm": 0.562940239906311,
      "learning_rate": 4.823745262413917e-06,
      "loss": 1.2494,
      "step": 87
    },
    {
      "epoch": 0.334124347413384,
      "grad_norm": 0.4242718517780304,
      "learning_rate": 4.817894118536667e-06,
      "loss": 1.1076,
      "step": 88
    },
    {
      "epoch": 0.33792121499762695,
      "grad_norm": 0.45141878724098206,
      "learning_rate": 4.811951101042722e-06,
      "loss": 1.1108,
      "step": 89
    },
    {
      "epoch": 0.34171808258187,
      "grad_norm": 0.5067852139472961,
      "learning_rate": 4.805916445493538e-06,
      "loss": 1.2272,
      "step": 90
    },
    {
      "epoch": 0.34551495016611294,
      "grad_norm": 0.4855363965034485,
      "learning_rate": 4.799790391082799e-06,
      "loss": 1.1,
      "step": 91
    },
    {
      "epoch": 0.34931181775035597,
      "grad_norm": 0.47762539982795715,
      "learning_rate": 4.793573180626934e-06,
      "loss": 1.2453,
      "step": 92
    },
    {
      "epoch": 0.35310868533459894,
      "grad_norm": 0.5055530071258545,
      "learning_rate": 4.787265060555495e-06,
      "loss": 1.2437,
      "step": 93
    },
    {
      "epoch": 0.35690555291884196,
      "grad_norm": 0.4677724838256836,
      "learning_rate": 4.7808662809013895e-06,
      "loss": 1.223,
      "step": 94
    },
    {
      "epoch": 0.36070242050308493,
      "grad_norm": 0.47210943698883057,
      "learning_rate": 4.774377095290969e-06,
      "loss": 1.2287,
      "step": 95
    },
    {
      "epoch": 0.36449928808732796,
      "grad_norm": 0.48820409178733826,
      "learning_rate": 4.76779776093398e-06,
      "loss": 1.0997,
      "step": 96
    },
    {
      "epoch": 0.368296155671571,
      "grad_norm": 0.509981632232666,
      "learning_rate": 4.761128538613359e-06,
      "loss": 1.2303,
      "step": 97
    },
    {
      "epoch": 0.37209302325581395,
      "grad_norm": 0.46266815066337585,
      "learning_rate": 4.754369692674906e-06,
      "loss": 1.2366,
      "step": 98
    },
    {
      "epoch": 0.375889890840057,
      "grad_norm": 0.4689420759677887,
      "learning_rate": 4.747521491016805e-06,
      "loss": 1.2379,
      "step": 99
    },
    {
      "epoch": 0.37968675842429994,
      "grad_norm": 0.454712837934494,
      "learning_rate": 4.740584205079002e-06,
      "loss": 1.1078,
      "step": 100
    },
    {
      "epoch": 0.38348362600854297,
      "grad_norm": 0.5033725500106812,
      "learning_rate": 4.7335581098324465e-06,
      "loss": 1.2319,
      "step": 101
    },
    {
      "epoch": 0.38728049359278593,
      "grad_norm": 0.5276666283607483,
      "learning_rate": 4.726443483768195e-06,
      "loss": 1.2373,
      "step": 102
    },
    {
      "epoch": 0.39107736117702896,
      "grad_norm": 0.4005415737628937,
      "learning_rate": 4.719240608886372e-06,
      "loss": 0.9373,
      "step": 103
    },
    {
      "epoch": 0.39487422876127193,
      "grad_norm": 0.456887423992157,
      "learning_rate": 4.711949770684989e-06,
      "loss": 1.2402,
      "step": 104
    },
    {
      "epoch": 0.39867109634551495,
      "grad_norm": 0.4958035349845886,
      "learning_rate": 4.704571258148634e-06,
      "loss": 1.2327,
      "step": 105
    },
    {
      "epoch": 0.402467963929758,
      "grad_norm": 0.40182697772979736,
      "learning_rate": 4.697105363737015e-06,
      "loss": 0.9684,
      "step": 106
    },
    {
      "epoch": 0.40626483151400095,
      "grad_norm": 0.43226033449172974,
      "learning_rate": 4.689552383373362e-06,
      "loss": 1.0848,
      "step": 107
    },
    {
      "epoch": 0.41006169909824397,
      "grad_norm": 0.4296877682209015,
      "learning_rate": 4.681912616432707e-06,
      "loss": 0.9568,
      "step": 108
    },
    {
      "epoch": 0.41385856668248694,
      "grad_norm": 0.5179736018180847,
      "learning_rate": 4.674186365730012e-06,
      "loss": 1.2286,
      "step": 109
    },
    {
      "epoch": 0.41765543426672996,
      "grad_norm": 0.4191986620426178,
      "learning_rate": 4.666373937508166e-06,
      "loss": 0.9606,
      "step": 110
    },
    {
      "epoch": 0.42145230185097293,
      "grad_norm": 0.412567675113678,
      "learning_rate": 4.658475641425854e-06,
      "loss": 1.0793,
      "step": 111
    },
    {
      "epoch": 0.42524916943521596,
      "grad_norm": 0.4645300507545471,
      "learning_rate": 4.6504917905452705e-06,
      "loss": 1.0835,
      "step": 112
    },
    {
      "epoch": 0.4290460370194589,
      "grad_norm": 0.3561910092830658,
      "learning_rate": 4.6424227013197235e-06,
      "loss": 0.7952,
      "step": 113
    },
    {
      "epoch": 0.43284290460370195,
      "grad_norm": 0.5064642429351807,
      "learning_rate": 4.6342686935810795e-06,
      "loss": 1.2319,
      "step": 114
    },
    {
      "epoch": 0.4366397721879449,
      "grad_norm": 0.4808673858642578,
      "learning_rate": 4.6260300905271e-06,
      "loss": 1.223,
      "step": 115
    },
    {
      "epoch": 0.44043663977218794,
      "grad_norm": 0.4152892529964447,
      "learning_rate": 4.617707218708617e-06,
      "loss": 1.0765,
      "step": 116
    },
    {
      "epoch": 0.44423350735643097,
      "grad_norm": 0.49487704038619995,
      "learning_rate": 4.6093004080166e-06,
      "loss": 1.2205,
      "step": 117
    },
    {
      "epoch": 0.44803037494067394,
      "grad_norm": 0.4736645817756653,
      "learning_rate": 4.600809991669076e-06,
      "loss": 1.0811,
      "step": 118
    },
    {
      "epoch": 0.45182724252491696,
      "grad_norm": 0.46936193108558655,
      "learning_rate": 4.59223630619792e-06,
      "loss": 1.0809,
      "step": 119
    },
    {
      "epoch": 0.45562411010915993,
      "grad_norm": 0.4555075466632843,
      "learning_rate": 4.5835796914355195e-06,
      "loss": 1.1008,
      "step": 120
    },
    {
      "epoch": 0.45942097769340295,
      "grad_norm": 0.43824076652526855,
      "learning_rate": 4.5748404905013045e-06,
      "loss": 1.1092,
      "step": 121
    },
    {
      "epoch": 0.4632178452776459,
      "grad_norm": 0.4486856162548065,
      "learning_rate": 4.5660190497881455e-06,
      "loss": 1.0784,
      "step": 122
    },
    {
      "epoch": 0.46701471286188895,
      "grad_norm": 0.4244840145111084,
      "learning_rate": 4.557115718948622e-06,
      "loss": 0.958,
      "step": 123
    },
    {
      "epoch": 0.4708115804461319,
      "grad_norm": 0.4413922429084778,
      "learning_rate": 4.548130850881171e-06,
      "loss": 1.2196,
      "step": 124
    },
    {
      "epoch": 0.47460844803037494,
      "grad_norm": 0.4678729772567749,
      "learning_rate": 4.53906480171609e-06,
      "loss": 1.1124,
      "step": 125
    },
    {
      "epoch": 0.47840531561461797,
      "grad_norm": 0.48969003558158875,
      "learning_rate": 4.529917930801427e-06,
      "loss": 1.2281,
      "step": 126
    },
    {
      "epoch": 0.48220218319886093,
      "grad_norm": 0.4379821717739105,
      "learning_rate": 4.520690600688734e-06,
      "loss": 1.0849,
      "step": 127
    },
    {
      "epoch": 0.48599905078310396,
      "grad_norm": 0.4471890330314636,
      "learning_rate": 4.5113831771187e-06,
      "loss": 1.0791,
      "step": 128
    },
    {
      "epoch": 0.4897959183673469,
      "grad_norm": 0.49659088253974915,
      "learning_rate": 4.501996029006651e-06,
      "loss": 1.097,
      "step": 129
    },
    {
      "epoch": 0.49359278595158995,
      "grad_norm": 0.4094451665878296,
      "learning_rate": 4.492529528427929e-06,
      "loss": 0.972,
      "step": 130
    },
    {
      "epoch": 0.4973896535358329,
      "grad_norm": 0.4491405487060547,
      "learning_rate": 4.4829840506031455e-06,
      "loss": 1.0826,
      "step": 131
    },
    {
      "epoch": 0.5011865211200759,
      "grad_norm": 0.42457208037376404,
      "learning_rate": 4.473359973883305e-06,
      "loss": 1.1066,
      "step": 132
    },
    {
      "epoch": 0.5049833887043189,
      "grad_norm": 0.4856266379356384,
      "learning_rate": 4.463657679734813e-06,
      "loss": 1.225,
      "step": 133
    },
    {
      "epoch": 0.5087802562885619,
      "grad_norm": 0.47458940744400024,
      "learning_rate": 4.453877552724352e-06,
      "loss": 1.2234,
      "step": 134
    },
    {
      "epoch": 0.512577123872805,
      "grad_norm": 0.36578264832496643,
      "learning_rate": 4.444019980503641e-06,
      "loss": 0.9276,
      "step": 135
    },
    {
      "epoch": 0.516373991457048,
      "grad_norm": 0.4141775965690613,
      "learning_rate": 4.4340853537940715e-06,
      "loss": 1.0895,
      "step": 136
    },
    {
      "epoch": 0.5201708590412909,
      "grad_norm": 0.4604395031929016,
      "learning_rate": 4.424074066371216e-06,
      "loss": 1.2279,
      "step": 137
    },
    {
      "epoch": 0.5239677266255339,
      "grad_norm": 0.4443908929824829,
      "learning_rate": 4.4139865150492235e-06,
      "loss": 1.0862,
      "step": 138
    },
    {
      "epoch": 0.527764594209777,
      "grad_norm": 0.4457070529460907,
      "learning_rate": 4.403823099665093e-06,
      "loss": 1.2267,
      "step": 139
    },
    {
      "epoch": 0.53156146179402,
      "grad_norm": 0.4152843952178955,
      "learning_rate": 4.393584223062819e-06,
      "loss": 1.0827,
      "step": 140
    },
    {
      "epoch": 0.5353583293782629,
      "grad_norm": 0.42452365159988403,
      "learning_rate": 4.38327029107743e-06,
      "loss": 1.0743,
      "step": 141
    },
    {
      "epoch": 0.5391551969625059,
      "grad_norm": 0.3921717703342438,
      "learning_rate": 4.372881712518898e-06,
      "loss": 0.9454,
      "step": 142
    },
    {
      "epoch": 0.5429520645467489,
      "grad_norm": 0.5372737646102905,
      "learning_rate": 4.362418899155941e-06,
      "loss": 1.227,
      "step": 143
    },
    {
      "epoch": 0.546748932130992,
      "grad_norm": 0.5399878621101379,
      "learning_rate": 4.351882265699696e-06,
      "loss": 1.2155,
      "step": 144
    },
    {
      "epoch": 0.550545799715235,
      "grad_norm": 0.44373106956481934,
      "learning_rate": 4.341272229787281e-06,
      "loss": 1.0809,
      "step": 145
    },
    {
      "epoch": 0.5543426672994779,
      "grad_norm": 0.4626406133174896,
      "learning_rate": 4.330589211965246e-06,
      "loss": 1.2145,
      "step": 146
    },
    {
      "epoch": 0.5581395348837209,
      "grad_norm": 0.4238491356372833,
      "learning_rate": 4.319833635672899e-06,
      "loss": 1.0835,
      "step": 147
    },
    {
      "epoch": 0.561936402467964,
      "grad_norm": 0.5245795249938965,
      "learning_rate": 4.309005927225528e-06,
      "loss": 1.222,
      "step": 148
    },
    {
      "epoch": 0.565733270052207,
      "grad_norm": 0.43987414240837097,
      "learning_rate": 4.2981065157974955e-06,
      "loss": 0.9477,
      "step": 149
    },
    {
      "epoch": 0.5695301376364499,
      "grad_norm": 0.4053569734096527,
      "learning_rate": 4.287135833405235e-06,
      "loss": 1.0717,
      "step": 150
    },
    {
      "epoch": 0.5733270052206929,
      "grad_norm": 0.411527544260025,
      "learning_rate": 4.276094314890122e-06,
      "loss": 0.9402,
      "step": 151
    },
    {
      "epoch": 0.5771238728049359,
      "grad_norm": 0.5262433290481567,
      "learning_rate": 4.2649823979012424e-06,
      "loss": 1.2261,
      "step": 152
    },
    {
      "epoch": 0.580920740389179,
      "grad_norm": 0.44064462184906006,
      "learning_rate": 4.253800522878043e-06,
      "loss": 1.0823,
      "step": 153
    },
    {
      "epoch": 0.584717607973422,
      "grad_norm": 0.4491995871067047,
      "learning_rate": 4.242549133032872e-06,
      "loss": 1.2246,
      "step": 154
    },
    {
      "epoch": 0.5885144755576649,
      "grad_norm": 0.44037237763404846,
      "learning_rate": 4.2312286743334174e-06,
      "loss": 1.08,
      "step": 155
    },
    {
      "epoch": 0.5923113431419079,
      "grad_norm": 0.4179958701133728,
      "learning_rate": 4.219839595485026e-06,
      "loss": 1.0792,
      "step": 156
    },
    {
      "epoch": 0.5961082107261509,
      "grad_norm": 0.4985540509223938,
      "learning_rate": 4.2083823479129175e-06,
      "loss": 1.2162,
      "step": 157
    },
    {
      "epoch": 0.599905078310394,
      "grad_norm": 0.43255966901779175,
      "learning_rate": 4.196857385744295e-06,
      "loss": 1.0821,
      "step": 158
    },
    {
      "epoch": 0.6037019458946369,
      "grad_norm": 0.38958489894866943,
      "learning_rate": 4.185265165790343e-06,
      "loss": 0.93,
      "step": 159
    },
    {
      "epoch": 0.6074988134788799,
      "grad_norm": 0.43625640869140625,
      "learning_rate": 4.17360614752812e-06,
      "loss": 1.0673,
      "step": 160
    },
    {
      "epoch": 0.6112956810631229,
      "grad_norm": 0.3636965751647949,
      "learning_rate": 4.161880793082348e-06,
      "loss": 0.9311,
      "step": 161
    },
    {
      "epoch": 0.615092548647366,
      "grad_norm": 0.41958415508270264,
      "learning_rate": 4.150089567207094e-06,
      "loss": 0.9723,
      "step": 162
    },
    {
      "epoch": 0.6188894162316089,
      "grad_norm": 0.4178905189037323,
      "learning_rate": 4.138232937267351e-06,
      "loss": 0.9489,
      "step": 163
    },
    {
      "epoch": 0.6226862838158519,
      "grad_norm": 0.4551599621772766,
      "learning_rate": 4.126311373220511e-06,
      "loss": 1.0803,
      "step": 164
    },
    {
      "epoch": 0.6264831514000949,
      "grad_norm": 0.3745613992214203,
      "learning_rate": 4.114325347597736e-06,
      "loss": 0.9532,
      "step": 165
    },
    {
      "epoch": 0.6302800189843379,
      "grad_norm": 0.4355705976486206,
      "learning_rate": 4.102275335485234e-06,
      "loss": 1.2055,
      "step": 166
    },
    {
      "epoch": 0.634076886568581,
      "grad_norm": 0.43142765760421753,
      "learning_rate": 4.0901618145054246e-06,
      "loss": 1.216,
      "step": 167
    },
    {
      "epoch": 0.6378737541528239,
      "grad_norm": 0.3976465165615082,
      "learning_rate": 4.077985264798004e-06,
      "loss": 0.9956,
      "step": 168
    },
    {
      "epoch": 0.6416706217370669,
      "grad_norm": 0.4292868375778198,
      "learning_rate": 4.06574616900092e-06,
      "loss": 1.0555,
      "step": 169
    },
    {
      "epoch": 0.6454674893213099,
      "grad_norm": 0.5177546143531799,
      "learning_rate": 4.053445012231241e-06,
      "loss": 1.2066,
      "step": 170
    },
    {
      "epoch": 0.649264356905553,
      "grad_norm": 0.41628462076187134,
      "learning_rate": 4.041082282065922e-06,
      "loss": 1.21,
      "step": 171
    },
    {
      "epoch": 0.6530612244897959,
      "grad_norm": 0.44618961215019226,
      "learning_rate": 4.028658468522489e-06,
      "loss": 1.2193,
      "step": 172
    },
    {
      "epoch": 0.6568580920740389,
      "grad_norm": 0.44253483414649963,
      "learning_rate": 4.016174064039602e-06,
      "loss": 1.2155,
      "step": 173
    },
    {
      "epoch": 0.6606549596582819,
      "grad_norm": 0.3929227292537689,
      "learning_rate": 4.003629563457551e-06,
      "loss": 1.0683,
      "step": 174
    },
    {
      "epoch": 0.6644518272425249,
      "grad_norm": 0.5715161561965942,
      "learning_rate": 3.991025463998632e-06,
      "loss": 1.2264,
      "step": 175
    },
    {
      "epoch": 0.668248694826768,
      "grad_norm": 0.4098718762397766,
      "learning_rate": 3.978362265247444e-06,
      "loss": 1.0723,
      "step": 176
    },
    {
      "epoch": 0.6720455624110109,
      "grad_norm": 0.4266994595527649,
      "learning_rate": 3.965640469131084e-06,
      "loss": 1.0663,
      "step": 177
    },
    {
      "epoch": 0.6758424299952539,
      "grad_norm": 0.379220187664032,
      "learning_rate": 3.952860579899257e-06,
      "loss": 0.9418,
      "step": 178
    },
    {
      "epoch": 0.6796392975794969,
      "grad_norm": 0.3946760594844818,
      "learning_rate": 3.940023104104281e-06,
      "loss": 1.0605,
      "step": 179
    },
    {
      "epoch": 0.68343616516374,
      "grad_norm": 0.47356459498405457,
      "learning_rate": 3.9271285505810185e-06,
      "loss": 1.0615,
      "step": 180
    },
    {
      "epoch": 0.6872330327479829,
      "grad_norm": 0.4356970489025116,
      "learning_rate": 3.9141774304267e-06,
      "loss": 1.0714,
      "step": 181
    },
    {
      "epoch": 0.6910299003322259,
      "grad_norm": 0.4237845540046692,
      "learning_rate": 3.9011702569806716e-06,
      "loss": 1.0862,
      "step": 182
    },
    {
      "epoch": 0.6948267679164689,
      "grad_norm": 0.4926426410675049,
      "learning_rate": 3.888107545804043e-06,
      "loss": 1.2091,
      "step": 183
    },
    {
      "epoch": 0.6986236355007119,
      "grad_norm": 0.43669024109840393,
      "learning_rate": 3.874989814659258e-06,
      "loss": 1.0805,
      "step": 184
    },
    {
      "epoch": 0.702420503084955,
      "grad_norm": 0.435596227645874,
      "learning_rate": 3.861817583489566e-06,
      "loss": 1.2199,
      "step": 185
    },
    {
      "epoch": 0.7062173706691979,
      "grad_norm": 0.407825231552124,
      "learning_rate": 3.848591374398421e-06,
      "loss": 1.1038,
      "step": 186
    },
    {
      "epoch": 0.7100142382534409,
      "grad_norm": 0.41674646735191345,
      "learning_rate": 3.835311711628774e-06,
      "loss": 1.0718,
      "step": 187
    },
    {
      "epoch": 0.7138111058376839,
      "grad_norm": 0.4335026144981384,
      "learning_rate": 3.82197912154231e-06,
      "loss": 1.0727,
      "step": 188
    },
    {
      "epoch": 0.717607973421927,
      "grad_norm": 0.47680917382240295,
      "learning_rate": 3.808594132598574e-06,
      "loss": 1.2056,
      "step": 189
    },
    {
      "epoch": 0.7214048410061699,
      "grad_norm": 0.38044315576553345,
      "learning_rate": 3.7951572753340273e-06,
      "loss": 0.9217,
      "step": 190
    },
    {
      "epoch": 0.7252017085904129,
      "grad_norm": 0.4642685353755951,
      "learning_rate": 3.781669082341018e-06,
      "loss": 1.071,
      "step": 191
    },
    {
      "epoch": 0.7289985761746559,
      "grad_norm": 0.4392843544483185,
      "learning_rate": 3.768130088246674e-06,
      "loss": 1.2135,
      "step": 192
    },
    {
      "epoch": 0.7327954437588989,
      "grad_norm": 0.4412919580936432,
      "learning_rate": 3.7545408296917087e-06,
      "loss": 1.2025,
      "step": 193
    },
    {
      "epoch": 0.736592311343142,
      "grad_norm": 0.4227074682712555,
      "learning_rate": 3.740901845309152e-06,
      "loss": 1.0674,
      "step": 194
    },
    {
      "epoch": 0.7403891789273849,
      "grad_norm": 0.45686036348342896,
      "learning_rate": 3.727213675703e-06,
      "loss": 1.2151,
      "step": 195
    },
    {
      "epoch": 0.7441860465116279,
      "grad_norm": 0.4000316262245178,
      "learning_rate": 3.713476863426787e-06,
      "loss": 1.0748,
      "step": 196
    },
    {
      "epoch": 0.7479829140958709,
      "grad_norm": 0.42302843928337097,
      "learning_rate": 3.699691952962083e-06,
      "loss": 1.0892,
      "step": 197
    },
    {
      "epoch": 0.751779781680114,
      "grad_norm": 0.4179258644580841,
      "learning_rate": 3.6858594906969073e-06,
      "loss": 1.0626,
      "step": 198
    },
    {
      "epoch": 0.7555766492643569,
      "grad_norm": 0.4555260241031647,
      "learning_rate": 3.6719800249040778e-06,
      "loss": 1.2077,
      "step": 199
    },
    {
      "epoch": 0.7593735168485999,
      "grad_norm": 0.36426669359207153,
      "learning_rate": 3.6580541057194728e-06,
      "loss": 0.944,
      "step": 200
    },
    {
      "epoch": 0.7631703844328429,
      "grad_norm": 0.3654433786869049,
      "learning_rate": 3.6440822851202312e-06,
      "loss": 0.9322,
      "step": 201
    },
    {
      "epoch": 0.7669672520170859,
      "grad_norm": 0.49382278323173523,
      "learning_rate": 3.63006511690287e-06,
      "loss": 1.2109,
      "step": 202
    },
    {
      "epoch": 0.770764119601329,
      "grad_norm": 0.4325428307056427,
      "learning_rate": 3.616003156661334e-06,
      "loss": 1.0696,
      "step": 203
    },
    {
      "epoch": 0.7745609871855719,
      "grad_norm": 0.4158753454685211,
      "learning_rate": 3.6018969617649784e-06,
      "loss": 1.0642,
      "step": 204
    },
    {
      "epoch": 0.7783578547698149,
      "grad_norm": 0.4324316382408142,
      "learning_rate": 3.5877470913364697e-06,
      "loss": 1.2037,
      "step": 205
    },
    {
      "epoch": 0.7821547223540579,
      "grad_norm": 0.46198171377182007,
      "learning_rate": 3.5735541062296287e-06,
      "loss": 1.0685,
      "step": 206
    },
    {
      "epoch": 0.7859515899383009,
      "grad_norm": 0.43740782141685486,
      "learning_rate": 3.559318569007198e-06,
      "loss": 0.9377,
      "step": 207
    },
    {
      "epoch": 0.7897484575225439,
      "grad_norm": 0.4424276351928711,
      "learning_rate": 3.545041043918546e-06,
      "loss": 1.2036,
      "step": 208
    },
    {
      "epoch": 0.7935453251067869,
      "grad_norm": 0.4607738256454468,
      "learning_rate": 3.5307220968772983e-06,
      "loss": 1.2091,
      "step": 209
    },
    {
      "epoch": 0.7973421926910299,
      "grad_norm": 0.4169575273990631,
      "learning_rate": 3.516362295438911e-06,
      "loss": 1.0704,
      "step": 210
    },
    {
      "epoch": 0.8011390602752729,
      "grad_norm": 0.5177940130233765,
      "learning_rate": 3.501962208778172e-06,
      "loss": 1.2059,
      "step": 211
    },
    {
      "epoch": 0.804935927859516,
      "grad_norm": 0.4142232835292816,
      "learning_rate": 3.487522407666641e-06,
      "loss": 1.0769,
      "step": 212
    },
    {
      "epoch": 0.8087327954437589,
      "grad_norm": 0.4076462984085083,
      "learning_rate": 3.473043464450027e-06,
      "loss": 1.0677,
      "step": 213
    },
    {
      "epoch": 0.8125296630280019,
      "grad_norm": 0.5051140785217285,
      "learning_rate": 3.458525953025503e-06,
      "loss": 1.0704,
      "step": 214
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 0.37081989645957947,
      "learning_rate": 3.443970448818954e-06,
      "loss": 0.9524,
      "step": 215
    },
    {
      "epoch": 0.8201233981964879,
      "grad_norm": 0.3888426423072815,
      "learning_rate": 3.429377528762177e-06,
      "loss": 1.0749,
      "step": 216
    },
    {
      "epoch": 0.8239202657807309,
      "grad_norm": 0.49155759811401367,
      "learning_rate": 3.414747771270007e-06,
      "loss": 1.2125,
      "step": 217
    },
    {
      "epoch": 0.8277171333649739,
      "grad_norm": 0.38015004992485046,
      "learning_rate": 3.40008175621739e-06,
      "loss": 1.0571,
      "step": 218
    },
    {
      "epoch": 0.8315140009492169,
      "grad_norm": 0.4071613848209381,
      "learning_rate": 3.3853800649164053e-06,
      "loss": 1.053,
      "step": 219
    },
    {
      "epoch": 0.8353108685334599,
      "grad_norm": 0.42250776290893555,
      "learning_rate": 3.3706432800932184e-06,
      "loss": 1.0685,
      "step": 220
    },
    {
      "epoch": 0.8391077361177028,
      "grad_norm": 0.431573748588562,
      "learning_rate": 3.3558719858649835e-06,
      "loss": 1.0752,
      "step": 221
    },
    {
      "epoch": 0.8429046037019459,
      "grad_norm": 0.46120485663414,
      "learning_rate": 3.341066767716697e-06,
      "loss": 1.2063,
      "step": 222
    },
    {
      "epoch": 0.8467014712861889,
      "grad_norm": 0.4283529818058014,
      "learning_rate": 3.3262282124779823e-06,
      "loss": 1.0799,
      "step": 223
    },
    {
      "epoch": 0.8504983388704319,
      "grad_norm": 0.37570565938949585,
      "learning_rate": 3.3113569082998367e-06,
      "loss": 1.0655,
      "step": 224
    },
    {
      "epoch": 0.8542952064546749,
      "grad_norm": 0.2539384663105011,
      "learning_rate": 3.2964534446313163e-06,
      "loss": 0.4994,
      "step": 225
    },
    {
      "epoch": 0.8580920740389179,
      "grad_norm": 0.4729446470737457,
      "learning_rate": 3.2815184121961725e-06,
      "loss": 1.2129,
      "step": 226
    },
    {
      "epoch": 0.8618889416231609,
      "grad_norm": 0.3342674672603607,
      "learning_rate": 3.266552402969437e-06,
      "loss": 0.7854,
      "step": 227
    },
    {
      "epoch": 0.8656858092074039,
      "grad_norm": 0.45071637630462646,
      "learning_rate": 3.251556010153958e-06,
      "loss": 1.2061,
      "step": 228
    },
    {
      "epoch": 0.8694826767916469,
      "grad_norm": 0.4373473525047302,
      "learning_rate": 3.2365298281568913e-06,
      "loss": 1.0659,
      "step": 229
    },
    {
      "epoch": 0.8732795443758898,
      "grad_norm": 0.42635422945022583,
      "learning_rate": 3.2214744525661336e-06,
      "loss": 1.2066,
      "step": 230
    },
    {
      "epoch": 0.8770764119601329,
      "grad_norm": 0.45856305956840515,
      "learning_rate": 3.2063904801267184e-06,
      "loss": 1.1959,
      "step": 231
    },
    {
      "epoch": 0.8808732795443759,
      "grad_norm": 0.4771096408367157,
      "learning_rate": 3.191278508717166e-06,
      "loss": 1.2025,
      "step": 232
    },
    {
      "epoch": 0.8846701471286189,
      "grad_norm": 0.4655088484287262,
      "learning_rate": 3.176139137325781e-06,
      "loss": 1.2094,
      "step": 233
    },
    {
      "epoch": 0.8884670147128619,
      "grad_norm": 0.39348939061164856,
      "learning_rate": 3.1609729660269114e-06,
      "loss": 1.0672,
      "step": 234
    },
    {
      "epoch": 0.8922638822971048,
      "grad_norm": 0.445089727640152,
      "learning_rate": 3.1457805959571663e-06,
      "loss": 1.1994,
      "step": 235
    },
    {
      "epoch": 0.8960607498813479,
      "grad_norm": 0.3999580144882202,
      "learning_rate": 3.130562629291586e-06,
      "loss": 1.0561,
      "step": 236
    },
    {
      "epoch": 0.8998576174655909,
      "grad_norm": 0.4471660554409027,
      "learning_rate": 3.1153196692197747e-06,
      "loss": 1.0674,
      "step": 237
    },
    {
      "epoch": 0.9036544850498339,
      "grad_norm": 0.4131118655204773,
      "learning_rate": 3.100052319921992e-06,
      "loss": 0.9328,
      "step": 238
    },
    {
      "epoch": 0.9074513526340768,
      "grad_norm": 0.4657612144947052,
      "learning_rate": 3.0847611865452064e-06,
      "loss": 1.2085,
      "step": 239
    },
    {
      "epoch": 0.9112482202183199,
      "grad_norm": 0.4662039577960968,
      "learning_rate": 3.069446875179106e-06,
      "loss": 1.2029,
      "step": 240
    },
    {
      "epoch": 0.9150450878025629,
      "grad_norm": 0.41623374819755554,
      "learning_rate": 3.0541099928320806e-06,
      "loss": 0.9324,
      "step": 241
    },
    {
      "epoch": 0.9188419553868059,
      "grad_norm": 0.4404604434967041,
      "learning_rate": 3.0387511474071556e-06,
      "loss": 1.0648,
      "step": 242
    },
    {
      "epoch": 0.9226388229710489,
      "grad_norm": 0.3622177243232727,
      "learning_rate": 3.023370947677901e-06,
      "loss": 0.9238,
      "step": 243
    },
    {
      "epoch": 0.9264356905552918,
      "grad_norm": 0.4089031517505646,
      "learning_rate": 3.007970003264301e-06,
      "loss": 1.0869,
      "step": 244
    },
    {
      "epoch": 0.9302325581395349,
      "grad_norm": 0.4477076232433319,
      "learning_rate": 2.99254892460859e-06,
      "loss": 1.1983,
      "step": 245
    },
    {
      "epoch": 0.9340294257237779,
      "grad_norm": 0.46868517994880676,
      "learning_rate": 2.9771083229510543e-06,
      "loss": 1.1963,
      "step": 246
    },
    {
      "epoch": 0.9378262933080209,
      "grad_norm": 0.3464803397655487,
      "learning_rate": 2.9616488103058115e-06,
      "loss": 0.9272,
      "step": 247
    },
    {
      "epoch": 0.9416231608922638,
      "grad_norm": 0.45595675706863403,
      "learning_rate": 2.9461709994365445e-06,
      "loss": 1.2056,
      "step": 248
    },
    {
      "epoch": 0.9454200284765069,
      "grad_norm": 0.39231353998184204,
      "learning_rate": 2.930675503832217e-06,
      "loss": 1.0618,
      "step": 249
    },
    {
      "epoch": 0.9492168960607499,
      "grad_norm": 0.3708343505859375,
      "learning_rate": 2.91516293768276e-06,
      "loss": 0.9102,
      "step": 250
    },
    {
      "epoch": 0.9530137636449929,
      "grad_norm": 0.4427671432495117,
      "learning_rate": 2.899633915854721e-06,
      "loss": 1.2012,
      "step": 251
    },
    {
      "epoch": 0.9568106312292359,
      "grad_norm": 0.33957725763320923,
      "learning_rate": 2.8840890538668955e-06,
      "loss": 0.8041,
      "step": 252
    },
    {
      "epoch": 0.9606074988134788,
      "grad_norm": 0.3556421995162964,
      "learning_rate": 2.868528967865934e-06,
      "loss": 0.9198,
      "step": 253
    },
    {
      "epoch": 0.9644043663977219,
      "grad_norm": 0.4399990439414978,
      "learning_rate": 2.8529542746019118e-06,
      "loss": 1.1998,
      "step": 254
    },
    {
      "epoch": 0.9682012339819649,
      "grad_norm": 0.4560842514038086,
      "learning_rate": 2.8373655914038907e-06,
      "loss": 1.2007,
      "step": 255
    },
    {
      "epoch": 0.9719981015662079,
      "grad_norm": 0.38949334621429443,
      "learning_rate": 2.821763536155446e-06,
      "loss": 1.0533,
      "step": 256
    },
    {
      "epoch": 0.9757949691504508,
      "grad_norm": 0.40262502431869507,
      "learning_rate": 2.806148727270176e-06,
      "loss": 1.0585,
      "step": 257
    },
    {
      "epoch": 0.9795918367346939,
      "grad_norm": 0.4113542139530182,
      "learning_rate": 2.7905217836671915e-06,
      "loss": 1.0601,
      "step": 258
    },
    {
      "epoch": 0.9833887043189369,
      "grad_norm": 0.39467111229896545,
      "learning_rate": 2.774883324746583e-06,
      "loss": 1.0627,
      "step": 259
    },
    {
      "epoch": 0.9871855719031799,
      "grad_norm": 0.4230286180973053,
      "learning_rate": 2.7592339703648696e-06,
      "loss": 1.2039,
      "step": 260
    },
    {
      "epoch": 0.9909824394874229,
      "grad_norm": 0.3624156415462494,
      "learning_rate": 2.743574340810431e-06,
      "loss": 0.9253,
      "step": 261
    },
    {
      "epoch": 0.9947793070716658,
      "grad_norm": 0.45184388756752014,
      "learning_rate": 2.7279050567789195e-06,
      "loss": 1.1943,
      "step": 262
    },
    {
      "epoch": 0.9985761746559089,
      "grad_norm": 0.43259841203689575,
      "learning_rate": 2.7122267393486605e-06,
      "loss": 1.2086,
      "step": 263
    },
    {
      "epoch": 1.0023730422401518,
      "grad_norm": 0.7363314628601074,
      "learning_rate": 2.6965400099560305e-06,
      "loss": 1.9312,
      "step": 264
    },
    {
      "epoch": 1.0061699098243948,
      "grad_norm": 0.42088714241981506,
      "learning_rate": 2.6808454903708313e-06,
      "loss": 1.0671,
      "step": 265
    },
    {
      "epoch": 1.0099667774086378,
      "grad_norm": 0.47700321674346924,
      "learning_rate": 2.66514380267164e-06,
      "loss": 1.2008,
      "step": 266
    },
    {
      "epoch": 1.0137636449928809,
      "grad_norm": 0.40473899245262146,
      "learning_rate": 2.6494355692211537e-06,
      "loss": 1.0598,
      "step": 267
    },
    {
      "epoch": 1.0175605125771239,
      "grad_norm": 0.40599343180656433,
      "learning_rate": 2.6337214126415237e-06,
      "loss": 1.0582,
      "step": 268
    },
    {
      "epoch": 1.021357380161367,
      "grad_norm": 0.49867045879364014,
      "learning_rate": 2.6180019557896725e-06,
      "loss": 1.1879,
      "step": 269
    },
    {
      "epoch": 1.02515424774561,
      "grad_norm": 0.432919442653656,
      "learning_rate": 2.6022778217326077e-06,
      "loss": 1.1843,
      "step": 270
    },
    {
      "epoch": 1.028951115329853,
      "grad_norm": 0.40411439538002014,
      "learning_rate": 2.586549633722726e-06,
      "loss": 1.2015,
      "step": 271
    },
    {
      "epoch": 1.032747982914096,
      "grad_norm": 0.4091154932975769,
      "learning_rate": 2.5708180151731105e-06,
      "loss": 1.0366,
      "step": 272
    },
    {
      "epoch": 1.0365448504983388,
      "grad_norm": 0.41487744450569153,
      "learning_rate": 2.555083589632818e-06,
      "loss": 1.0597,
      "step": 273
    },
    {
      "epoch": 1.0403417180825818,
      "grad_norm": 0.43414047360420227,
      "learning_rate": 2.5393469807621646e-06,
      "loss": 1.1722,
      "step": 274
    },
    {
      "epoch": 1.0441385856668248,
      "grad_norm": 0.43517452478408813,
      "learning_rate": 2.523608812308009e-06,
      "loss": 1.1897,
      "step": 275
    },
    {
      "epoch": 1.0479354532510678,
      "grad_norm": 0.41618216037750244,
      "learning_rate": 2.5078697080790248e-06,
      "loss": 1.0384,
      "step": 276
    },
    {
      "epoch": 1.0517323208353109,
      "grad_norm": 0.3956233561038971,
      "learning_rate": 2.4921302919209765e-06,
      "loss": 1.0523,
      "step": 277
    },
    {
      "epoch": 1.055529188419554,
      "grad_norm": 0.3869052231311798,
      "learning_rate": 2.476391187691992e-06,
      "loss": 1.0695,
      "step": 278
    },
    {
      "epoch": 1.059326056003797,
      "grad_norm": 0.38108527660369873,
      "learning_rate": 2.4606530192378358e-06,
      "loss": 0.9268,
      "step": 279
    },
    {
      "epoch": 1.06312292358804,
      "grad_norm": 0.40593841671943665,
      "learning_rate": 2.4449164103671834e-06,
      "loss": 1.0487,
      "step": 280
    },
    {
      "epoch": 1.0669197911722827,
      "grad_norm": 0.35500654578208923,
      "learning_rate": 2.4291819848268908e-06,
      "loss": 0.9251,
      "step": 281
    },
    {
      "epoch": 1.0707166587565258,
      "grad_norm": 0.40299785137176514,
      "learning_rate": 2.4134503662772754e-06,
      "loss": 1.0533,
      "step": 282
    },
    {
      "epoch": 1.0745135263407688,
      "grad_norm": 0.43219995498657227,
      "learning_rate": 2.3977221782673936e-06,
      "loss": 1.1866,
      "step": 283
    },
    {
      "epoch": 1.0783103939250118,
      "grad_norm": 0.45522886514663696,
      "learning_rate": 2.3819980442103288e-06,
      "loss": 1.2028,
      "step": 284
    },
    {
      "epoch": 1.0821072615092548,
      "grad_norm": 0.4167502522468567,
      "learning_rate": 2.3662785873584775e-06,
      "loss": 1.0328,
      "step": 285
    },
    {
      "epoch": 1.0859041290934979,
      "grad_norm": 0.402190238237381,
      "learning_rate": 2.350564430778847e-06,
      "loss": 1.0613,
      "step": 286
    },
    {
      "epoch": 1.089700996677741,
      "grad_norm": 0.36273133754730225,
      "learning_rate": 2.3348561973283613e-06,
      "loss": 0.9201,
      "step": 287
    },
    {
      "epoch": 1.093497864261984,
      "grad_norm": 0.41140493750572205,
      "learning_rate": 2.31915450962917e-06,
      "loss": 1.0598,
      "step": 288
    },
    {
      "epoch": 1.097294731846227,
      "grad_norm": 0.3959132730960846,
      "learning_rate": 2.3034599900439703e-06,
      "loss": 1.0547,
      "step": 289
    },
    {
      "epoch": 1.10109159943047,
      "grad_norm": 0.43085983395576477,
      "learning_rate": 2.2877732606513407e-06,
      "loss": 1.1836,
      "step": 290
    },
    {
      "epoch": 1.1048884670147128,
      "grad_norm": 0.37919700145721436,
      "learning_rate": 2.2720949432210813e-06,
      "loss": 1.0431,
      "step": 291
    },
    {
      "epoch": 1.1086853345989558,
      "grad_norm": 0.3869677186012268,
      "learning_rate": 2.2564256591895695e-06,
      "loss": 0.9199,
      "step": 292
    },
    {
      "epoch": 1.1124822021831988,
      "grad_norm": 0.40192729234695435,
      "learning_rate": 2.2407660296351313e-06,
      "loss": 0.9259,
      "step": 293
    },
    {
      "epoch": 1.1162790697674418,
      "grad_norm": 0.41164687275886536,
      "learning_rate": 2.225116675253418e-06,
      "loss": 1.0489,
      "step": 294
    },
    {
      "epoch": 1.1200759373516849,
      "grad_norm": 0.3917531371116638,
      "learning_rate": 2.209478216332809e-06,
      "loss": 1.0493,
      "step": 295
    },
    {
      "epoch": 1.123872804935928,
      "grad_norm": 0.3413256108760834,
      "learning_rate": 2.193851272729825e-06,
      "loss": 0.9091,
      "step": 296
    },
    {
      "epoch": 1.127669672520171,
      "grad_norm": 0.4337766170501709,
      "learning_rate": 2.1782364638445545e-06,
      "loss": 1.191,
      "step": 297
    },
    {
      "epoch": 1.131466540104414,
      "grad_norm": 0.42421215772628784,
      "learning_rate": 2.16263440859611e-06,
      "loss": 1.1943,
      "step": 298
    },
    {
      "epoch": 1.1352634076886567,
      "grad_norm": 0.38704779744148254,
      "learning_rate": 2.1470457253980887e-06,
      "loss": 1.1828,
      "step": 299
    },
    {
      "epoch": 1.1390602752728998,
      "grad_norm": 0.38762617111206055,
      "learning_rate": 2.131471032134067e-06,
      "loss": 1.0479,
      "step": 300
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 0.3805724084377289,
      "learning_rate": 2.115910946133105e-06,
      "loss": 1.049,
      "step": 301
    },
    {
      "epoch": 1.1466540104413858,
      "grad_norm": 0.38618871569633484,
      "learning_rate": 2.10036608414528e-06,
      "loss": 1.0566,
      "step": 302
    },
    {
      "epoch": 1.1504508780256288,
      "grad_norm": 0.32153603434562683,
      "learning_rate": 2.084837062317241e-06,
      "loss": 0.7648,
      "step": 303
    },
    {
      "epoch": 1.1542477456098719,
      "grad_norm": 0.4316171109676361,
      "learning_rate": 2.0693244961677834e-06,
      "loss": 1.183,
      "step": 304
    },
    {
      "epoch": 1.158044613194115,
      "grad_norm": 0.3866140842437744,
      "learning_rate": 2.0538290005634564e-06,
      "loss": 1.1986,
      "step": 305
    },
    {
      "epoch": 1.161841480778358,
      "grad_norm": 0.41327404975891113,
      "learning_rate": 2.0383511896941894e-06,
      "loss": 1.1974,
      "step": 306
    },
    {
      "epoch": 1.165638348362601,
      "grad_norm": 0.37834978103637695,
      "learning_rate": 2.0228916770489466e-06,
      "loss": 1.0443,
      "step": 307
    },
    {
      "epoch": 1.169435215946844,
      "grad_norm": 0.41474854946136475,
      "learning_rate": 2.0074510753914113e-06,
      "loss": 1.1803,
      "step": 308
    },
    {
      "epoch": 1.1732320835310868,
      "grad_norm": 0.3727324306964874,
      "learning_rate": 1.9920299967356995e-06,
      "loss": 1.055,
      "step": 309
    },
    {
      "epoch": 1.1770289511153298,
      "grad_norm": 0.40379658341407776,
      "learning_rate": 1.9766290523220997e-06,
      "loss": 1.2038,
      "step": 310
    },
    {
      "epoch": 1.1808258186995728,
      "grad_norm": 0.39679378271102905,
      "learning_rate": 1.9612488525928453e-06,
      "loss": 1.1826,
      "step": 311
    },
    {
      "epoch": 1.1846226862838158,
      "grad_norm": 0.4498043954372406,
      "learning_rate": 1.94589000716792e-06,
      "loss": 1.1875,
      "step": 312
    },
    {
      "epoch": 1.1884195538680589,
      "grad_norm": 0.41798946261405945,
      "learning_rate": 1.9305531248208948e-06,
      "loss": 1.1847,
      "step": 313
    },
    {
      "epoch": 1.1922164214523019,
      "grad_norm": 0.42096462845802307,
      "learning_rate": 1.9152388134547944e-06,
      "loss": 1.059,
      "step": 314
    },
    {
      "epoch": 1.196013289036545,
      "grad_norm": 0.3939502537250519,
      "learning_rate": 1.8999476800780087e-06,
      "loss": 1.0457,
      "step": 315
    },
    {
      "epoch": 1.199810156620788,
      "grad_norm": 0.44725000858306885,
      "learning_rate": 1.8846803307802263e-06,
      "loss": 1.1939,
      "step": 316
    },
    {
      "epoch": 1.2036070242050307,
      "grad_norm": 0.3450300097465515,
      "learning_rate": 1.8694373707084148e-06,
      "loss": 0.9045,
      "step": 317
    },
    {
      "epoch": 1.2074038917892738,
      "grad_norm": 0.4713125228881836,
      "learning_rate": 1.854219404042834e-06,
      "loss": 1.1848,
      "step": 318
    },
    {
      "epoch": 1.2112007593735168,
      "grad_norm": 0.33273351192474365,
      "learning_rate": 1.8390270339730892e-06,
      "loss": 0.7687,
      "step": 319
    },
    {
      "epoch": 1.2149976269577598,
      "grad_norm": 0.35393235087394714,
      "learning_rate": 1.82386086267422e-06,
      "loss": 0.9687,
      "step": 320
    },
    {
      "epoch": 1.2187944945420028,
      "grad_norm": 0.35949307680130005,
      "learning_rate": 1.8087214912828343e-06,
      "loss": 0.9033,
      "step": 321
    },
    {
      "epoch": 1.2225913621262459,
      "grad_norm": 0.42038631439208984,
      "learning_rate": 1.793609519873282e-06,
      "loss": 1.1888,
      "step": 322
    },
    {
      "epoch": 1.2263882297104889,
      "grad_norm": 0.4522581696510315,
      "learning_rate": 1.7785255474338675e-06,
      "loss": 1.188,
      "step": 323
    },
    {
      "epoch": 1.230185097294732,
      "grad_norm": 0.3686380386352539,
      "learning_rate": 1.763470171843109e-06,
      "loss": 1.0683,
      "step": 324
    },
    {
      "epoch": 1.233981964878975,
      "grad_norm": 0.4130842685699463,
      "learning_rate": 1.748443989846042e-06,
      "loss": 1.1772,
      "step": 325
    },
    {
      "epoch": 1.237778832463218,
      "grad_norm": 0.3096576929092407,
      "learning_rate": 1.7334475970305639e-06,
      "loss": 0.767,
      "step": 326
    },
    {
      "epoch": 1.2415757000474608,
      "grad_norm": 0.3671686351299286,
      "learning_rate": 1.718481587803828e-06,
      "loss": 0.9131,
      "step": 327
    },
    {
      "epoch": 1.2453725676317038,
      "grad_norm": 0.4213862717151642,
      "learning_rate": 1.7035465553686839e-06,
      "loss": 1.1863,
      "step": 328
    },
    {
      "epoch": 1.2491694352159468,
      "grad_norm": 0.3827097713947296,
      "learning_rate": 1.688643091700164e-06,
      "loss": 1.0456,
      "step": 329
    },
    {
      "epoch": 1.2529663028001898,
      "grad_norm": 0.3981434106826782,
      "learning_rate": 1.6737717875220177e-06,
      "loss": 1.0414,
      "step": 330
    },
    {
      "epoch": 1.2567631703844329,
      "grad_norm": 0.3808799982070923,
      "learning_rate": 1.6589332322833035e-06,
      "loss": 1.0457,
      "step": 331
    },
    {
      "epoch": 1.2605600379686759,
      "grad_norm": 0.39812132716178894,
      "learning_rate": 1.6441280141350169e-06,
      "loss": 1.0624,
      "step": 332
    },
    {
      "epoch": 1.264356905552919,
      "grad_norm": 0.3926783502101898,
      "learning_rate": 1.6293567199067827e-06,
      "loss": 1.1868,
      "step": 333
    },
    {
      "epoch": 1.2681537731371617,
      "grad_norm": 0.42180249094963074,
      "learning_rate": 1.6146199350835956e-06,
      "loss": 1.1785,
      "step": 334
    },
    {
      "epoch": 1.2719506407214047,
      "grad_norm": 0.3862990736961365,
      "learning_rate": 1.5999182437826111e-06,
      "loss": 1.0557,
      "step": 335
    },
    {
      "epoch": 1.2757475083056478,
      "grad_norm": 0.33035123348236084,
      "learning_rate": 1.5852522287299946e-06,
      "loss": 0.9202,
      "step": 336
    },
    {
      "epoch": 1.2795443758898908,
      "grad_norm": 0.3912082612514496,
      "learning_rate": 1.5706224712378237e-06,
      "loss": 1.0578,
      "step": 337
    },
    {
      "epoch": 1.2833412434741338,
      "grad_norm": 0.368190199136734,
      "learning_rate": 1.5560295511810465e-06,
      "loss": 1.0456,
      "step": 338
    },
    {
      "epoch": 1.2871381110583768,
      "grad_norm": 0.3697574734687805,
      "learning_rate": 1.5414740469744986e-06,
      "loss": 1.0412,
      "step": 339
    },
    {
      "epoch": 1.2909349786426199,
      "grad_norm": 0.3395715057849884,
      "learning_rate": 1.5269565355499738e-06,
      "loss": 0.9436,
      "step": 340
    },
    {
      "epoch": 1.2947318462268629,
      "grad_norm": 0.38877978920936584,
      "learning_rate": 1.5124775923333604e-06,
      "loss": 1.0565,
      "step": 341
    },
    {
      "epoch": 1.298528713811106,
      "grad_norm": 0.3653126060962677,
      "learning_rate": 1.4980377912218291e-06,
      "loss": 1.0534,
      "step": 342
    },
    {
      "epoch": 1.302325581395349,
      "grad_norm": 0.403150737285614,
      "learning_rate": 1.4836377045610903e-06,
      "loss": 1.063,
      "step": 343
    },
    {
      "epoch": 1.306122448979592,
      "grad_norm": 0.3969637155532837,
      "learning_rate": 1.4692779031227032e-06,
      "loss": 1.1949,
      "step": 344
    },
    {
      "epoch": 1.3099193165638348,
      "grad_norm": 0.3859919607639313,
      "learning_rate": 1.4549589560814558e-06,
      "loss": 1.0345,
      "step": 345
    },
    {
      "epoch": 1.3137161841480778,
      "grad_norm": 0.3501088321208954,
      "learning_rate": 1.440681430992803e-06,
      "loss": 1.0387,
      "step": 346
    },
    {
      "epoch": 1.3175130517323208,
      "grad_norm": 0.38995981216430664,
      "learning_rate": 1.4264458937703717e-06,
      "loss": 1.0632,
      "step": 347
    },
    {
      "epoch": 1.3213099193165638,
      "grad_norm": 0.3388945460319519,
      "learning_rate": 1.4122529086635312e-06,
      "loss": 0.9119,
      "step": 348
    },
    {
      "epoch": 1.3251067869008069,
      "grad_norm": 0.37815216183662415,
      "learning_rate": 1.398103038235022e-06,
      "loss": 1.1801,
      "step": 349
    },
    {
      "epoch": 1.3289036544850499,
      "grad_norm": 0.3603959381580353,
      "learning_rate": 1.3839968433386659e-06,
      "loss": 1.0401,
      "step": 350
    },
    {
      "epoch": 1.332700522069293,
      "grad_norm": 0.4169382154941559,
      "learning_rate": 1.3699348830971316e-06,
      "loss": 1.1889,
      "step": 351
    },
    {
      "epoch": 1.3364973896535357,
      "grad_norm": 0.4009808599948883,
      "learning_rate": 1.3559177148797698e-06,
      "loss": 1.1802,
      "step": 352
    },
    {
      "epoch": 1.3402942572377787,
      "grad_norm": 0.3702283203601837,
      "learning_rate": 1.3419458942805274e-06,
      "loss": 0.9253,
      "step": 353
    },
    {
      "epoch": 1.3440911248220218,
      "grad_norm": 0.3447802662849426,
      "learning_rate": 1.3280199750959233e-06,
      "loss": 0.9254,
      "step": 354
    },
    {
      "epoch": 1.3478879924062648,
      "grad_norm": 0.40221476554870605,
      "learning_rate": 1.314140509303093e-06,
      "loss": 1.0729,
      "step": 355
    },
    {
      "epoch": 1.3516848599905078,
      "grad_norm": 0.40008315443992615,
      "learning_rate": 1.3003080470379176e-06,
      "loss": 1.1839,
      "step": 356
    },
    {
      "epoch": 1.3554817275747508,
      "grad_norm": 0.4559074640274048,
      "learning_rate": 1.2865231365732136e-06,
      "loss": 1.184,
      "step": 357
    },
    {
      "epoch": 1.3592785951589939,
      "grad_norm": 0.3520359694957733,
      "learning_rate": 1.2727863242970007e-06,
      "loss": 0.9056,
      "step": 358
    },
    {
      "epoch": 1.3630754627432369,
      "grad_norm": 0.39236965775489807,
      "learning_rate": 1.2590981546908481e-06,
      "loss": 1.1798,
      "step": 359
    },
    {
      "epoch": 1.36687233032748,
      "grad_norm": 0.39921608567237854,
      "learning_rate": 1.245459170308292e-06,
      "loss": 1.1876,
      "step": 360
    },
    {
      "epoch": 1.370669197911723,
      "grad_norm": 0.32645294070243835,
      "learning_rate": 1.2318699117533264e-06,
      "loss": 0.9214,
      "step": 361
    },
    {
      "epoch": 1.374466065495966,
      "grad_norm": 0.3884054720401764,
      "learning_rate": 1.2183309176589822e-06,
      "loss": 1.1828,
      "step": 362
    },
    {
      "epoch": 1.3782629330802088,
      "grad_norm": 0.3472171425819397,
      "learning_rate": 1.2048427246659738e-06,
      "loss": 0.9014,
      "step": 363
    },
    {
      "epoch": 1.3820598006644518,
      "grad_norm": 0.34238362312316895,
      "learning_rate": 1.1914058674014264e-06,
      "loss": 0.9166,
      "step": 364
    },
    {
      "epoch": 1.3858566682486948,
      "grad_norm": 0.4093136489391327,
      "learning_rate": 1.17802087845769e-06,
      "loss": 1.1757,
      "step": 365
    },
    {
      "epoch": 1.3896535358329378,
      "grad_norm": 0.4089086949825287,
      "learning_rate": 1.164688288371227e-06,
      "loss": 1.1898,
      "step": 366
    },
    {
      "epoch": 1.3934504034171808,
      "grad_norm": 0.36652591824531555,
      "learning_rate": 1.1514086256015803e-06,
      "loss": 1.0574,
      "step": 367
    },
    {
      "epoch": 1.3972472710014239,
      "grad_norm": 0.3458910584449768,
      "learning_rate": 1.138182416510434e-06,
      "loss": 1.0404,
      "step": 368
    },
    {
      "epoch": 1.401044138585667,
      "grad_norm": 0.403898149728775,
      "learning_rate": 1.1250101853407427e-06,
      "loss": 1.185,
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.4048410061699097, |
|
"grad_norm": 0.38953447341918945, |
|
"learning_rate": 1.1118924541959573e-06, |
|
"loss": 1.1767, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.4086378737541527, |
|
"grad_norm": 0.41569021344184875, |
|
"learning_rate": 1.09882974301933e-06, |
|
"loss": 1.1867, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.4124347413383957, |
|
"grad_norm": 0.3661198019981384, |
|
"learning_rate": 1.0858225695733006e-06, |
|
"loss": 1.0516, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.4162316089226388, |
|
"grad_norm": 0.40183043479919434, |
|
"learning_rate": 1.072871449418982e-06, |
|
"loss": 1.0376, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.4200284765068818, |
|
"grad_norm": 0.3806290030479431, |
|
"learning_rate": 1.0599768958957193e-06, |
|
"loss": 1.0473, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.4238253440911248, |
|
"grad_norm": 0.3974708616733551, |
|
"learning_rate": 1.0471394201007435e-06, |
|
"loss": 1.1799, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.4276222116753678, |
|
"grad_norm": 0.37168920040130615, |
|
"learning_rate": 1.0343595308689156e-06, |
|
"loss": 1.0478, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.4314190792596109, |
|
"grad_norm": 0.36702632904052734, |
|
"learning_rate": 1.021637734752557e-06, |
|
"loss": 1.0368, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.435215946843854, |
|
"grad_norm": 0.36013931035995483, |
|
"learning_rate": 1.0089745360013685e-06, |
|
"loss": 1.0488, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.439012814428097, |
|
"grad_norm": 0.39848053455352783, |
|
"learning_rate": 9.963704365424494e-07, |
|
"loss": 1.0501, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.44280968201234, |
|
"grad_norm": 0.3630136549472809, |
|
"learning_rate": 9.838259359603987e-07, |
|
"loss": 1.0455, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.4466065495965827, |
|
"grad_norm": 0.38877931237220764, |
|
"learning_rate": 9.713415314775122e-07, |
|
"loss": 1.1887, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.4504034171808258, |
|
"grad_norm": 0.3823590874671936, |
|
"learning_rate": 9.589177179340775e-07, |
|
"loss": 1.1795, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.4542002847650688, |
|
"grad_norm": 0.31407251954078674, |
|
"learning_rate": 9.465549877687602e-07, |
|
"loss": 0.8088, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.4579971523493118, |
|
"grad_norm": 0.3785543143749237, |
|
"learning_rate": 9.342538309990804e-07, |
|
"loss": 1.0545, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.4617940199335548, |
|
"grad_norm": 0.3719753921031952, |
|
"learning_rate": 9.220147352019965e-07, |
|
"loss": 1.1924, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.4655908875177979, |
|
"grad_norm": 0.3974050283432007, |
|
"learning_rate": 9.098381854945762e-07, |
|
"loss": 1.0512, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.469387755102041, |
|
"grad_norm": 0.380188912153244, |
|
"learning_rate": 8.977246645147655e-07, |
|
"loss": 1.1842, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.4731846226862837, |
|
"grad_norm": 0.33522146940231323, |
|
"learning_rate": 8.856746524022647e-07, |
|
"loss": 0.9293, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.4769814902705267, |
|
"grad_norm": 0.34948912262916565, |
|
"learning_rate": 8.736886267794911e-07, |
|
"loss": 1.0576, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.4807783578547697, |
|
"grad_norm": 0.3386431336402893, |
|
"learning_rate": 8.617670627326503e-07, |
|
"loss": 0.9029, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.4845752254390128, |
|
"grad_norm": 0.32754087448120117, |
|
"learning_rate": 8.49910432792907e-07, |
|
"loss": 0.9201, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.4883720930232558, |
|
"grad_norm": 0.38169023394584656, |
|
"learning_rate": 8.381192069176539e-07, |
|
"loss": 1.1824, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.4921689606074988, |
|
"grad_norm": 0.3721928596496582, |
|
"learning_rate": 8.263938524718812e-07, |
|
"loss": 1.0526, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.4959658281917418, |
|
"grad_norm": 0.3896506130695343, |
|
"learning_rate": 8.147348342096579e-07, |
|
"loss": 1.1811, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.4997626957759849, |
|
"grad_norm": 0.3741244375705719, |
|
"learning_rate": 8.031426142557061e-07, |
|
"loss": 1.0512, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.503559563360228, |
|
"grad_norm": 0.3273400366306305, |
|
"learning_rate": 7.916176520870836e-07, |
|
"loss": 0.905, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.507356430944471, |
|
"grad_norm": 0.3622443675994873, |
|
"learning_rate": 7.80160404514975e-07, |
|
"loss": 1.0561, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.511153298528714, |
|
"grad_norm": 0.3936561048030853, |
|
"learning_rate": 7.687713256665835e-07, |
|
"loss": 1.1887, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.514950166112957, |
|
"grad_norm": 0.37885287404060364, |
|
"learning_rate": 7.574508669671288e-07, |
|
"loss": 1.185, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.5187470336971998, |
|
"grad_norm": 0.36686670780181885, |
|
"learning_rate": 7.46199477121958e-07, |
|
"loss": 1.1886, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.5225439012814428, |
|
"grad_norm": 0.3786951005458832, |
|
"learning_rate": 7.350176020987585e-07, |
|
"loss": 1.1841, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.5263407688656858, |
|
"grad_norm": 0.3833267390727997, |
|
"learning_rate": 7.239056851098785e-07, |
|
"loss": 1.191, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.5301376364499288, |
|
"grad_norm": 0.2992788553237915, |
|
"learning_rate": 7.128641665947658e-07, |
|
"loss": 0.7744, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.5339345040341716, |
|
"grad_norm": 0.379607230424881, |
|
"learning_rate": 7.018934842025058e-07, |
|
"loss": 1.1837, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.5377313716184147, |
|
"grad_norm": 0.3761638104915619, |
|
"learning_rate": 6.90994072774473e-07, |
|
"loss": 1.0384, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.5415282392026577, |
|
"grad_norm": 0.405567467212677, |
|
"learning_rate": 6.801663643271012e-07, |
|
"loss": 1.1853, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.5453251067869007, |
|
"grad_norm": 0.3884119987487793, |
|
"learning_rate": 6.69410788034755e-07, |
|
"loss": 1.1932, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.5491219743711437, |
|
"grad_norm": 0.35525161027908325, |
|
"learning_rate": 6.587277702127196e-07, |
|
"loss": 1.0483, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.5529188419553868, |
|
"grad_norm": 0.3150971829891205, |
|
"learning_rate": 6.481177343003043e-07, |
|
"loss": 0.9127, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.5567157095396298, |
|
"grad_norm": 0.39407217502593994, |
|
"learning_rate": 6.375811008440591e-07, |
|
"loss": 1.179, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.5605125771238728, |
|
"grad_norm": 0.402775377035141, |
|
"learning_rate": 6.271182874811024e-07, |
|
"loss": 1.0475, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.5643094447081158, |
|
"grad_norm": 0.3489883840084076, |
|
"learning_rate": 6.167297089225713e-07, |
|
"loss": 0.9048, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.5681063122923589, |
|
"grad_norm": 0.35091614723205566, |
|
"learning_rate": 6.064157769371823e-07, |
|
"loss": 1.0413, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.5719031798766019, |
|
"grad_norm": 0.35318344831466675, |
|
"learning_rate": 5.961769003349077e-07, |
|
"loss": 1.0326, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.575700047460845, |
|
"grad_norm": 0.3418586850166321, |
|
"learning_rate": 5.860134849507765e-07, |
|
"loss": 0.9169, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.579496915045088, |
|
"grad_norm": 0.35626721382141113, |
|
"learning_rate": 5.759259336287851e-07, |
|
"loss": 1.0404, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.583293782629331, |
|
"grad_norm": 0.3962875306606293, |
|
"learning_rate": 5.659146462059292e-07, |
|
"loss": 1.1874, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.5870906502135738, |
|
"grad_norm": 0.3754862844944, |
|
"learning_rate": 5.559800194963591e-07, |
|
"loss": 1.1858, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.5908875177978168, |
|
"grad_norm": 0.37421101331710815, |
|
"learning_rate": 5.46122447275649e-07, |
|
"loss": 1.1813, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.5946843853820598, |
|
"grad_norm": 0.3670285642147064, |
|
"learning_rate": 5.363423202651876e-07, |
|
"loss": 1.0544, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.5984812529663028, |
|
"grad_norm": 0.42034921050071716, |
|
"learning_rate": 5.266400261166951e-07, |
|
"loss": 1.1884, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.6022781205505456, |
|
"grad_norm": 0.3546159565448761, |
|
"learning_rate": 5.170159493968549e-07, |
|
"loss": 1.1814, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.6060749881347887, |
|
"grad_norm": 0.3211246728897095, |
|
"learning_rate": 5.074704715720711e-07, |
|
"loss": 0.9178, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.6098718557190317, |
|
"grad_norm": 0.3514634370803833, |
|
"learning_rate": 4.980039709933492e-07, |
|
"loss": 0.913, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.6136687233032747, |
|
"grad_norm": 0.4017420709133148, |
|
"learning_rate": 4.886168228813007e-07, |
|
"loss": 1.1828, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.6174655908875177, |
|
"grad_norm": 0.3313949406147003, |
|
"learning_rate": 4.793093993112663e-07, |
|
"loss": 0.9082, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.6212624584717608, |
|
"grad_norm": 0.33639460802078247, |
|
"learning_rate": 4.700820691985739e-07, |
|
"loss": 0.9069, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.6250593260560038, |
|
"grad_norm": 0.3237963616847992, |
|
"learning_rate": 4.6093519828391025e-07, |
|
"loss": 0.894, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.6288561936402468, |
|
"grad_norm": 0.3815336525440216, |
|
"learning_rate": 4.51869149118829e-07, |
|
"loss": 1.1764, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 0.36702704429626465, |
|
"learning_rate": 4.428842810513784e-07, |
|
"loss": 1.1678, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.6364499288087329, |
|
"grad_norm": 0.3530699610710144, |
|
"learning_rate": 4.3398095021185557e-07, |
|
"loss": 1.1782, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.6402467963929759, |
|
"grad_norm": 0.33500173687934875, |
|
"learning_rate": 4.251595094986957e-07, |
|
"loss": 1.0625, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.644043663977219, |
|
"grad_norm": 0.37163975834846497, |
|
"learning_rate": 4.1642030856448104e-07, |
|
"loss": 1.1739, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.647840531561462, |
|
"grad_norm": 0.37387996912002563, |
|
"learning_rate": 4.077636938020807e-07, |
|
"loss": 1.0592, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.651637399145705, |
|
"grad_norm": 0.3629416823387146, |
|
"learning_rate": 3.991900083309241e-07, |
|
"loss": 1.1718, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.6554342667299478, |
|
"grad_norm": 0.3603303134441376, |
|
"learning_rate": 3.906995919833997e-07, |
|
"loss": 1.0432, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.6592311343141908, |
|
"grad_norm": 0.31616950035095215, |
|
"learning_rate": 3.8229278129138293e-07, |
|
"loss": 0.9182, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.6630280018984338, |
|
"grad_norm": 0.3574075698852539, |
|
"learning_rate": 3.739699094729002e-07, |
|
"loss": 1.0423, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.6668248694826768, |
|
"grad_norm": 0.33015620708465576, |
|
"learning_rate": 3.6573130641892053e-07, |
|
"loss": 0.8927, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.6706217370669196, |
|
"grad_norm": 0.32390230894088745, |
|
"learning_rate": 3.575772986802775e-07, |
|
"loss": 0.9272, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.6744186046511627, |
|
"grad_norm": 0.37338370084762573, |
|
"learning_rate": 3.4950820945472945e-07, |
|
"loss": 1.1717, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.6782154722354057, |
|
"grad_norm": 0.35633939504623413, |
|
"learning_rate": 3.4152435857414676e-07, |
|
"loss": 1.1792, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.6820123398196487, |
|
"grad_norm": 0.35473427176475525, |
|
"learning_rate": 3.3362606249183446e-07, |
|
"loss": 1.0391, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.6858092074038917, |
|
"grad_norm": 0.36706259846687317, |
|
"learning_rate": 3.2581363426998966e-07, |
|
"loss": 1.0463, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.6896060749881348, |
|
"grad_norm": 0.3562609553337097, |
|
"learning_rate": 3.18087383567294e-07, |
|
"loss": 1.0515, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.6934029425723778, |
|
"grad_norm": 0.35783901810646057, |
|
"learning_rate": 3.1044761662663933e-07, |
|
"loss": 1.0449, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.6971998101566208, |
|
"grad_norm": 0.3588060736656189, |
|
"learning_rate": 3.0289463626298585e-07, |
|
"loss": 1.1823, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.7009966777408638, |
|
"grad_norm": 0.35698550939559937, |
|
"learning_rate": 2.9542874185136545e-07, |
|
"loss": 1.1792, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.7047935453251069, |
|
"grad_norm": 0.37660661339759827, |
|
"learning_rate": 2.880502293150117e-07, |
|
"loss": 1.1868, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.7085904129093499, |
|
"grad_norm": 0.38808876276016235, |
|
"learning_rate": 2.8075939111362915e-07, |
|
"loss": 1.1823, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.712387280493593, |
|
"grad_norm": 0.35739246010780334, |
|
"learning_rate": 2.7355651623180574e-07, |
|
"loss": 1.1793, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.716184148077836, |
|
"grad_norm": 0.3609924018383026, |
|
"learning_rate": 2.6644189016755415e-07, |
|
"loss": 1.0662, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.719981015662079, |
|
"grad_norm": 0.3684343099594116, |
|
"learning_rate": 2.5941579492099853e-07, |
|
"loss": 1.1712, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.7237778832463218, |
|
"grad_norm": 0.3490484952926636, |
|
"learning_rate": 2.524785089831955e-07, |
|
"loss": 1.0555, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.7275747508305648, |
|
"grad_norm": 0.32522618770599365, |
|
"learning_rate": 2.456303073250943e-07, |
|
"loss": 0.9226, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.7313716184148078, |
|
"grad_norm": 0.38961344957351685, |
|
"learning_rate": 2.388714613866422e-07, |
|
"loss": 1.1842, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.7351684859990508, |
|
"grad_norm": 0.35974937677383423, |
|
"learning_rate": 2.3220223906602113e-07, |
|
"loss": 1.0626, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.7389653535832936, |
|
"grad_norm": 0.3246191143989563, |
|
"learning_rate": 2.2562290470903082e-07, |
|
"loss": 0.9113, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.7427622211675367, |
|
"grad_norm": 0.3299662172794342, |
|
"learning_rate": 2.191337190986112e-07, |
|
"loss": 0.9153, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.7465590887517797, |
|
"grad_norm": 0.38010498881340027, |
|
"learning_rate": 2.1273493944450634e-07, |
|
"loss": 1.1955, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.7503559563360227, |
|
"grad_norm": 0.38703298568725586, |
|
"learning_rate": 2.06426819373067e-07, |
|
"loss": 1.1757, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.7541528239202657, |
|
"grad_norm": 0.294664204120636, |
|
"learning_rate": 2.0020960891720147e-07, |
|
"loss": 0.7584, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.7579496915045087, |
|
"grad_norm": 0.3676691949367523, |
|
"learning_rate": 1.9408355450646234e-07, |
|
"loss": 1.1826, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.7617465590887518, |
|
"grad_norm": 0.36037302017211914, |
|
"learning_rate": 1.8804889895727872e-07, |
|
"loss": 1.046, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.7655434266729948, |
|
"grad_norm": 0.3599529266357422, |
|
"learning_rate": 1.821058814633339e-07, |
|
"loss": 1.0328, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.7693402942572378, |
|
"grad_norm": 0.32030507922172546, |
|
"learning_rate": 1.762547375860832e-07, |
|
"loss": 0.9193, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.7731371618414808, |
|
"grad_norm": 0.4202202558517456, |
|
"learning_rate": 1.7049569924541653e-07, |
|
"loss": 1.1895, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.7769340294257239, |
|
"grad_norm": 0.35407963395118713, |
|
"learning_rate": 1.6482899471046726e-07, |
|
"loss": 1.0546, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.780730897009967, |
|
"grad_norm": 0.3341258466243744, |
|
"learning_rate": 1.5925484859056372e-07, |
|
"loss": 1.0506, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.78452776459421, |
|
"grad_norm": 0.3770039677619934, |
|
"learning_rate": 1.5377348182632536e-07, |
|
"loss": 1.1839, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.7883246321784527, |
|
"grad_norm": 0.3668918311595917, |
|
"learning_rate": 1.4838511168090707e-07, |
|
"loss": 1.1741, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.7921214997626957, |
|
"grad_norm": 0.3690701425075531, |
|
"learning_rate": 1.4308995173138828e-07, |
|
"loss": 1.1862, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.7959183673469388, |
|
"grad_norm": 0.33050721883773804, |
|
"learning_rate": 1.3788821186030338e-07, |
|
"loss": 1.0628, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.7997152349311818, |
|
"grad_norm": 0.35489219427108765, |
|
"learning_rate": 1.3278009824732763e-07, |
|
"loss": 1.0405, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.8035121025154248, |
|
"grad_norm": 0.3431392014026642, |
|
"learning_rate": 1.2776581336110234e-07, |
|
"loss": 1.0425, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.8073089700996676, |
|
"grad_norm": 0.35951146483421326, |
|
"learning_rate": 1.2284555595120901e-07, |
|
"loss": 1.192, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.8111058376839106, |
|
"grad_norm": 0.34095799922943115, |
|
"learning_rate": 1.1801952104029347e-07, |
|
"loss": 0.9311, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.8149027052681537, |
|
"grad_norm": 0.36434096097946167, |
|
"learning_rate": 1.1328789991633532e-07, |
|
"loss": 1.1809, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.8186995728523967, |
|
"grad_norm": 0.35194143652915955, |
|
"learning_rate": 1.0865088012506408e-07, |
|
"loss": 1.0538, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.8224964404366397, |
|
"grad_norm": 0.3786377012729645, |
|
"learning_rate": 1.0410864546252841e-07, |
|
"loss": 1.1839, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.8262933080208827, |
|
"grad_norm": 0.3330764174461365, |
|
"learning_rate": 9.966137596780945e-08, |
|
"loss": 1.0354, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.8300901756051258, |
|
"grad_norm": 0.33103281259536743, |
|
"learning_rate": 9.530924791588319e-08, |
|
"loss": 0.956, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.8338870431893688, |
|
"grad_norm": 0.34974756836891174, |
|
"learning_rate": 9.10524338106375e-08, |
|
"loss": 1.1761, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.8376839107736118, |
|
"grad_norm": 0.3340107202529907, |
|
"learning_rate": 8.689110237803056e-08, |
|
"loss": 1.0462, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.8414807783578548, |
|
"grad_norm": 0.337950199842453, |
|
"learning_rate": 8.282541855940546e-08, |
|
"loss": 1.0587, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.8452776459420979, |
|
"grad_norm": 0.34110939502716064, |
|
"learning_rate": 7.885554350495206e-08, |
|
"loss": 1.1823, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.849074513526341, |
|
"grad_norm": 0.3682970702648163, |
|
"learning_rate": 7.498163456731878e-08, |
|
"loss": 1.1863, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.852871381110584, |
|
"grad_norm": 0.3614114820957184, |
|
"learning_rate": 7.120384529537672e-08, |
|
"loss": 1.1923, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.8566682486948267, |
|
"grad_norm": 1.5183669328689575, |
|
"learning_rate": 6.752232542813319e-08, |
|
"loss": 0.5159, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.8604651162790697, |
|
"grad_norm": 1.4683269262313843, |
|
"learning_rate": 6.393722088879534e-08, |
|
"loss": 0.504, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.8642619838633128, |
|
"grad_norm": 1.4609661102294922, |
|
"learning_rate": 6.044867377898806e-08, |
|
"loss": 0.5112, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.8680588514475558, |
|
"grad_norm": 1.6301960945129395, |
|
"learning_rate": 5.7056822373121324e-08, |
|
"loss": 0.5463, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.8718557190317988, |
|
"grad_norm": 1.419601321220398, |
|
"learning_rate": 5.3761801112907356e-08, |
|
"loss": 0.4816, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.8756525866160416, |
|
"grad_norm": 1.1993861198425293, |
|
"learning_rate": 5.0563740602034284e-08, |
|
"loss": 0.418, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.8794494542002846, |
|
"grad_norm": 1.2279670238494873, |
|
"learning_rate": 4.746276760098867e-08, |
|
"loss": 0.4118, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.8832463217845277, |
|
"grad_norm": 0.9904822111129761, |
|
"learning_rate": 4.44590050220306e-08, |
|
"loss": 0.3383, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.8870431893687707, |
|
"grad_norm": 1.4921010732650757, |
|
"learning_rate": 4.155257192432205e-08, |
|
"loss": 0.5024, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.8908400569530137, |
|
"grad_norm": 1.2175933122634888, |
|
"learning_rate": 3.874358350920843e-08, |
|
"loss": 0.4521, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.8946369245372567, |
|
"grad_norm": 0.9808973670005798, |
|
"learning_rate": 3.603215111565139e-08, |
|
"loss": 0.3847, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.8984337921214998, |
|
"grad_norm": 1.0177325010299683, |
|
"learning_rate": 3.341838221581656e-08, |
|
"loss": 0.433, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9022306597057428, |
|
"grad_norm": 1.1320708990097046, |
|
"learning_rate": 3.090238041081328e-08, |
|
"loss": 0.4865, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.9060275272899858, |
|
"grad_norm": 0.8693588972091675, |
|
"learning_rate": 2.848424542658823e-08, |
|
"loss": 0.3843, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.9098243948742288, |
|
"grad_norm": 1.0695801973342896, |
|
"learning_rate": 2.6164073109972986e-08, |
|
"loss": 0.4924, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.9136212624584719, |
|
"grad_norm": 1.0257716178894043, |
|
"learning_rate": 2.3941955424884312e-08, |
|
"loss": 0.4785, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.9174181300427149, |
|
"grad_norm": 0.8927969932556152, |
|
"learning_rate": 2.1817980448679553e-08, |
|
"loss": 0.4477, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.921214997626958, |
|
"grad_norm": 0.960476815700531, |
|
"learning_rate": 1.979223236866501e-08, |
|
"loss": 0.484, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.9250118652112007, |
|
"grad_norm": 0.8209058046340942, |
|
"learning_rate": 1.7864791478760245e-08, |
|
"loss": 0.4217, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.9288087327954437, |
|
"grad_norm": 0.815679669380188, |
|
"learning_rate": 1.603573417631371e-08, |
|
"loss": 0.4293, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.9326056003796868, |
|
"grad_norm": 0.9071650505065918, |
|
"learning_rate": 1.4305132959075706e-08, |
|
"loss": 0.4821, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.9364024679639298, |
|
"grad_norm": 0.9293763637542725, |
|
"learning_rate": 1.2673056422325413e-08, |
|
"loss": 0.4737, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.9401993355481728, |
|
"grad_norm": 0.8001239895820618, |
|
"learning_rate": 1.1139569256150285e-08, |
|
"loss": 0.4284, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.9439962031324156, |
|
"grad_norm": 0.7981712222099304, |
|
"learning_rate": 9.704732242883374e-09, |
|
"loss": 0.4344, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.9477930707166586, |
|
"grad_norm": 0.5713729858398438, |
|
"learning_rate": 8.368602254693603e-09, |
|
"loss": 0.3397, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.9515899383009017, |
|
"grad_norm": 0.7548425197601318, |
|
"learning_rate": 7.131232251331721e-09, |
|
"loss": 0.4207, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.9553868058851447, |
|
"grad_norm": 0.6806543469429016, |
|
"learning_rate": 5.992671278030327e-09, |
|
"loss": 0.4099, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.9591836734693877, |
|
"grad_norm": 0.836120069026947, |
|
"learning_rate": 4.952964463561805e-09, |
|
"loss": 0.4671, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.9629805410536307, |
|
"grad_norm": 0.662464439868927, |
|
"learning_rate": 4.012153018446984e-09, |
|
"loss": 0.3776, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.9667774086378738, |
|
"grad_norm": 0.7680605053901672, |
|
"learning_rate": 3.170274233324222e-09, |
|
"loss": 0.4336, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.9705742762221168, |
|
"grad_norm": 0.7456918954849243, |
|
"learning_rate": 2.4273614774691923e-09, |
|
"loss": 0.4209, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.9743711438063598, |
|
"grad_norm": 0.7245385050773621, |
|
"learning_rate": 1.7834441974740047e-09, |
|
"loss": 0.4, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.9781680113906028, |
|
"grad_norm": 0.6544315814971924, |
|
"learning_rate": 1.2385479160784141e-09, |
|
"loss": 0.3741, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.9819648789748459, |
|
"grad_norm": 0.721889317035675, |
|
"learning_rate": 7.926942311597962e-10, |
|
"loss": 0.4156, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.9857617465590889, |
|
"grad_norm": 0.8772999048233032, |
|
"learning_rate": 4.4590081487577706e-10, |
|
"loss": 0.4664, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.989558614143332, |
|
"grad_norm": 0.8888481855392456, |
|
"learning_rate": 1.9818141296451544e-10, |
|
"loss": 0.4626, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.9933554817275747, |
|
"grad_norm": 0.7575893998146057, |
|
"learning_rate": 4.954584419930575e-11, |
|
"loss": 0.4245, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.9971523493118177, |
|
"grad_norm": 0.878999650478363, |
|
"learning_rate": 0.0, |
|
"loss": 0.4739, |
|
"step": 526 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 526, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6538932878717747e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|