{ "best_metric": 0.1758999079465866, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.22948938611589215, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002294893861158921, "grad_norm": 0.23261818289756775, "learning_rate": 1.6666666666666668e-07, "loss": 0.0621, "step": 1 }, { "epoch": 0.002294893861158921, "eval_loss": 0.606043815612793, "eval_runtime": 65.6929, "eval_samples_per_second": 11.173, "eval_steps_per_second": 1.4, "step": 1 }, { "epoch": 0.004589787722317842, "grad_norm": 0.21800613403320312, "learning_rate": 3.3333333333333335e-07, "loss": 0.0685, "step": 2 }, { "epoch": 0.0068846815834767644, "grad_norm": 0.22197777032852173, "learning_rate": 5.000000000000001e-07, "loss": 0.0598, "step": 3 }, { "epoch": 0.009179575444635685, "grad_norm": 0.23577888309955597, "learning_rate": 6.666666666666667e-07, "loss": 0.0716, "step": 4 }, { "epoch": 0.011474469305794608, "grad_norm": 0.34556400775909424, "learning_rate": 8.333333333333333e-07, "loss": 0.09, "step": 5 }, { "epoch": 0.013769363166953529, "grad_norm": 0.4223484992980957, "learning_rate": 1.0000000000000002e-06, "loss": 0.1235, "step": 6 }, { "epoch": 0.01606425702811245, "grad_norm": 0.43714308738708496, "learning_rate": 1.1666666666666668e-06, "loss": 0.1379, "step": 7 }, { "epoch": 0.01835915088927137, "grad_norm": 0.572297990322113, "learning_rate": 1.3333333333333334e-06, "loss": 0.1625, "step": 8 }, { "epoch": 0.020654044750430294, "grad_norm": 0.5441948175430298, "learning_rate": 1.5e-06, "loss": 0.1878, "step": 9 }, { "epoch": 0.022948938611589215, "grad_norm": 0.6114739775657654, "learning_rate": 1.6666666666666667e-06, "loss": 0.2098, "step": 10 }, { "epoch": 0.025243832472748137, "grad_norm": 0.7089585065841675, "learning_rate": 1.8333333333333333e-06, "loss": 0.2292, "step": 11 }, { "epoch": 0.027538726333907058, "grad_norm": 0.7917402386665344, "learning_rate": 2.0000000000000003e-06, "loss": 0.2446, "step": 12 }, { "epoch": 0.02983362019506598, "grad_norm": 0.8855305314064026, "learning_rate": 2.166666666666667e-06, "loss": 0.2453, "step": 13 }, { "epoch": 0.0321285140562249, "grad_norm": 0.9501556754112244, "learning_rate": 2.3333333333333336e-06, "loss": 0.2964, "step": 14 }, { "epoch": 0.03442340791738382, "grad_norm": 0.9453725218772888, "learning_rate": 2.5e-06, "loss": 0.2895, "step": 15 }, { "epoch": 0.03671830177854274, "grad_norm": 0.9605135321617126, "learning_rate": 2.666666666666667e-06, "loss": 0.2799, "step": 16 }, { "epoch": 0.03901319563970167, "grad_norm": 0.985146701335907, "learning_rate": 2.8333333333333335e-06, "loss": 0.3195, "step": 17 }, { "epoch": 0.04130808950086059, "grad_norm": 1.061450481414795, "learning_rate": 3e-06, "loss": 0.3075, "step": 18 }, { "epoch": 0.04360298336201951, "grad_norm": 1.1614933013916016, "learning_rate": 3.1666666666666667e-06, "loss": 0.3133, "step": 19 }, { "epoch": 0.04589787722317843, "grad_norm": 1.1870945692062378, "learning_rate": 3.3333333333333333e-06, "loss": 0.3361, "step": 20 }, { "epoch": 0.04819277108433735, "grad_norm": 1.2294026613235474, "learning_rate": 3.5e-06, "loss": 0.3238, "step": 21 }, { "epoch": 0.05048766494549627, "grad_norm": 1.2782326936721802, "learning_rate": 3.6666666666666666e-06, "loss": 0.3139, "step": 22 }, { "epoch": 0.052782558806655194, "grad_norm": 1.3740707635879517, "learning_rate": 3.833333333333334e-06, "loss": 0.3445, "step": 23 }, { "epoch": 0.055077452667814115, "grad_norm": 1.4739590883255005, "learning_rate": 4.000000000000001e-06, "loss": 0.3315, "step": 24 }, { "epoch": 0.05737234652897304, "grad_norm": 1.3918646574020386, "learning_rate": 4.166666666666667e-06, "loss": 0.3283, "step": 25 }, { "epoch": 0.05966724039013196, "grad_norm": 1.293686032295227, "learning_rate": 4.333333333333334e-06, "loss": 0.2856, "step": 26 }, { "epoch": 0.06196213425129088, "grad_norm": 1.318763256072998, "learning_rate": 4.5e-06, "loss": 0.3287, "step": 27 }, { "epoch": 0.0642570281124498, "grad_norm": 1.280957579612732, "learning_rate": 4.666666666666667e-06, "loss": 0.3216, "step": 28 }, { "epoch": 0.06655192197360872, "grad_norm": 1.177618145942688, "learning_rate": 4.833333333333333e-06, "loss": 0.3001, "step": 29 }, { "epoch": 0.06884681583476764, "grad_norm": 1.2139456272125244, "learning_rate": 5e-06, "loss": 0.2899, "step": 30 }, { "epoch": 0.07114170969592656, "grad_norm": 1.1392238140106201, "learning_rate": 4.997482666353287e-06, "loss": 0.2737, "step": 31 }, { "epoch": 0.07343660355708548, "grad_norm": 1.120076060295105, "learning_rate": 4.989935734988098e-06, "loss": 0.2808, "step": 32 }, { "epoch": 0.0757314974182444, "grad_norm": 1.0275880098342896, "learning_rate": 4.977374404419838e-06, "loss": 0.2783, "step": 33 }, { "epoch": 0.07802639127940333, "grad_norm": 0.9994708895683289, "learning_rate": 4.959823971496575e-06, "loss": 0.2985, "step": 34 }, { "epoch": 0.08032128514056225, "grad_norm": 0.9263613224029541, "learning_rate": 4.937319780454559e-06, "loss": 0.2635, "step": 35 }, { "epoch": 0.08261617900172118, "grad_norm": 1.0965187549591064, "learning_rate": 4.909907151739634e-06, "loss": 0.2993, "step": 36 }, { "epoch": 0.08491107286288009, "grad_norm": 1.18157160282135, "learning_rate": 4.8776412907378845e-06, "loss": 0.3149, "step": 37 }, { "epoch": 0.08720596672403902, "grad_norm": 1.0924243927001953, "learning_rate": 4.8405871765993435e-06, "loss": 0.2597, "step": 38 }, { "epoch": 0.08950086058519793, "grad_norm": 1.1865804195404053, "learning_rate": 4.7988194313786275e-06, "loss": 0.3007, "step": 39 }, { "epoch": 0.09179575444635686, "grad_norm": 1.1488187313079834, "learning_rate": 4.752422169756048e-06, "loss": 0.3293, "step": 40 }, { "epoch": 0.09409064830751578, "grad_norm": 1.3960342407226562, "learning_rate": 4.701488829641845e-06, "loss": 0.3131, "step": 41 }, { "epoch": 0.0963855421686747, "grad_norm": 1.1148208379745483, "learning_rate": 4.646121984004666e-06, "loss": 0.2774, "step": 42 }, { "epoch": 0.09868043602983362, "grad_norm": 1.3916605710983276, "learning_rate": 4.586433134303257e-06, "loss": 0.3231, "step": 43 }, { "epoch": 0.10097532989099255, "grad_norm": 1.2748233079910278, "learning_rate": 4.522542485937369e-06, "loss": 0.3228, "step": 44 }, { "epoch": 0.10327022375215146, "grad_norm": 1.2887651920318604, "learning_rate": 4.454578706170075e-06, "loss": 0.3316, "step": 45 }, { "epoch": 0.10556511761331039, "grad_norm": 2.0249674320220947, "learning_rate": 4.382678665009028e-06, "loss": 0.4286, "step": 46 }, { "epoch": 0.1078600114744693, "grad_norm": 1.827263593673706, "learning_rate": 4.3069871595684795e-06, "loss": 0.4627, "step": 47 }, { "epoch": 0.11015490533562823, "grad_norm": 2.331561803817749, "learning_rate": 4.227656622467162e-06, "loss": 0.6452, "step": 48 }, { "epoch": 0.11244979919678715, "grad_norm": 6.2320380210876465, "learning_rate": 4.144846814849282e-06, "loss": 1.3479, "step": 49 }, { "epoch": 0.11474469305794607, "grad_norm": 14.223282814025879, "learning_rate": 4.058724504646834e-06, "loss": 2.2118, "step": 50 }, { "epoch": 0.11474469305794607, "eval_loss": 0.23119279742240906, "eval_runtime": 66.0264, "eval_samples_per_second": 11.117, "eval_steps_per_second": 1.393, "step": 50 }, { "epoch": 0.11703958691910499, "grad_norm": 0.1787334680557251, "learning_rate": 3.969463130731183e-06, "loss": 0.0541, "step": 51 }, { "epoch": 0.11933448078026392, "grad_norm": 0.22551606595516205, "learning_rate": 3.8772424536302565e-06, "loss": 0.0581, "step": 52 }, { "epoch": 0.12162937464142283, "grad_norm": 0.27361077070236206, "learning_rate": 3.782248193514766e-06, "loss": 0.06, "step": 53 }, { "epoch": 0.12392426850258176, "grad_norm": 0.1588234305381775, "learning_rate": 3.684671656182497e-06, "loss": 0.048, "step": 54 }, { "epoch": 0.12621916236374067, "grad_norm": 0.2589740455150604, "learning_rate": 3.5847093477938955e-06, "loss": 0.0509, "step": 55 }, { "epoch": 0.1285140562248996, "grad_norm": 0.3200053572654724, "learning_rate": 3.4825625791348093e-06, "loss": 0.0629, "step": 56 }, { "epoch": 0.13080895008605853, "grad_norm": 0.34225979447364807, "learning_rate": 3.3784370602033572e-06, "loss": 0.0716, "step": 57 }, { "epoch": 0.13310384394721744, "grad_norm": 0.4383489787578583, "learning_rate": 3.272542485937369e-06, "loss": 0.0725, "step": 58 }, { "epoch": 0.13539873780837636, "grad_norm": 0.4575161635875702, "learning_rate": 3.165092113916688e-06, "loss": 0.0892, "step": 59 }, { "epoch": 0.13769363166953527, "grad_norm": 0.47194910049438477, "learning_rate": 3.056302334890786e-06, "loss": 0.0878, "step": 60 }, { "epoch": 0.1399885255306942, "grad_norm": 0.5810543894767761, "learning_rate": 2.946392236996592e-06, "loss": 0.0958, "step": 61 }, { "epoch": 0.14228341939185313, "grad_norm": 0.6267717480659485, "learning_rate": 2.835583164544139e-06, "loss": 0.0849, "step": 62 }, { "epoch": 0.14457831325301204, "grad_norm": 0.6513370871543884, "learning_rate": 2.724098272258584e-06, "loss": 0.104, "step": 63 }, { "epoch": 0.14687320711417096, "grad_norm": 0.5434672832489014, "learning_rate": 2.6121620758762877e-06, "loss": 0.095, "step": 64 }, { "epoch": 0.1491681009753299, "grad_norm": 0.5460812449455261, "learning_rate": 2.5e-06, "loss": 0.1043, "step": 65 }, { "epoch": 0.1514629948364888, "grad_norm": 0.5850204825401306, "learning_rate": 2.3878379241237136e-06, "loss": 0.0992, "step": 66 }, { "epoch": 0.15375788869764773, "grad_norm": 0.45354822278022766, "learning_rate": 2.2759017277414165e-06, "loss": 0.1043, "step": 67 }, { "epoch": 0.15605278255880667, "grad_norm": 0.4008900225162506, "learning_rate": 2.1644168354558623e-06, "loss": 0.1036, "step": 68 }, { "epoch": 0.15834767641996558, "grad_norm": 0.4651618003845215, "learning_rate": 2.053607763003409e-06, "loss": 0.123, "step": 69 }, { "epoch": 0.1606425702811245, "grad_norm": 0.41306036710739136, "learning_rate": 1.9436976651092143e-06, "loss": 0.1201, "step": 70 }, { "epoch": 0.1629374641422834, "grad_norm": 0.3988576829433441, "learning_rate": 1.8349078860833125e-06, "loss": 0.1002, "step": 71 }, { "epoch": 0.16523235800344235, "grad_norm": 0.4156871736049652, "learning_rate": 1.7274575140626318e-06, "loss": 0.1125, "step": 72 }, { "epoch": 0.16752725186460127, "grad_norm": 0.34895026683807373, "learning_rate": 1.6215629397966432e-06, "loss": 0.1288, "step": 73 }, { "epoch": 0.16982214572576018, "grad_norm": 0.45981651544570923, "learning_rate": 1.5174374208651913e-06, "loss": 0.1126, "step": 74 }, { "epoch": 0.1721170395869191, "grad_norm": 0.5109217762947083, "learning_rate": 1.415290652206105e-06, "loss": 0.1149, "step": 75 }, { "epoch": 0.17441193344807804, "grad_norm": 0.4497959017753601, "learning_rate": 1.3153283438175036e-06, "loss": 0.1403, "step": 76 }, { "epoch": 0.17670682730923695, "grad_norm": 0.43716540932655334, "learning_rate": 1.217751806485235e-06, "loss": 0.1358, "step": 77 }, { "epoch": 0.17900172117039587, "grad_norm": 0.4312424957752228, "learning_rate": 1.122757546369744e-06, "loss": 0.1357, "step": 78 }, { "epoch": 0.18129661503155478, "grad_norm": 0.4340069890022278, "learning_rate": 1.0305368692688175e-06, "loss": 0.1325, "step": 79 }, { "epoch": 0.18359150889271372, "grad_norm": 0.4063401222229004, "learning_rate": 9.412754953531664e-07, "loss": 0.1326, "step": 80 }, { "epoch": 0.18588640275387264, "grad_norm": 0.5883768796920776, "learning_rate": 8.551531851507186e-07, "loss": 0.1537, "step": 81 }, { "epoch": 0.18818129661503155, "grad_norm": 0.480673611164093, "learning_rate": 7.723433775328385e-07, "loss": 0.1651, "step": 82 }, { "epoch": 0.19047619047619047, "grad_norm": 0.47313088178634644, "learning_rate": 6.930128404315214e-07, "loss": 0.1389, "step": 83 }, { "epoch": 0.1927710843373494, "grad_norm": 0.5640736222267151, "learning_rate": 6.17321334990973e-07, "loss": 0.1669, "step": 84 }, { "epoch": 0.19506597819850832, "grad_norm": 0.44458428025245667, "learning_rate": 5.454212938299256e-07, "loss": 0.1511, "step": 85 }, { "epoch": 0.19736087205966724, "grad_norm": 0.6890984773635864, "learning_rate": 4.774575140626317e-07, "loss": 0.1724, "step": 86 }, { "epoch": 0.19965576592082615, "grad_norm": 0.6082049012184143, "learning_rate": 4.1356686569674344e-07, "loss": 0.1641, "step": 87 }, { "epoch": 0.2019506597819851, "grad_norm": 0.8383265137672424, "learning_rate": 3.538780159953348e-07, "loss": 0.191, "step": 88 }, { "epoch": 0.204245553643144, "grad_norm": 0.7220001220703125, "learning_rate": 2.98511170358155e-07, "loss": 0.215, "step": 89 }, { "epoch": 0.20654044750430292, "grad_norm": 0.7412702441215515, "learning_rate": 2.4757783024395244e-07, "loss": 0.2034, "step": 90 }, { "epoch": 0.20883534136546184, "grad_norm": 0.7057530283927917, "learning_rate": 2.0118056862137358e-07, "loss": 0.1968, "step": 91 }, { "epoch": 0.21113023522662078, "grad_norm": 0.64353346824646, "learning_rate": 1.59412823400657e-07, "loss": 0.2087, "step": 92 }, { "epoch": 0.2134251290877797, "grad_norm": 0.9073025584220886, "learning_rate": 1.223587092621162e-07, "loss": 0.2682, "step": 93 }, { "epoch": 0.2157200229489386, "grad_norm": 0.8293075561523438, "learning_rate": 9.00928482603669e-08, "loss": 0.2719, "step": 94 }, { "epoch": 0.21801491681009752, "grad_norm": 0.9869675636291504, "learning_rate": 6.268021954544095e-08, "loss": 0.2622, "step": 95 }, { "epoch": 0.22030981067125646, "grad_norm": 1.2257877588272095, "learning_rate": 4.017602850342584e-08, "loss": 0.3152, "step": 96 }, { "epoch": 0.22260470453241538, "grad_norm": 1.4103209972381592, "learning_rate": 2.262559558016325e-08, "loss": 0.3584, "step": 97 }, { "epoch": 0.2248995983935743, "grad_norm": 2.496727228164673, "learning_rate": 1.006426501190233e-08, "loss": 0.5121, "step": 98 }, { "epoch": 0.22719449225473323, "grad_norm": 5.066628456115723, "learning_rate": 2.5173336467135266e-09, "loss": 0.7629, "step": 99 }, { "epoch": 0.22948938611589215, "grad_norm": 10.090572357177734, "learning_rate": 0.0, "loss": 1.3127, "step": 100 }, { "epoch": 0.22948938611589215, "eval_loss": 0.1758999079465866, "eval_runtime": 66.0597, "eval_samples_per_second": 11.111, "eval_steps_per_second": 1.393, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.70087811548971e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }