{
  "best_metric": 0.6475752433152033,
  "best_model_checkpoint": "./runtime-masked/MiniLMv2-L6-H384-distilled-from-RoBERTa-Large-finetuned-wikitext103-mlm-multi-emails-hq-x2bs/checkpoint-4004",
  "epoch": 16.0,
  "global_step": 4928,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.01, "learning_rate": 2.4291497975708505e-06, "loss": 7.2679, "step": 3 },
    { "epoch": 0.02, "learning_rate": 4.858299595141701e-06, "loss": 7.1451, "step": 6 },
    { "epoch": 0.03, "learning_rate": 7.287449392712551e-06, "loss": 7.1002, "step": 9 },
    { "epoch": 0.04, "learning_rate": 9.716599190283402e-06, "loss": 6.9508, "step": 12 },
    { "epoch": 0.05, "learning_rate": 1.2145748987854251e-05, "loss": 6.7874, "step": 15 },
    { "epoch": 0.06, "learning_rate": 1.4574898785425101e-05, "loss": 6.6279, "step": 18 },
    { "epoch": 0.07, "learning_rate": 1.7004048582995952e-05, "loss": 6.5254, "step": 21 },
    { "epoch": 0.08, "learning_rate": 1.9433198380566804e-05, "loss": 6.3327, "step": 24 },
    { "epoch": 0.09, "learning_rate": 2.1862348178137653e-05, "loss": 6.2761, "step": 27 },
    { "epoch": 0.1, "learning_rate": 2.4291497975708502e-05, "loss": 6.0968, "step": 30 },
    { "epoch": 0.11, "learning_rate": 2.6720647773279357e-05, "loss": 5.9384, "step": 33 },
    { "epoch": 0.12, "learning_rate": 2.9149797570850203e-05, "loss": 5.8496, "step": 36 },
    { "epoch": 0.13, "learning_rate": 3.157894736842105e-05, "loss": 5.7136, "step": 39 },
    { "epoch": 0.14, "learning_rate": 3.4008097165991904e-05, "loss": 5.6149, "step": 42 },
    { "epoch": 0.15, "learning_rate": 3.6437246963562756e-05, "loss": 5.5674, "step": 45 },
    { "epoch": 0.16, "learning_rate": 3.886639676113361e-05, "loss": 5.489, "step": 48 },
    { "epoch": 0.17, "learning_rate": 4.1295546558704454e-05, "loss": 5.3851, "step": 51 },
    { "epoch": 0.18, "learning_rate": 4.3724696356275306e-05, "loss": 5.3135, "step": 54 },
    { "epoch": 0.19, "learning_rate": 4.615384615384616e-05, "loss": 5.1979, "step": 57 },
    { "epoch": 0.19, "learning_rate": 4.8582995951417004e-05, "loss": 5.0876, "step": 60 },
    { "epoch": 0.2, "learning_rate": 5.101214574898786e-05, "loss": 5.094, "step": 63 },
    { "epoch": 0.21, "learning_rate": 5.3441295546558715e-05, "loss": 5.0148, "step": 66 },
    { "epoch": 0.22, "learning_rate": 5.587044534412956e-05, "loss": 4.9376, "step": 69 },
    { "epoch": 0.23, "learning_rate": 5.8299595141700406e-05, "loss": 4.9033, "step": 72 },
    { "epoch": 0.24, "learning_rate": 6.072874493927125e-05, "loss": 4.8783, "step": 75 },
    { "epoch": 0.25, "learning_rate": 6.31578947368421e-05, "loss": 4.8382, "step": 78 },
    { "epoch": 0.26, "learning_rate": 6.558704453441296e-05, "loss": 4.7009, "step": 81 },
    { "epoch": 0.27, "learning_rate": 6.801619433198381e-05, "loss": 4.6597, "step": 84 },
    { "epoch": 0.28, "learning_rate": 7.044534412955465e-05, "loss": 4.5674, "step": 87 },
    { "epoch": 0.29, "learning_rate": 7.287449392712551e-05, "loss": 4.5938, "step": 90 },
    { "epoch": 0.3, "learning_rate": 7.530364372469636e-05, "loss": 4.6061, "step": 93 },
    { "epoch": 0.31, "learning_rate": 7.773279352226722e-05, "loss": 4.582, "step": 96 },
    { "epoch": 0.32, "learning_rate": 8.016194331983806e-05, "loss": 4.4975, "step": 99 },
    { "epoch": 0.33, "learning_rate": 8.259109311740891e-05, "loss": 4.3876, "step": 102 },
    { "epoch": 0.34, "learning_rate": 8.502024291497977e-05, "loss": 4.388, "step": 105 },
    { "epoch": 0.35, "learning_rate": 8.744939271255061e-05, "loss": 4.2698, "step": 108 },
    { "epoch": 0.36, "learning_rate": 8.987854251012147e-05, "loss": 4.3306, "step": 111 },
    { "epoch": 0.37, "learning_rate": 9.230769230769232e-05, "loss": 4.3391, "step": 114 },
    { "epoch": 0.38, "learning_rate": 9.473684210526316e-05, "loss": 4.1981, "step": 117 },
    { "epoch": 0.39, "learning_rate": 9.716599190283401e-05, "loss": 4.3052, "step": 120 },
    { "epoch": 0.4, "learning_rate": 9.959514170040485e-05, "loss": 4.1384, "step": 123 },
    { "epoch": 0.41, "learning_rate": 0.00010202429149797573, "loss": 4.159, "step": 126 },
    { "epoch": 0.42, "learning_rate": 0.00010445344129554657, "loss": 4.1178, "step": 129 },
    { "epoch": 0.43, "learning_rate": 0.00010688259109311743, "loss": 4.0929, "step": 132 },
    { "epoch": 0.44, "learning_rate": 0.00010931174089068827, "loss": 4.0761, "step": 135 },
    { "epoch": 0.45, "learning_rate": 0.00011174089068825912, "loss": 3.9875, "step": 138 },
    { "epoch": 0.46, "learning_rate": 0.00011417004048582995, "loss": 4.0039, "step": 141 },
    { "epoch": 0.47, "learning_rate": 0.00011659919028340081, "loss": 3.9917, "step": 144 },
    { "epoch": 0.48, "learning_rate": 0.00011902834008097166, "loss": 4.0101, "step": 147 },
    { "epoch": 0.49, "learning_rate": 0.0001214574898785425, "loss": 3.9108, "step": 150 },
    { "epoch": 0.5, "learning_rate": 0.00012388663967611335, "loss": 3.9445, "step": 153 },
    { "epoch": 0.51, "learning_rate": 0.0001263157894736842, "loss": 3.96, "step": 156 },
    { "epoch": 0.52, "learning_rate": 0.00012874493927125507, "loss": 3.9475, "step": 159 },
    { "epoch": 0.53, "learning_rate": 0.00013117408906882592, "loss": 3.8582, "step": 162 },
    { "epoch": 0.54, "learning_rate": 0.00013360323886639676, "loss": 3.8952, "step": 165 },
    { "epoch": 0.55, "learning_rate": 0.00013603238866396762, "loss": 3.7632, "step": 168 },
    { "epoch": 0.56, "learning_rate": 0.00013846153846153847, "loss": 3.7845, "step": 171 },
    { "epoch": 0.56, "learning_rate": 0.0001408906882591093, "loss": 3.7638, "step": 174 },
    { "epoch": 0.57, "learning_rate": 0.00014331983805668017, "loss": 3.8404, "step": 177 },
    { "epoch": 0.58, "learning_rate": 0.00014574898785425102, "loss": 3.7742, "step": 180 },
    { "epoch": 0.59, "learning_rate": 0.00014817813765182186, "loss": 3.7533, "step": 183 },
    { "epoch": 0.6, "learning_rate": 0.00015060728744939272, "loss": 3.7303, "step": 186 },
    { "epoch": 0.61, "learning_rate": 0.00015303643724696357, "loss": 3.7195, "step": 189 },
    { "epoch": 0.62, "learning_rate": 0.00015546558704453443, "loss": 3.7544, "step": 192 },
    { "epoch": 0.63, "learning_rate": 0.00015789473684210527, "loss": 3.6913, "step": 195 },
    { "epoch": 0.64, "learning_rate": 0.00016032388663967612, "loss": 3.7917, "step": 198 },
    { "epoch": 0.65, "learning_rate": 0.00016275303643724698, "loss": 3.6758, "step": 201 },
    { "epoch": 0.66, "learning_rate": 0.00016518218623481781, "loss": 3.6774, "step": 204 },
    { "epoch": 0.67, "learning_rate": 0.00016761133603238867, "loss": 3.6199, "step": 207 },
    { "epoch": 0.68, "learning_rate": 0.00017004048582995953, "loss": 3.6028, "step": 210 },
    { "epoch": 0.69, "learning_rate": 0.0001724696356275304, "loss": 3.6084, "step": 213 },
    { "epoch": 0.7, "learning_rate": 0.00017489878542510122, "loss": 3.6165, "step": 216 },
    { "epoch": 0.71, "learning_rate": 0.00017732793522267208, "loss": 3.5123, "step": 219 },
    { "epoch": 0.72, "learning_rate": 0.00017975708502024294, "loss": 3.5594, "step": 222 },
    { "epoch": 0.73, "learning_rate": 0.00018218623481781377, "loss": 3.6238, "step": 225 },
    { "epoch": 0.74, "learning_rate": 0.00018461538461538463, "loss": 3.4991, "step": 228 },
    { "epoch": 0.75, "learning_rate": 0.0001870445344129555, "loss": 3.5384, "step": 231 },
    { "epoch": 0.76, "learning_rate": 0.00018947368421052632, "loss": 3.5282, "step": 234 },
    { "epoch": 0.77, "learning_rate": 0.00019190283400809716, "loss": 3.574, "step": 237 },
    { "epoch": 0.78, "learning_rate": 0.00019433198380566801, "loss": 3.5391, "step": 240 },
    { "epoch": 0.79, "learning_rate": 0.00019676113360323887, "loss": 3.4529, "step": 243 },
    { "epoch": 0.8, "learning_rate": 0.0001991902834008097, "loss": 3.4957, "step": 246 },
    { "epoch": 0.81, "learning_rate": 0.00019999990991501854, "loss": 3.4346, "step": 249 },
    { "epoch": 0.82, "learning_rate": 0.00019999943696930958, "loss": 3.4838, "step": 252 },
    { "epoch": 0.83, "learning_rate": 0.00019999855864354245, "loss": 3.4721, "step": 255 },
    { "epoch": 0.84, "learning_rate": 0.0001999972749412778, "loss": 3.5, "step": 258 },
    { "epoch": 0.85, "learning_rate": 0.00019999558586771948, "loss": 3.3624, "step": 261 },
    { "epoch": 0.86, "learning_rate": 0.00019999349142971467, "loss": 3.4138, "step": 264 },
    { "epoch": 0.87, "learning_rate": 0.00019999099163575389, "loss": 3.4005, "step": 267 },
    { "epoch": 0.88, "learning_rate": 0.00019998808649597085, "loss": 3.365, "step": 270 },
    { "epoch": 0.89, "learning_rate": 0.0001999847760221425, "loss": 3.3424, "step": 273 },
    { "epoch": 0.9, "learning_rate": 0.00019998106022768887, "loss": 3.3629, "step": 276 },
    { "epoch": 0.91, "learning_rate": 0.00019997693912767318, "loss": 3.3722, "step": 279 },
    { "epoch": 0.92, "learning_rate": 0.00019997241273880158, "loss": 3.3951, "step": 282 },
    { "epoch": 0.93, "learning_rate": 0.00019996748107942335, "loss": 3.3817, "step": 285 },
    { "epoch": 0.94, "learning_rate": 0.00019996214416953046, "loss": 3.3289, "step": 288 },
    { "epoch": 0.94, "learning_rate": 0.00019995640203075788, "loss": 3.3074, "step": 291 },
    { "epoch": 0.95, "learning_rate": 0.00019995025468638318, "loss": 3.3145, "step": 294 },
    { "epoch": 0.96, "learning_rate": 0.00019994370216132662, "loss": 3.2853, "step": 297 },
    { "epoch": 0.97, "learning_rate": 0.000199936744482151, "loss": 3.2416, "step": 300 },
    { "epoch": 0.98, "learning_rate": 0.0001999293816770615, "loss": 3.2565, "step": 303 },
    { "epoch": 0.99, "learning_rate": 0.00019992161377590563, "loss": 3.2947, "step": 306 },
    { "epoch": 1.0, "eval_accuracy": 0.5121698756686252, "eval_loss": 3.0832247734069824, "eval_runtime": 16.2528, "eval_samples_per_second": 135.177, "eval_steps_per_second": 67.619, "step": 308 },
    { "epoch": 1.0, "learning_rate": 0.0001999134408101731, "loss": 3.2464, "step": 309 },
    { "epoch": 1.01, "learning_rate": 0.00019990486281299568, "loss": 3.2509, "step": 312 },
    { "epoch": 1.02, "learning_rate": 0.00019989587981914704, "loss": 3.284, "step": 315 },
    { "epoch": 1.03, "learning_rate": 0.00019988649186504262, "loss": 3.1894, "step": 318 },
    { "epoch": 1.04, "learning_rate": 0.0001998766989887396, "loss": 3.3045, "step": 321 },
    { "epoch": 1.05, "learning_rate": 0.0001998665012299365, "loss": 3.2935, "step": 324 },
    { "epoch": 1.06, "learning_rate": 0.0001998558986299733, "loss": 3.1808, "step": 327 },
    { "epoch": 1.07, "learning_rate": 0.000199844891231831, "loss": 3.1295, "step": 330 },
    { "epoch": 1.08, "learning_rate": 0.00019983347908013172, "loss": 3.262, "step": 333 },
    { "epoch": 1.09, "learning_rate": 0.00019982166222113826, "loss": 3.1685, "step": 336 },
    { "epoch": 1.1, "learning_rate": 0.00019980944070275406, "loss": 3.1682, "step": 339 },
    { "epoch": 1.11, "learning_rate": 0.00019979681457452304, "loss": 3.2196, "step": 342 },
    { "epoch": 1.12, "learning_rate": 0.0001997837838876293, "loss": 3.1117, "step": 345 },
    { "epoch": 1.13, "learning_rate": 0.0001997703486948969, "loss": 3.2077, "step": 348 },
    { "epoch": 1.14, "learning_rate": 0.00019975650905078976, "loss": 3.1355, "step": 351 },
    { "epoch": 1.15, "learning_rate": 0.00019974226501141137, "loss": 3.2277, "step": 354 },
    { "epoch": 1.16, "learning_rate": 0.00019972761663450452, "loss": 3.14, "step": 357 },
    { "epoch": 1.17, "learning_rate": 0.0001997125639794512, "loss": 3.1937, "step": 360 },
    { "epoch": 1.18, "learning_rate": 0.00019969710710727214, "loss": 3.1441, "step": 363 },
    { "epoch": 1.19, "learning_rate": 0.00019968124608062682, "loss": 3.1193, "step": 366 },
    { "epoch": 1.2, "learning_rate": 0.000199664980963813, "loss": 3.1849, "step": 369 },
    { "epoch": 1.21, "learning_rate": 0.00019964831182276663, "loss": 3.1189, "step": 372 },
    { "epoch": 1.22, "learning_rate": 0.00019963123872506147, "loss": 3.0733, "step": 375 },
    { "epoch": 1.23, "learning_rate": 0.0001996137617399088, "loss": 3.101, "step": 378 },
    { "epoch": 1.24, "learning_rate": 0.00019959588093815728, "loss": 3.074, "step": 381 },
    { "epoch": 1.25, "learning_rate": 0.00019957759639229247, "loss": 3.1142, "step": 384 },
    { "epoch": 1.26, "learning_rate": 0.00019955890817643674, "loss": 3.1246, "step": 387 },
    { "epoch": 1.27, "learning_rate": 0.0001995398163663488, "loss": 3.104, "step": 390 },
    { "epoch": 1.28, "learning_rate": 0.00019952032103942347, "loss": 3.1105, "step": 393 },
    { "epoch": 1.29, "learning_rate": 0.0001995004222746913, "loss": 3.0985, "step": 396 },
    { "epoch": 1.3, "learning_rate": 0.00019948012015281853, "loss": 3.1341, "step": 399 },
    { "epoch": 1.31, "learning_rate": 0.00019945941475610623, "loss": 3.0335, "step": 402 },
    { "epoch": 1.31, "learning_rate": 0.0001994383061684905, "loss": 3.0978, "step": 405 },
    { "epoch": 1.32, "learning_rate": 0.00019941679447554175, "loss": 3.0737, "step": 408 },
    { "epoch": 1.33, "learning_rate": 0.00019939487976446468, "loss": 3.0942, "step": 411 },
    { "epoch": 1.34, "learning_rate": 0.00019937256212409756, "loss": 3.0471, "step": 414 },
    { "epoch": 1.35, "learning_rate": 0.00019934984164491227, "loss": 3.0165, "step": 417 },
    { "epoch": 1.36, "learning_rate": 0.00019932671841901354, "loss": 2.9886, "step": 420 },
    { "epoch": 1.37, "learning_rate": 0.00019930319254013887, "loss": 3.0629, "step": 423 },
    { "epoch": 1.38, "learning_rate": 0.000199279264103658, "loss": 3.0518, "step": 426 },
    { "epoch": 1.39, "learning_rate": 0.00019925493320657262, "loss": 2.9858, "step": 429 },
    { "epoch": 1.4, "learning_rate": 0.00019923019994751585, "loss": 3.0696, "step": 432 },
    { "epoch": 1.41, "learning_rate": 0.000199205064426752, "loss": 3.1251, "step": 435 },
    { "epoch": 1.42, "learning_rate": 0.000199179526746176, "loss": 3.0311, "step": 438 },
    { "epoch": 1.43, "learning_rate": 0.00019915358700931313, "loss": 2.9571, "step": 441 },
    { "epoch": 1.44, "learning_rate": 0.00019912724532131847, "loss": 2.9914, "step": 444 },
    { "epoch": 1.45, "learning_rate": 0.00019910050178897657, "loss": 2.9803, "step": 447 },
    { "epoch": 1.46, "learning_rate": 0.00019907335652070103, "loss": 3.0183, "step": 450 },
    { "epoch": 1.47, "learning_rate": 0.0001990458096265339, "loss": 3.0207, "step": 453 },
    { "epoch": 1.48, "learning_rate": 0.00019901786121814547, "loss": 2.9883, "step": 456 },
    { "epoch": 1.49, "learning_rate": 0.00019898951140883369, "loss": 2.924, "step": 459 },
    { "epoch": 1.5, "learning_rate": 0.0001989607603135236, "loss": 3.0618, "step": 462 },
    { "epoch": 1.51, "learning_rate": 0.00019893160804876708, "loss": 3.0179, "step": 465 },
    { "epoch": 1.52, "learning_rate": 0.00019890205473274236, "loss": 2.9295, "step": 468 },
    { "epoch": 1.53, "learning_rate": 0.00019887210048525323, "loss": 2.9724, "step": 471 },
    { "epoch": 1.54, "learning_rate": 0.00019884174542772899, "loss": 2.9413, "step": 474 },
    { "epoch": 1.55, "learning_rate": 0.00019881098968322367, "loss": 3.0484, "step": 477 },
    { "epoch": 1.56, "learning_rate": 0.00019877983337641565, "loss": 2.9098, "step": 480 },
    { "epoch": 1.57, "learning_rate": 0.00019874827663360706, "loss": 2.9568, "step": 483 },
    { "epoch": 1.58, "learning_rate": 0.00019871631958272336, "loss": 2.9348, "step": 486 },
    { "epoch": 1.59, "learning_rate": 0.00019868396235331282, "loss": 2.9615, "step": 489 },
    { "epoch": 1.6, "learning_rate": 0.00019865120507654593, "loss": 2.9036, "step": 492 },
    { "epoch": 1.61, "learning_rate": 0.00019861804788521493, "loss": 2.8977, "step": 495 },
    { "epoch": 1.62, "learning_rate": 0.00019858449091373313, "loss": 3.0531, "step": 498 },
    { "epoch": 1.63, "learning_rate": 0.00019855053429813463, "loss": 2.9548, "step": 501 },
    { "epoch": 1.64, "learning_rate": 0.00019851617817607354, "loss": 2.9541, "step": 504 },
    { "epoch": 1.65, "learning_rate": 0.00019848142268682356, "loss": 2.8871, "step": 507 },
    { "epoch": 1.66, "learning_rate": 0.00019844626797127724, "loss": 2.8821, "step": 510 },
    { "epoch": 1.67, "learning_rate": 0.00019841071417194561, "loss": 2.9179, "step": 513 },
    { "epoch": 1.68, "learning_rate": 0.00019837476143295748, "loss": 2.9251, "step": 516 },
    { "epoch": 1.69, "learning_rate": 0.00019833840990005893, "loss": 2.8764, "step": 519 },
    { "epoch": 1.69, "learning_rate": 0.00019830165972061265, "loss": 2.8817, "step": 522 },
    { "epoch": 1.7, "learning_rate": 0.00019826451104359738, "loss": 2.8707, "step": 525 },
    { "epoch": 1.71, "learning_rate": 0.00019822696401960727, "loss": 2.8489, "step": 528 },
    { "epoch": 1.72, "learning_rate": 0.00019818901880085137, "loss": 2.9285, "step": 531 },
    { "epoch": 1.73, "learning_rate": 0.00019815067554115282, "loss": 2.8707, "step": 534 },
    { "epoch": 1.74, "learning_rate": 0.0001981119343959485, "loss": 2.8615, "step": 537 },
    { "epoch": 1.75, "learning_rate": 0.00019807279552228816, "loss": 2.8273, "step": 540 },
    { "epoch": 1.76, "learning_rate": 0.00019803325907883385, "loss": 2.9113, "step": 543 },
    { "epoch": 1.77, "learning_rate": 0.00019799332522585936, "loss": 2.8561, "step": 546 },
    { "epoch": 1.78, "learning_rate": 0.00019795299412524945, "loss": 2.8562, "step": 549 },
    { "epoch": 1.79, "learning_rate": 0.00019791226594049932, "loss": 2.8861, "step": 552 },
    { "epoch": 1.8, "learning_rate": 0.00019787114083671375, "loss": 2.8196, "step": 555 },
    { "epoch": 1.81, "learning_rate": 0.00019782961898060677, "loss": 2.8737, "step": 558 },
    { "epoch": 1.82, "learning_rate": 0.00019778770054050058, "loss": 2.9101, "step": 561 },
    { "epoch": 1.83, "learning_rate": 0.00019774538568632515, "loss": 2.9066, "step": 564 },
    { "epoch": 1.84, "learning_rate": 0.00019770267458961741, "loss": 2.8889, "step": 567 },
    { "epoch": 1.85, "learning_rate": 0.00019765956742352062, "loss": 2.8761, "step": 570 },
    { "epoch": 1.86, "learning_rate": 0.00019761606436278362, "loss": 2.8484, "step": 573 },
    { "epoch": 1.87, "learning_rate": 0.00019757216558376013, "loss": 2.8575, "step": 576 },
    { "epoch": 1.88, "learning_rate": 0.00019752787126440803, "loss": 2.785, "step": 579 },
    { "epoch": 1.89, "learning_rate": 0.0001974831815842887, "loss": 2.8186, "step": 582 },
    { "epoch": 1.9, "learning_rate": 0.00019743809672456618, "loss": 2.8184, "step": 585 },
    { "epoch": 1.91, "learning_rate": 0.0001973926168680066, "loss": 2.8115, "step": 588 },
    { "epoch": 1.92, "learning_rate": 0.00019734674219897718, "loss": 2.8627, "step": 591 },
    { "epoch": 1.93, "learning_rate": 0.00019730047290344578, "loss": 2.8416, "step": 594 },
    { "epoch": 1.94, "learning_rate": 0.0001972538091689799, "loss": 2.8565, "step": 597 },
    { "epoch": 1.95, "learning_rate": 0.00019720675118474614, "loss": 2.8373, "step": 600 },
    { "epoch": 1.96, "learning_rate": 0.00019715929914150923, "loss": 2.8271, "step": 603 },
    { "epoch": 1.97, "learning_rate": 0.00019711145323163137, "loss": 2.8548, "step": 606 },
    { "epoch": 1.98, "learning_rate": 0.00019706321364907142, "loss": 2.8292, "step": 609 },
    { "epoch": 1.99, "learning_rate": 0.00019701458058938418, "loss": 2.9203, "step": 612 },
    { "epoch": 2.0, "learning_rate": 0.00019696555424971943, "loss": 2.8727, "step": 615 },
    { "epoch": 2.0, "eval_accuracy": 0.5661561892883697, "eval_loss": 2.672184705734253, "eval_runtime": 16.2659, "eval_samples_per_second": 135.068, "eval_steps_per_second": 67.565, "step": 616 },
    { "epoch": 2.01, "learning_rate": 0.0001969161348288213, "loss": 2.7287, "step": 618 },
    { "epoch": 2.02, "learning_rate": 0.00019686632252702743, "loss": 2.7983, "step": 621 },
    { "epoch": 2.03, "learning_rate": 0.00019681611754626807, "loss": 2.7829, "step": 624 },
    { "epoch": 2.04, "learning_rate": 0.00019676552009006534, "loss": 2.8671, "step": 627 },
    { "epoch": 2.05, "learning_rate": 0.0001967145303635324, "loss": 2.7472, "step": 630 },
    { "epoch": 2.06, "learning_rate": 0.00019666314857337262, "loss": 2.7506, "step": 633 },
    { "epoch": 2.06, "learning_rate": 0.00019661137492787867, "loss": 2.7307, "step": 636 },
    { "epoch": 2.07, "learning_rate": 0.00019655920963693174, "loss": 2.7653, "step": 639 },
    { "epoch": 2.08, "learning_rate": 0.00019650665291200082, "loss": 2.8072, "step": 642 },
    { "epoch": 2.09, "learning_rate": 0.00019645370496614145, "loss": 2.781, "step": 645 },
    { "epoch": 2.1, "learning_rate": 0.00019640036601399535, "loss": 2.6695, "step": 648 },
    { "epoch": 2.11, "learning_rate": 0.00019634663627178918, "loss": 2.7504, "step": 651 },
    { "epoch": 2.12, "learning_rate": 0.00019629251595733383, "loss": 2.7793, "step": 654 },
    { "epoch": 2.13, "learning_rate": 0.00019623800529002347, "loss": 2.7255, "step": 657 },
    { "epoch": 2.14, "learning_rate": 0.00019618310449083477, "loss": 2.7955, "step": 660 },
    { "epoch": 2.15, "learning_rate": 0.00019612781378232583, "loss": 2.6888, "step": 663 },
    { "epoch": 2.16, "learning_rate": 0.00019607213338863547, "loss": 2.8287, "step": 666 },
    { "epoch": 2.17, "learning_rate": 0.0001960160635354821, "loss": 2.7925, "step": 669 },
    { "epoch": 2.18, "learning_rate": 0.00019595960445016307, "loss": 2.8107, "step": 672 },
    { "epoch": 2.19, "learning_rate": 0.00019590275636155352, "loss": 2.7144, "step": 675 },
    { "epoch": 2.2, "learning_rate": 0.00019584551950010555, "loss": 2.8271, "step": 678 },
    { "epoch": 2.21, "learning_rate": 0.00019578789409784727, "loss": 2.685, "step": 681 },
    { "epoch": 2.22, "learning_rate": 0.00019572988038838194, "loss": 2.7504, "step": 684 },
    { "epoch": 2.23, "learning_rate": 0.00019567147860688686, "loss": 2.7186, "step": 687 },
    { "epoch": 2.24, "learning_rate": 0.00019561268899011256, "loss": 2.7287, "step": 690 },
    { "epoch": 2.25, "learning_rate": 0.00019555351177638172, "loss": 2.7973, "step": 693 },
    { "epoch": 2.26, "learning_rate": 0.00019549394720558833, "loss": 2.7732, "step": 696 },
    { "epoch": 2.27, "learning_rate": 0.00019543399551919668, "loss": 2.7949, "step": 699 },
    { "epoch": 2.28, "learning_rate": 0.0001953736569602403, "loss": 2.893, "step": 702 },
    { "epoch": 2.29, "learning_rate": 0.00019531293177332102, "loss": 2.7169, "step": 705 },
    { "epoch": 2.3, "learning_rate": 0.00019525182020460803, "loss": 2.7442, "step": 708 },
    { "epoch": 2.31, "learning_rate": 0.0001951903225018369, "loss": 2.7373, "step": 711 },
    { "epoch": 2.32, "learning_rate": 0.0001951284389143084, "loss": 2.6366, "step": 714 },
    { "epoch": 2.33, "learning_rate": 0.00019506616969288768, "loss": 2.7411, "step": 717 },
    { "epoch": 2.34, "learning_rate": 0.00019500351509000314, "loss": 2.7378, "step": 720 },
    { "epoch": 2.35, "learning_rate": 0.00019494047535964553, "loss": 2.6151, "step": 723 },
    { "epoch": 2.36, "learning_rate": 0.00019487705075736672, "loss": 2.8224, "step": 726 },
    { "epoch": 2.37, "learning_rate": 0.00019481324154027894, "loss": 2.8167, "step": 729 },
    { "epoch": 2.38, "learning_rate": 0.00019474904796705337, "loss": 2.6986, "step": 732 },
    { "epoch": 2.39, "learning_rate": 0.0001946844702979195, "loss": 2.7249, "step": 735 },
    { "epoch": 2.4, "learning_rate": 0.00019461950879466383, "loss": 2.6904, "step": 738 },
    { "epoch": 2.41, "learning_rate": 0.0001945541637206287, "loss": 2.7191, "step": 741 },
    { "epoch": 2.42, "learning_rate": 0.00019448843534071163, "loss": 2.804, "step": 744 },
    { "epoch": 2.43, "learning_rate": 0.00019442232392136375, "loss": 2.7587, "step": 747 },
    { "epoch": 2.44, "learning_rate": 0.00019435582973058915, "loss": 2.6742, "step": 750 },
    { "epoch": 2.44, "learning_rate": 0.00019428895303794352, "loss": 2.7017, "step": 753 },
    { "epoch": 2.45, "learning_rate": 0.00019422169411453317, "loss": 2.7544, "step": 756 },
    { "epoch": 2.46, "learning_rate": 0.0001941540532330139, "loss": 2.7186, "step": 759 },
    { "epoch": 2.47, "learning_rate": 0.00019408603066758988, "loss": 2.7649, "step": 762 },
    { "epoch": 2.48, "learning_rate": 0.00019401762669401257, "loss": 2.8109, "step": 765 },
    { "epoch": 2.49, "learning_rate": 0.00019394884158957965, "loss": 2.7248, "step": 768 },
    { "epoch": 2.5, "learning_rate": 0.00019387967563313377, "loss": 2.6719, "step": 771 },
    { "epoch": 2.51, "learning_rate": 0.00019381012910506146, "loss": 2.7268, "step": 774 },
    { "epoch": 2.52, "learning_rate": 0.00019374020228729206, "loss": 2.7121, "step": 777 },
    { "epoch": 2.53, "learning_rate": 0.0001936698954632966, "loss": 2.6516, "step": 780 },
    { "epoch": 2.54, "learning_rate": 0.00019359920891808647, "loss": 2.7795, "step": 783 },
    { "epoch": 2.55, "learning_rate": 0.00019352814293821248, "loss": 2.7295, "step": 786 },
    { "epoch": 2.56, "learning_rate": 0.00019345669781176356, "loss": 2.6901, "step": 789 },
    { "epoch": 2.57, "learning_rate": 0.00019338487382836565, "loss": 2.7171, "step": 792 },
    { "epoch": 2.58, "learning_rate": 0.00019331267127918044, "loss": 2.5934, "step": 795 },
    { "epoch": 2.59, "learning_rate": 0.00019324009045690438, "loss": 2.677, "step": 798 },
    { "epoch": 2.6, "learning_rate": 0.00019316713165576726, "loss": 2.7009, "step": 801 },
    { "epoch": 2.61, "learning_rate": 0.0001930937951715312, "loss": 2.7384, "step": 804 },
    { "epoch": 2.62, "learning_rate": 0.00019302008130148932, "loss": 2.6525, "step": 807 },
    { "epoch": 2.63, "learning_rate": 0.00019294599034446467, "loss": 2.626, "step": 810 },
    { "epoch": 2.64, "learning_rate": 0.00019287152260080888, "loss": 2.6826, "step": 813 },
    { "epoch": 2.65, "learning_rate": 0.00019279667837240105, "loss": 2.7388, "step": 816 },
    { "epoch": 2.66, "learning_rate": 0.00019272145796264648, "loss": 2.7281, "step": 819 },
    { "epoch": 2.67, "learning_rate": 0.0001926458616764754, "loss": 2.5821, "step": 822 },
    { "epoch": 2.68, "learning_rate": 0.00019256988982034178, "loss": 2.6877, "step": 825 },
    { "epoch": 2.69, "learning_rate": 0.00019249354270222218, "loss": 2.6533, "step": 828 },
    { "epoch": 2.7, "learning_rate": 0.00019241682063161428, "loss": 2.7017, "step": 831 },
    { "epoch": 2.71, "learning_rate": 0.00019233972391953584, "loss": 2.5812, "step": 834 },
    { "epoch": 2.72, "learning_rate": 0.00019226225287852325, "loss": 2.6331, "step": 837 },
    { "epoch": 2.73, "learning_rate": 0.0001921844078226305, "loss": 2.6415, "step": 840 },
    { "epoch": 2.74, "learning_rate": 0.0001921061890674277, "loss": 2.6382, "step": 843 },
    { "epoch": 2.75, "learning_rate": 0.0001920275969299998, "loss": 2.6692, "step": 846 },
    { "epoch": 2.76, "learning_rate": 0.00019194863172894552, "loss": 2.6522, "step": 849 },
    { "epoch": 2.77, "learning_rate": 0.00019186929378437582, "loss": 2.6311, "step": 852 },
    { "epoch": 2.78, "learning_rate": 0.00019178958341791268, "loss": 2.6869, "step": 855 },
    { "epoch": 2.79, "learning_rate": 0.00019170950095268792, "loss": 2.6057, "step": 858 },
    { "epoch": 2.8, "learning_rate": 0.00019162904671334163, "loss": 2.6813, "step": 861 },
    { "epoch": 2.81, "learning_rate": 0.00019154822102602115, "loss": 2.6263, "step": 864 },
    { "epoch": 2.81, "learning_rate": 0.0001914670242183795, "loss": 2.6506, "step": 867 },
    { "epoch": 2.82, "learning_rate": 0.00019138545661957426, "loss": 2.6605, "step": 870 },
    { "epoch": 2.83, "learning_rate": 0.00019130351856026597, "loss": 2.6763, "step": 873 },
    { "epoch": 2.84, "learning_rate": 0.00019122121037261719, "loss": 2.6219, "step": 876 },
    { "epoch": 2.85, "learning_rate": 0.00019113853239029064, "loss": 2.6208, "step": 879 },
    { "epoch": 2.86, "learning_rate": 0.00019105548494844835, "loss": 2.7369, "step": 882 },
    { "epoch": 2.87, "learning_rate": 0.00019097206838374997, "loss": 2.5989, "step": 885 },
    { "epoch": 2.88, "learning_rate": 0.0001908882830343515, "loss": 2.5859, "step": 888 },
    { "epoch": 2.89, "learning_rate": 0.00019080412923990395, "loss": 2.6183, "step": 891 },
    { "epoch": 2.9, "learning_rate": 0.00019071960734155194, "loss": 2.6804, "step": 894 },
    { "epoch": 2.91, "learning_rate": 0.00019063471768193235, "loss": 2.6772, "step": 897 },
    { "epoch": 2.92, "learning_rate": 0.00019054946060517283, "loss": 2.6334, "step": 900 },
    { "epoch": 2.93, "learning_rate": 0.00019046383645689055, "loss": 2.7288, "step": 903 },
    { "epoch": 2.94, "learning_rate": 0.00019037784558419065, "loss": 2.6486, "step": 906 },
    { "epoch": 2.95, "learning_rate": 0.00019029148833566497, "loss": 2.6382, "step": 909 },
    { "epoch": 2.96, "learning_rate": 0.00019020476506139057, "loss": 2.5683, "step": 912 },
    { "epoch": 2.97, "learning_rate": 0.00019011767611292819, "loss": 2.6047, "step": 915 },
    { "epoch": 2.98, "learning_rate": 0.00019003022184332116, "loss": 2.6616, "step": 918 },
    { "epoch": 2.99, "learning_rate": 0.0001899424026070936, "loss": 2.6049, "step": 921 },
    { "epoch": 3.0, "learning_rate": 0.00018985421876024916, "loss": 2.6339, "step": 924 },
    { "epoch": 3.0, "eval_accuracy": 0.5878054172915932, "eval_loss": 2.479712724685669, "eval_runtime": 16.3394, "eval_samples_per_second": 134.46, "eval_steps_per_second": 67.261, "step": 924 },
    { "epoch": 3.01, "learning_rate": 0.0001897656706602696, "loss": 2.5743, "step": 927 },
    { "epoch": 3.02, "learning_rate": 0.0001896767586661133, "loss": 2.5385, "step": 930 },
    { "epoch": 3.03, "learning_rate": 0.0001895874831382138, "loss": 2.6556, "step": 933 },
    { "epoch": 3.04, "learning_rate": 0.00018949784443847824, "loss": 2.5895, "step": 936 },
    { "epoch": 3.05, "learning_rate": 0.00018940784293028617, "loss": 2.5747, "step": 939 },
    { "epoch": 3.06, "learning_rate": 0.00018931747897848778, "loss": 2.6032, "step": 942 },
    { "epoch": 3.07, "learning_rate": 0.00018922675294940256, "loss": 2.5687, "step": 945 },
    { "epoch": 3.08, "learning_rate": 0.00018913566521081777, "loss": 2.5473, "step": 948 },
    { "epoch": 3.09, "learning_rate": 0.00018904421613198712, "loss": 2.6586, "step": 951 },
    { "epoch": 3.1, "learning_rate": 0.00018895240608362895, "loss": 2.6245, "step": 954 },
    { "epoch": 3.11, "learning_rate": 0.000188860235437925, "loss": 2.6062, "step": 957 },
    { "epoch": 3.12, "learning_rate": 0.00018876770456851877, "loss": 2.4521, "step": 960 },
    { "epoch": 3.13, "learning_rate": 0.0001886748138505141, "loss": 2.698, "step": 963 },
    { "epoch": 3.14, "learning_rate": 0.00018858156366047358, "loss": 2.5416, "step": 966 },
    { "epoch": 3.15, "learning_rate": 0.00018848795437641697, "loss": 2.6364, "step": 969 },
    { "epoch": 3.16, "learning_rate": 0.00018839398637781972, "loss": 2.5949, "step": 972 },
    { "epoch": 3.17, "learning_rate": 0.00018829966004561163, "loss": 2.5397, "step": 975 },
    { "epoch": 3.18, "learning_rate": 0.00018820497576217492, "loss": 2.5792, "step": 978 },
    { "epoch": 3.19, "learning_rate": 0.00018810993391134295, "loss": 2.5549, "step": 981 },
    { "epoch": 3.19, "learning_rate": 0.00018801453487839862, "loss": 2.6141, "step": 984 },
    { "epoch": 3.2, "learning_rate": 0.00018791877905007277, "loss": 2.6055, "step": 987 },
    { "epoch": 3.21, "learning_rate": 0.00018782266681454255, "loss": 2.5834, "step": 990 },
    { "epoch": 3.22, "learning_rate": 0.00018772619856143009, "loss": 2.6272, "step": 993 },
    { "epoch": 3.23, "learning_rate": 0.0001876293746818006, "loss": 2.5862, "step": 996 },
    { "epoch": 3.24, "learning_rate": 0.000187532195568161, "loss": 2.6453, "step": 999 },
    { "epoch": 3.25, "learning_rate": 0.00018743466161445823, "loss": 2.5199, "step": 1002 },
    { "epoch": 3.26, "learning_rate": 0.00018733677321607775, "loss": 2.5887, "step": 1005 },
    { "epoch": 3.27, "learning_rate": 0.0001872385307698418, "loss": 2.5769, "step": 1008 },
    { "epoch": 3.28, "learning_rate": 0.00018713993467400796, "loss": 2.6303, "step": 1011 },
    { "epoch": 3.29, "learning_rate": 0.00018704098532826735, "loss": 2.5144, "step": 1014 },
    { "epoch": 3.3, "learning_rate": 0.0001869416831337432, "loss": 2.5568, "step": 1017 },
    { "epoch": 3.31, "learning_rate": 0.00018684202849298897, "loss": 2.6413, "step": 1020 },
    { "epoch": 3.32, "learning_rate": 0.00018674202180998708, "loss": 2.5877, "step": 1023 },
    { "epoch": 3.33, "learning_rate": 0.0001866416634901469, "loss": 2.5414, "step": 1026 },
    { "epoch": 3.34, "learning_rate": 0.00018654095394030334, "loss": 2.5394, "step": 1029 },
    { "epoch": 3.35, "learning_rate": 0.00018643989356871514, "loss": 2.5929, "step": 1032 },
    { "epoch": 3.36, "learning_rate": 0.00018633848278506323, "loss": 2.6068, "step": 1035 },
    { "epoch": 3.37, "learning_rate": 0.00018623672200044898, "loss": 2.6195, "step": 1038 },
    { "epoch": 3.38, "learning_rate": 0.00018613461162739263, "loss": 2.5121, "step": 1041 },
    { "epoch": 3.39, "learning_rate": 0.00018603215207983165, "loss": 2.5959, "step": 1044 },
    { "epoch": 3.4, "learning_rate": 0.0001859293437731189, "loss": 2.5925, "step": 1047 },
    { "epoch": 3.41, "learning_rate": 0.00018582618712402113, "loss": 2.608, "step": 1050 },
    { "epoch": 3.42, "learning_rate": 0.00018572268255071718, "loss": 2.6188, "step": 1053 },
    { "epoch": 3.43, "learning_rate": 0.0001856188304727963, "loss": 2.5683, "step": 1056 },
    { "epoch": 3.44, "learning_rate": 0.00018551463131125649, "loss": 2.5835, "step": 1059 },
    { "epoch": 3.45, "learning_rate": 0.00018541008548850273, "loss": 2.5374, "step": 1062 },
    { "epoch": 3.46, "learning_rate": 0.0001853051934283453, "loss": 2.5489, "step": 1065 },
    { "epoch": 3.47, "learning_rate": 0.00018519995555599817, "loss": 2.4947, "step": 1068 },
    { "epoch": 3.48, "learning_rate": 0.000185094372298077, "loss": 2.4604, "step": 1071 },
    { "epoch": 3.49, "learning_rate": 0.00018498844408259773, "loss": 2.5453, "step": 1074 },
    { "epoch": 3.5, "learning_rate": 0.00018488217133897462, "loss": 2.5738, "step": 1077 },
    { "epoch": 3.51, "learning_rate": 0.00018477555449801863, "loss": 2.5437, "step": 1080 },
    { "epoch": 3.52, "learning_rate": 0.00018466859399193555, "loss": 2.46, "step": 1083 },
    { "epoch": 3.53, "learning_rate": 0.00018456129025432442, "loss": 2.5457, "step": 1086 },
    { "epoch": 3.54, "learning_rate": 0.00018445364372017564, "loss": 2.5188, "step": 1089 },
    { "epoch": 3.55, "learning_rate": 0.00018434565482586924, "loss": 2.5652, "step": 1092 },
    { "epoch": 3.56, "learning_rate": 0.00018423732400917316, "loss": 2.544, "step": 1095 },
    { "epoch": 3.56, "learning_rate": 0.00018412865170924135, "loss": 2.6398, "step": 1098 },
    { "epoch": 3.57, "learning_rate": 0.00018401963836661218, "loss": 2.6341, "step": 1101 },
    { "epoch": 3.58, "learning_rate": 0.00018391028442320644, "loss": 2.5351, "step": 1104 },
    { "epoch": 3.59, "learning_rate": 0.0001838005903223257, "loss": 2.5473, "step": 1107 },
    { "epoch": 3.6, "learning_rate": 0.00018369055650865052, "loss": 2.5146, "step": 1110 },
    { "epoch": 3.61, "learning_rate": 0.00018358018342823855, "loss": 2.5715, "step": 1113 },
    { "epoch": 3.62, "learning_rate": 0.0001834694715285227, "loss": 2.5376, "step": 1116 },
    { "epoch": 3.63, "learning_rate": 0.00018335842125830954, "loss": 2.6296, "step": 1119 },
    { "epoch": 3.64, "learning_rate": 0.00018324703306777718, "loss": 2.4321, "step": 1122 },
    { "epoch": 3.65, "learning_rate": 0.00018313530740847375, "loss": 2.5319, "step": 1125 },
    { "epoch": 3.66, "learning_rate": 0.0001830232447333153, "loss": 2.5231, "step": 1128 },
    { "epoch": 3.67, "learning_rate": 0.00018291084549658412, "loss": 2.5694, "step": 1131 },
    { "epoch": 3.68, "learning_rate": 0.00018279811015392685, "loss": 2.499, "step": 1134 },
    { "epoch": 3.69, "learning_rate": 0.00018268503916235273, "loss": 2.5255, "step": 1137 },
    { "epoch": 3.7, "learning_rate": 0.00018257163298023151, "loss": 2.5671, "step": 1140 },
    { "epoch": 3.71, "learning_rate": 0.0001824578920672919, "loss": 2.4801, "step": 1143 },
    { "epoch": 3.72, "learning_rate": 0.00018234381688461942, "loss": 2.5006, "step": 1146 },
    { "epoch": 3.73, "learning_rate": 0.00018222940789465475, "loss": 2.5033, "step": 1149 },
    { "epoch": 3.74, "learning_rate": 0.00018211466556119173, "loss": 2.565, "step": 1152 },
    { "epoch": 3.75, "learning_rate": 0.0001819995903493755, "loss": 2.5869, "step": 1155 },
    { "epoch": 3.76, "learning_rate": 0.00018188418272570061, "loss": 2.422, "step": 1158 },
    { "epoch": 3.77, "learning_rate": 0.00018176844315800924, "loss": 2.4295, "step": 1161 },
    { "epoch": 3.78, "learning_rate": 0.0001816523721154892, "loss": 2.4252, "step": 1164 },
    { "epoch": 3.79, "learning_rate": 0.00018153597006867188, "loss": 2.5694, "step": 1167 },
    { "epoch": 3.8, "learning_rate": 0.00018141923748943073, "loss": 2.4952, "step": 1170 },
    { "epoch": 3.81, "learning_rate": 0.00018130217485097893, "loss": 2.4748, "step": 1173 },
    { "epoch": 3.82, "learning_rate": 0.00018118478262786782, "loss": 2.5343, "step": 1176 },
    { "epoch": 3.83, "learning_rate": 0.0001810670612959847, "loss": 2.4971, "step": 1179 },
    { "epoch": 3.84, "learning_rate": 0.00018094901133255105, "loss": 2.4903, "step": 1182 },
    { "epoch": 3.85, "learning_rate": 0.00018083063321612056, "loss": 2.5106, "step": 1185 },
    { "epoch": 3.86, "learning_rate": 0.0001807119274265773, "loss": 2.4929, "step": 1188 },
    { "epoch": 3.87, "learning_rate": 0.00018059289444513347, "loss": 2.5104, "step": 1191 },
    { "epoch": 3.88, "learning_rate": 0.00018047353475432782, "loss": 2.4528, "step": 1194 },
    { "epoch": 3.89, "learning_rate": 0.00018035384883802346, "loss": 2.4571, "step": 1197 },
    { "epoch": 3.9, "learning_rate": 0.00018023383718140593, "loss": 2.5601, "step": 1200 },
    { "epoch": 3.91, "learning_rate": 0.00018011350027098127, "loss": 2.4553, "step": 1203 },
    { "epoch": 3.92, "learning_rate": 0.00017999283859457412, "loss": 2.5075, "step": 1206 },
    { "epoch": 3.93, "learning_rate": 0.0001798718526413256, "loss": 2.5526, "step": 1209 },
    { "epoch": 3.94, "learning_rate": 0.00017975054290169138, "loss": 2.4881, "step": 1212 },
    { "epoch": 3.94, "learning_rate": 0.0001796289098674397, "loss": 2.4973, "step": 1215 },
    { "epoch": 3.95, "learning_rate": 0.00017950695403164943, "loss": 2.4831, "step": 1218 },
    { "epoch": 3.96, "learning_rate": 0.000179384675888708, "loss": 2.5187, "step": 1221 },
    { "epoch": 3.97, "learning_rate": 0.0001792620759343094, "loss": 2.4949, "step": 1224 },
    { "epoch": 3.98, "learning_rate": 0.00017913915466545217, "loss": 2.533, "step": 1227 },
    { "epoch": 3.99, "learning_rate": 0.00017901591258043747, "loss": 2.5053, "step": 1230 },
    { "epoch": 4.0, "eval_accuracy": 0.6025211491749728, "eval_loss": 2.383329153060913, "eval_runtime": 16.3176, "eval_samples_per_second": 134.64, "eval_steps_per_second": 67.351, "step": 1232 },
    { "epoch": 4.0, "learning_rate": 0.0001788923501788669, "loss": 2.5555, "step": 1233 },
    { "epoch": 4.01, "learning_rate": 0.00017876846796164068, "loss": 2.4955, "step": 1236 },
    { "epoch": 4.02, "learning_rate": 0.0001786442664309554, "loss": 2.5338, "step": 1239 },
    { "epoch": 4.03, "learning_rate": 0.0001785197460903021, "loss": 2.4958, "step": 1242 },
    { "epoch": 4.04, "learning_rate": 0.0001783949074444643, "loss": 2.4291, "step": 1245 },
    { "epoch": 4.05, "learning_rate": 0.00017826975099951583, "loss": 2.4112, "step": 1248 },
    { "epoch": 4.06, "learning_rate": 0.0001781442772628188, "loss": 2.4166, "step": 1251 },
    { "epoch": 4.07, "learning_rate": 0.00017801848674302154, "loss": 2.5571, "step": 1254 },
    { "epoch": 4.08, "learning_rate": 0.00017789237995005668, "loss": 2.4778, "step": 1257 },
    { "epoch": 4.09, "learning_rate": 0.0001777659573951388, "loss": 2.5491, "step": 1260 },
    { "epoch": 4.1, "learning_rate": 0.00017763921959076273, "loss": 2.4311, "step": 1263 },
    { "epoch": 4.11, "learning_rate": 0.00017751216705070105, "loss": 2.4439, "step": 1266 },
    { "epoch": 4.12, "learning_rate": 0.00017738480029000234, "loss": 2.4623, "step": 1269 },
    { "epoch": 4.13, "learning_rate": 0.000177257119824989, "loss": 2.5291, "step": 1272 },
    { "epoch": 4.14, "learning_rate": 0.00017712912617325502, "loss": 2.4934, "step": 1275 },
    { "epoch": 4.15, "learning_rate": 0.0001770008198536641, "loss": 2.5712, "step": 1278 },
    { "epoch": 4.16, "learning_rate": 0.0001768722013863474, "loss": 2.5426, "step": 1281 },
    { "epoch": 4.17, "learning_rate": 0.00017674327129270148, "loss": 2.4619, "step": 1284 },
    { "epoch": 4.18, "learning_rate": 0.00017661403009538616, "loss": 2.484, "step": 1287 },
    { "epoch": 4.19, "learning_rate": 0.00017648447831832242, "loss": 2.4566, "step": 1290 },
    { "epoch": 4.2, "learning_rate": 0.0001763546164866903, "loss": 2.5267, "step": 1293 },
    { "epoch": 4.21, "learning_rate": 0.00017622444512692672, "loss": 2.4614, "step": 1296 },
    { "epoch": 4.22, "learning_rate": 0.00017609396476672343, "loss": 2.4796, "step": 1299 },
    { "epoch": 4.23, "learning_rate": 0.0001759631759350247, "loss": 2.3971, "step": 1302 },
    { "epoch": 4.24, "learning_rate": 0.0001758320791620254, "loss": 2.4879, "step": 1305 },
    { "epoch": 4.25, "learning_rate": 0.0001757006749791687, "loss": 2.5324, "step": 1308 },
    { "epoch": 4.26, "learning_rate": 0.00017556896391914394, "loss": 2.3853, "step": 1311 },
    { "epoch": 4.27, "learning_rate": 0.0001754369465158845, "loss": 2.455, "step": 1314 },
    { "epoch": 4.28, "learning_rate": 0.0001753046233045656, "loss": 2.4964, "step": 1317 },
    { "epoch": 4.29, "learning_rate": 0.0001751719948216022, "loss": 2.4615, "step": 1320 },
    { "epoch": 4.3, "learning_rate": 0.00017503906160464672, "loss": 2.4745, "step": 1323 },
    { "epoch": 4.31, "learning_rate": 0.00017490582419258697, "loss": 2.4068, "step": 1326 },
    { "epoch": 4.31, "learning_rate": 0.00017477228312554388, "loss": 2.4827, "step": 1329 },
    { "epoch": 4.32, "learning_rate": 0.00017463843894486937, "loss": 2.4002, "step": 1332 },
    { "epoch": 4.33, "learning_rate": 0.00017450429219314408, "loss": 2.3769, "step": 1335 },
    { "epoch": 4.34, "learning_rate": 0.00017436984341417532, "loss": 2.4319, "step": 1338 },
    { "epoch": 4.35, "learning_rate": 0.00017423509315299458, "loss": 2.4803, "step": 1341 },
    { "epoch": 4.36, "learning_rate": 0.00017410004195585573, "loss": 2.4286, "step": 1344 },
    { "epoch": 4.37, "learning_rate": 0.00017396469037023242, "loss": 2.4884, "step": 1347 },
    { "epoch": 4.38, "learning_rate": 0.00017382903894481611, "loss": 2.4376, "step": 1350 },
    { "epoch": 4.39, "learning_rate": 0.00017369308822951367, "loss": 2.4966, "step": 1353 },
    { "epoch": 4.4, "learning_rate": 0.00017355683877544532, "loss": 2.5422, "step": 1356 },
    { "epoch": 4.41, "learning_rate": 0.00017342029113494233, "loss": 2.4287, "step": 1359 },
    { "epoch": 4.42, "learning_rate": 0.00017328344586154467, "loss": 2.445, "step": 1362 },
    { "epoch": 4.43, "learning_rate": 0.0001731463035099989, "loss": 2.4555, "step": 1365 },
    { "epoch": 4.44, "learning_rate": 0.00017300886463625595, "loss": 2.4885, "step": 1368 },
    { "epoch": 4.45, "learning_rate": 0.00017287112979746868, "loss": 2.4137, "step": 1371 },
    { "epoch": 4.46, "learning_rate": 0.00017273309955198984, "loss": 2.494, "step": 1374 },
    { "epoch": 4.47, "learning_rate": 0.0001725947744593697, "loss": 2.4098, "step": 1377 },
    { "epoch": 4.48, "learning_rate": 0.0001724561550803537, "loss": 2.4887, "step": 1380 },
    { "epoch": 4.49, "learning_rate": 0.00017231724197688033, "loss": 2.4888, "step": 1383 },
    { "epoch": 4.5, "learning_rate": 0.0001721780357120788, "loss": 2.5013, "step": 1386 },
    { "epoch": 4.51, "learning_rate": 0.00017203853685026675, "loss": 2.422, "step": 1389 },
    { "epoch": 4.52, "learning_rate": 0.00017189874595694788, "loss": 2.4682, "step": 1392 },
    { "epoch": 4.53, "learning_rate": 0.00017175866359880982, "loss": 2.3751, "step": 1395 },
    { "epoch": 4.54, "learning_rate": 0.00017161829034372168, "loss": 2.4773, "step": 1398 },
    { "epoch": 4.55, "learning_rate": 0.00017147762676073187, "loss": 2.4837, "step": 1401 },
    { "epoch": 4.56, "learning_rate": 0.0001713366734200657, "loss": 2.4487, "step": 1404 },
    { "epoch": 4.57, "learning_rate": 0.00017119543089312317, "loss": 2.4272, "step": 1407 },
    { "epoch": 4.58, "learning_rate": 0.00017105389975247647, "loss": 2.4699, "step": 1410 },
    { "epoch": 4.59, "learning_rate": 0.00017091208057186792, "loss": 2.4837, "step": 1413 },
    { "epoch": 4.6, "learning_rate": 0.00017076997392620737, "loss": 2.4351, "step": 1416 },
    { "epoch": 4.61, "learning_rate": 0.0001706275803915701, "loss": 2.3902, "step": 1419 },
    { "epoch": 4.62, "learning_rate": 0.00017048490054519434, "loss": 2.3355, "step": 1422 },
    { "epoch": 4.63, "learning_rate": 0.00017034193496547902, "loss": 2.3968, "step": 1425 },
    { "epoch": 4.64, "learning_rate": 0.00017019868423198134, "loss": 2.4197, "step": 1428 },
    { "epoch": 4.65, "learning_rate": 0.00017005514892541444, "loss": 2.4192, "step": 1431 },
    { "epoch": 4.66, "learning_rate": 0.00016991132962764516, "loss": 2.359, "step": 1434 },
    { "epoch": 4.67, "learning_rate": 0.00016976722692169148, "loss": 2.4068, "step": 1437 },
    { "epoch": 4.68, "learning_rate": 0.00016962284139172037, "loss": 2.4527, "step": 1440 },
    { "epoch": 4.69, "learning_rate": 0.00016947817362304525, "loss": 2.4723, "step": 1443 },
    { "epoch": 4.69, "learning_rate": 0.00016933322420212372, "loss": 2.4029, "step": 1446 },
    { "epoch": 4.7, "learning_rate": 0.00016918799371655512, "loss": 2.4574, "step": 1449 },
    { "epoch": 4.71, "learning_rate": 0.00016904248275507818, "loss": 2.4426, "step": 1452 },
    { "epoch": 4.72, "learning_rate": 0.00016889669190756868, "loss": 2.4689, "step": 1455 },
    { "epoch": 4.73, "learning_rate": 0.00016875062176503693, "loss": 2.407, "step": 1458 },
    { "epoch": 4.74, "learning_rate": 0.0001686042729196255, "loss": 2.3877, "step": 1461 },
    { "epoch": 4.75, "learning_rate": 0.0001684576459646068, "loss": 2.394, "step": 1464 },
    { "epoch": 4.76, "learning_rate": 0.00016831074149438056, "loss": 2.4613, "step": 1467 },
    { "epoch": 4.77, "learning_rate": 0.00016816356010447163, "loss": 2.3824, "step": 1470 },
    { "epoch": 4.78, "learning_rate": 0.0001680161023915273, "loss": 2.5161, "step": 1473 },
    { "epoch": 4.79, "learning_rate": 0.00016786836895331514, "loss": 2.3828, "step": 1476 },
    { "epoch": 4.8, "learning_rate": 0.00016772036038872039, "loss": 2.4413, "step": 1479 },
    { "epoch": 4.81, "learning_rate": 0.0001675720772977437, "loss": 2.3981, "step": 1482 },
    { "epoch": 4.82, "learning_rate": 0.00016742352028149843, "loss": 2.4631, "step": 1485 },
    { "epoch": 4.83, "learning_rate": 0.0001672746899422086, "loss": 2.4012, "step": 1488 },
    { "epoch": 4.84, "learning_rate": 0.000167125586883206, "loss": 2.4043, "step": 1491 },
    { "epoch": 4.85, "learning_rate": 0.00016697621170892824, "loss": 2.3889, "step": 1494 },
    { "epoch": 4.86, "learning_rate": 0.00016682656502491576, "loss": 2.4473, "step": 1497 },
    { "epoch": 4.87, "learning_rate": 0.0001666766474378099, "loss": 2.5165, "step": 1500 },
    { "epoch": 4.88, "learning_rate": 0.00016652645955535006, "loss": 2.5155, "step": 1503 },
    { "epoch": 4.89, "learning_rate": 0.00016637600198637133, "loss": 2.3923, "step": 1506 },
    { "epoch": 4.9, "learning_rate": 0.00016622527534080218, "loss": 2.4353, "step": 1509 },
    { "epoch": 4.91, "learning_rate": 0.00016607428022966182, "loss": 2.3346, "step": 1512 },
    { "epoch": 4.92, "learning_rate": 0.00016592301726505771, "loss": 2.3873, "step": 1515 },
    { "epoch": 4.93, "learning_rate": 0.00016577148706018328, "loss": 2.4799, "step": 1518 },
    { "epoch": 4.94, "learning_rate": 0.00016561969022931515, "loss": 2.4228, "step": 1521 },
    { "epoch": 4.95, "learning_rate": 0.0001654676273878109, "loss": 2.4024, "step": 1524 },
    { "epoch": 4.96, "learning_rate": 0.00016531529915210643, "loss": 2.386, "step": 1527 },
    { "epoch": 4.97, "learning_rate": 0.0001651627061397135, "loss": 2.4365, "step": 1530 },
    { "epoch": 4.98, "learning_rate": 0.00016500984896921725, "loss": 2.3997, "step": 1533 },
    { "epoch": 4.99, "learning_rate": 0.00016485672826027363, "loss": 2.3659, "step": 1536 },
    { "epoch": 5.0, "learning_rate": 0.00016470334463360698, "loss": 2.4531, "step": 1539 },
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6106430794745761, |
|
"eval_loss": 2.3084843158721924, |
|
"eval_runtime": 16.3383, |
|
"eval_samples_per_second": 134.469, |
|
"eval_steps_per_second": 67.265, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.00016454969871100743, |
|
"loss": 2.376, |
|
"step": 1542 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.0001643957911153284, |
|
"loss": 2.2957, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.00016424162247048412, |
|
"loss": 2.3557, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.00016408719340144705, |
|
"loss": 2.3722, |
|
"step": 1551 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.00016393250453424534, |
|
"loss": 2.3903, |
|
"step": 1554 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.0001637775564959604, |
|
"loss": 2.3326, |
|
"step": 1557 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.00016362234991472416, |
|
"loss": 2.3791, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.00016346688541971668, |
|
"loss": 2.4608, |
|
"step": 1563 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00016331116364116363, |
|
"loss": 2.3049, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.00016315518521033354, |
|
"loss": 2.437, |
|
"step": 1569 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.00016299895075953547, |
|
"loss": 2.4088, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.0001628424609221163, |
|
"loss": 2.4097, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.00016268571633245812, |
|
"loss": 2.3635, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.00016252871762597592, |
|
"loss": 2.4373, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.00016237146543911463, |
|
"loss": 2.2713, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.00016221396040934694, |
|
"loss": 2.4049, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.00016205620317517034, |
|
"loss": 2.3796, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.00016189819437610484, |
|
"loss": 2.3642, |
|
"step": 1593 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.00016173993465269022, |
|
"loss": 2.3668, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00016158142464648342, |
|
"loss": 2.4196, |
|
"step": 1599 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.00016142266500005604, |
|
"loss": 2.488, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.00016126365635699166, |
|
"loss": 2.3974, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00016110439936188318, |
|
"loss": 2.4516, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.00016094489466033043, |
|
"loss": 2.3589, |
|
"step": 1611 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.0001607851428989372, |
|
"loss": 2.4077, |
|
"step": 1614 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.00016062514472530898, |
|
"loss": 2.3902, |
|
"step": 1617 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.0001604649007880501, |
|
"loss": 2.3319, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.00016030441173676117, |
|
"loss": 2.3729, |
|
"step": 1623 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00016014367822203646, |
|
"loss": 2.3052, |
|
"step": 1626 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.0001599827008954613, |
|
"loss": 2.3613, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.0001598214804096093, |
|
"loss": 2.4415, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00015966001741803983, |
|
"loss": 2.3959, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.0001594983125752954, |
|
"loss": 2.3294, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.0001593363665368988, |
|
"loss": 2.3211, |
|
"step": 1641 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.00015917417995935077, |
|
"loss": 2.3113, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.00015901175350012698, |
|
"loss": 2.3507, |
|
"step": 1647 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.00015884908781767565, |
|
"loss": 2.3533, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.00015868618357141472, |
|
"loss": 2.4636, |
|
"step": 1653 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00015852304142172923, |
|
"loss": 2.4222, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.00015835966202996867, |
|
"loss": 2.4257, |
|
"step": 1659 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.00015819604605844418, |
|
"loss": 2.3802, |
|
"step": 1662 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.00015803219417042608, |
|
"loss": 2.2824, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.00015786810703014096, |
|
"loss": 2.4089, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.0001577037853027691, |
|
"loss": 2.3537, |
|
"step": 1671 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.00015753922965444184, |
|
"loss": 2.3758, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0001573744407522386, |
|
"loss": 2.3749, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.00015720941926418455, |
|
"loss": 2.3841, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.0001570441658592477, |
|
"loss": 2.3546, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.00015687868120733614, |
|
"loss": 2.3845, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.00015671296597929535, |
|
"loss": 2.2959, |
|
"step": 1689 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.00015654702084690568, |
|
"loss": 2.4619, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.0001563808464828794, |
|
"loss": 2.2943, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.00015621444356085803, |
|
"loss": 2.3027, |
|
"step": 1698 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.00015604781275540956, |
|
"loss": 2.4349, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.00015588095474202595, |
|
"loss": 2.3143, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.00015571387019712004, |
|
"loss": 2.3555, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.0001555465597980231, |
|
"loss": 2.4337, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.00015537902422298197, |
|
"loss": 2.393, |
|
"step": 1713 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.00015521126415115623, |
|
"loss": 2.3029, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 0.00015504328026261566, |
|
"loss": 2.3065, |
|
"step": 1719 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.0001548750732383372, |
|
"loss": 2.3214, |
|
"step": 1722 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00015470664376020246, |
|
"loss": 2.3422, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.00015453799251099478, |
|
"loss": 2.3227, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.00015436912017439657, |
|
"loss": 2.3816, |
|
"step": 1731 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.00015420002743498645, |
|
"loss": 2.3966, |
|
"step": 1734 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 0.00015403071497823652, |
|
"loss": 2.2734, |
|
"step": 1737 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.0001538611834905096, |
|
"loss": 2.384, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.00015369143365905635, |
|
"loss": 2.3495, |
|
"step": 1743 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.00015352146617201266, |
|
"loss": 2.3252, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.00015335128171839671, |
|
"loss": 2.349, |
|
"step": 1749 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.00015318088098810622, |
|
"loss": 2.3845, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.0001530102646719156, |
|
"loss": 2.3248, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.0001528394334614733, |
|
"loss": 2.3738, |
|
"step": 1758 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.00015266838804929892, |
|
"loss": 2.3512, |
|
"step": 1761 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 0.00015249712912878031, |
|
"loss": 2.4223, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.00015232565739417092, |
|
"loss": 2.3593, |
|
"step": 1767 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.00015215397354058686, |
|
"loss": 2.3934, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.00015198207826400413, |
|
"loss": 2.4059, |
|
"step": 1773 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.00015180997226125592, |
|
"loss": 2.3081, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.00015163765623002945, |
|
"loss": 2.2689, |
|
"step": 1779 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.00015146513086886356, |
|
"loss": 2.3314, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.00015129239687714557, |
|
"loss": 2.2807, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.00015111945495510857, |
|
"loss": 2.3481, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.0001509463058038286, |
|
"loss": 2.3724, |
|
"step": 1791 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.00015077295012522174, |
|
"loss": 2.3322, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.00015059938862204127, |
|
"loss": 2.2553, |
|
"step": 1797 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.0001504256219978749, |
|
"loss": 2.2632, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.0001502516509571418, |
|
"loss": 2.27, |
|
"step": 1803 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00015007747620508988, |
|
"loss": 2.3615, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.00014990309844779284, |
|
"loss": 2.2815, |
|
"step": 1809 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.0001497285183921473, |
|
"loss": 2.3518, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00014955373674586996, |
|
"loss": 2.3358, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.00014937875421749472, |
|
"loss": 2.3217, |
|
"step": 1818 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.00014920357151636992, |
|
"loss": 2.3558, |
|
"step": 1821 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.00014902818935265527, |
|
"loss": 2.3474, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.00014885260843731905, |
|
"loss": 2.3579, |
|
"step": 1827 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.00014867682948213536, |
|
"loss": 2.3964, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.000148500853199681, |
|
"loss": 2.3697, |
|
"step": 1833 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 0.00014832468030333265, |
|
"loss": 2.3099, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.00014814831150726428, |
|
"loss": 2.3651, |
|
"step": 1839 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.00014797174752644382, |
|
"loss": 2.304, |
|
"step": 1842 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.00014779498907663033, |
|
"loss": 2.3598, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0001476180368743715, |
|
"loss": 2.2852, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6175154625608633, |
|
"eval_loss": 2.245072364807129, |
|
"eval_runtime": 16.3034, |
|
"eval_samples_per_second": 134.757, |
|
"eval_steps_per_second": 67.409, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.00014744089163700025, |
|
"loss": 2.395, |
|
"step": 1851 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.0001472635540826321, |
|
"loss": 2.3843, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.00014708602493016218, |
|
"loss": 2.3417, |
|
"step": 1857 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.0001469083048992623, |
|
"loss": 2.2776, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.00014673039471037807, |
|
"loss": 2.2988, |
|
"step": 1863 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.000146552295084726, |
|
"loss": 2.2561, |
|
"step": 1866 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.00014637400674429057, |
|
"loss": 2.3886, |
|
"step": 1869 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.00014619553041182116, |
|
"loss": 2.2859, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00014601686681082934, |
|
"loss": 2.3711, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.00014583801666558576, |
|
"loss": 2.315, |
|
"step": 1878 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.00014565898070111735, |
|
"loss": 2.2431, |
|
"step": 1881 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.0001454797596432043, |
|
"loss": 2.3051, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.00014530035421837716, |
|
"loss": 2.3228, |
|
"step": 1887 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.00014512076515391375, |
|
"loss": 2.3054, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0001449409931778365, |
|
"loss": 2.3504, |
|
"step": 1893 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.0001447610390189092, |
|
"loss": 2.328, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00014458090340663428, |
|
"loss": 2.268, |
|
"step": 1899 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.00014440058707124967, |
|
"loss": 2.3306, |
|
"step": 1902 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.00014422009074372604, |
|
"loss": 2.2928, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.00014403941515576344, |
|
"loss": 2.3729, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.00014385856103978894, |
|
"loss": 2.2654, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.0001436775291289532, |
|
"loss": 2.3404, |
|
"step": 1914 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.00014349632015712752, |
|
"loss": 2.2935, |
|
"step": 1917 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.00014331493485890114, |
|
"loss": 2.2743, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.00014313337396957803, |
|
"loss": 2.4234, |
|
"step": 1923 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.00014295163822517393, |
|
"loss": 2.3393, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0001427697283624135, |
|
"loss": 2.3336, |
|
"step": 1929 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.00014258764511872716, |
|
"loss": 2.3729, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.00014240538923224823, |
|
"loss": 2.3284, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.00014222296144180994, |
|
"loss": 2.3265, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.00014204036248694225, |
|
"loss": 2.3156, |
|
"step": 1941 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.00014185759310786917, |
|
"loss": 2.2792, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.00014167465404550542, |
|
"loss": 2.2745, |
|
"step": 1947 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00014149154604145366, |
|
"loss": 2.2669, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.00014130826983800145, |
|
"loss": 2.2678, |
|
"step": 1953 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.0001411248261781181, |
|
"loss": 2.3385, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00014094121580545183, |
|
"loss": 2.3564, |
|
"step": 1959 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.0001407574394643267, |
|
"loss": 2.2749, |
|
"step": 1962 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.00014057349789973946, |
|
"loss": 2.2941, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.00014038939185735683, |
|
"loss": 2.2932, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0001402051220835121, |
|
"loss": 2.2875, |
|
"step": 1971 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00014002068932520247, |
|
"loss": 2.3496, |
|
"step": 1974 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 0.00013983609433008574, |
|
"loss": 2.273, |
|
"step": 1977 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.0001396513378464774, |
|
"loss": 2.2399, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.00013946642062334766, |
|
"loss": 2.3506, |
|
"step": 1983 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00013928134341031825, |
|
"loss": 2.2949, |
|
"step": 1986 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.00013909610695765948, |
|
"loss": 2.3473, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.00013891071201628728, |
|
"loss": 2.2964, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.00013872515933776, |
|
"loss": 2.3721, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.00013853944967427535, |
|
"loss": 2.3076, |
|
"step": 1998 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.00013835358377866763, |
|
"loss": 2.3327, |
|
"step": 2001 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.00013816756240440424, |
|
"loss": 2.2885, |
|
"step": 2004 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00013798138630558303, |
|
"loss": 2.3773, |
|
"step": 2007 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.00013779505623692909, |
|
"loss": 2.3261, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.00013760857295379154, |
|
"loss": 2.3456, |
|
"step": 2013 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.00013742193721214064, |
|
"loss": 2.422, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.00013723514976856483, |
|
"loss": 2.2936, |
|
"step": 2019 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.00013704821138026737, |
|
"loss": 2.3485, |
|
"step": 2022 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.00013686112280506346, |
|
"loss": 2.2716, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.00013667388480137716, |
|
"loss": 2.291, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.0001364864981282383, |
|
"loss": 2.3078, |
|
"step": 2031 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.00013629896354527932, |
|
"loss": 2.3167, |
|
"step": 2034 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.0001361112818127323, |
|
"loss": 2.3923, |
|
"step": 2037 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 0.00013592345369142585, |
|
"loss": 2.3003, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00013573547994278205, |
|
"loss": 2.2712, |
|
"step": 2043 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.0001355473613288132, |
|
"loss": 2.3246, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 0.00013535909861211903, |
|
"loss": 2.2397, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.00013517069255588327, |
|
"loss": 2.2596, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.00013498214392387083, |
|
"loss": 2.1575, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.0001347934534804246, |
|
"loss": 2.2904, |
|
"step": 2058 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.00013460462199046226, |
|
"loss": 2.1767, |
|
"step": 2061 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.00013441565021947332, |
|
"loss": 2.2593, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.00013422653893351604, |
|
"loss": 2.3004, |
|
"step": 2067 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.0001340372888992141, |
|
"loss": 2.2932, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.0001338479008837538, |
|
"loss": 2.3329, |
|
"step": 2073 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.00013365837565488064, |
|
"loss": 2.2765, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.00013346871398089644, |
|
"loss": 2.2924, |
|
"step": 2079 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.00013327891663065614, |
|
"loss": 2.2459, |
|
"step": 2082 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 0.0001330889843735647, |
|
"loss": 2.1898, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00013289891797957395, |
|
"loss": 2.2234, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 0.00013270871821917946, |
|
"loss": 2.3638, |
|
"step": 2091 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.00013251838586341745, |
|
"loss": 2.3405, |
|
"step": 2094 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 0.00013232792168386176, |
|
"loss": 2.2713, |
|
"step": 2097 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.00013213732645262044, |
|
"loss": 2.2664, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.00013194660094233298, |
|
"loss": 2.2861, |
|
"step": 2103 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.00013175574592616692, |
|
"loss": 2.2772, |
|
"step": 2106 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 0.00013156476217781468, |
|
"loss": 2.2263, |
|
"step": 2109 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.00013137365047149078, |
|
"loss": 2.3177, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.00013118241158192827, |
|
"loss": 2.2316, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.0001309910462843758, |
|
"loss": 2.3542, |
|
"step": 2118 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.00013079955535459455, |
|
"loss": 2.2913, |
|
"step": 2121 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.000130607939568855, |
|
"loss": 2.2785, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 0.00013041619970393352, |
|
"loss": 2.2508, |
|
"step": 2127 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 0.0001302243365371098, |
|
"loss": 2.2514, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 0.00013003235084616324, |
|
"loss": 2.2517, |
|
"step": 2133 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.00012984024340936983, |
|
"loss": 2.2517, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.00012964801500549931, |
|
"loss": 2.2079, |
|
"step": 2139 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.00012945566641381159, |
|
"loss": 2.3606, |
|
"step": 2142 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00012926319841405394, |
|
"loss": 2.3764, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.00012907061178645763, |
|
"loss": 2.2781, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.00012887790731173486, |
|
"loss": 2.2436, |
|
"step": 2151 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.0001286850857710755, |
|
"loss": 2.228, |
|
"step": 2154 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6243810506983248, |
|
"eval_loss": 2.193675994873047, |
|
"eval_runtime": 16.3306, |
|
"eval_samples_per_second": 134.533, |
|
"eval_steps_per_second": 67.297, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.00012849214794614407, |
|
"loss": 2.2569, |
|
"step": 2157 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 0.0001282990946190764, |
|
"loss": 2.3658, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.00012810592657247656, |
|
"loss": 2.2651, |
|
"step": 2163 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.0001279126445894138, |
|
"loss": 2.215, |
|
"step": 2166 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.00012771924945341906, |
|
"loss": 2.2391, |
|
"step": 2169 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.00012752574194848211, |
|
"loss": 2.2661, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.00012733212285904818, |
|
"loss": 2.1834, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.0001271383929700149, |
|
"loss": 2.2816, |
|
"step": 2178 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.00012694455306672895, |
|
"loss": 2.2706, |
|
"step": 2181 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 0.00012675060393498318, |
|
"loss": 2.2744, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00012655654636101304, |
|
"loss": 2.2792, |
|
"step": 2187 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.00012636238113149367, |
|
"loss": 2.3216, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.00012616810903353666, |
|
"loss": 2.2724, |
|
"step": 2193 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.00012597373085468678, |
|
"loss": 2.2908, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00012577924738291877, |
|
"loss": 2.3213, |
|
"step": 2199 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.0001255846594066344, |
|
"loss": 2.2962, |
|
"step": 2202 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.00012538996771465887, |
|
"loss": 2.2341, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.00012519517309623793, |
|
"loss": 2.3292, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 0.0001250002763410346, |
|
"loss": 2.2678, |
|
"step": 2211 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.0001248052782391259, |
|
"loss": 2.2799, |
|
"step": 2214 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.00012461017958099966, |
|
"loss": 2.2489, |
|
"step": 2217 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.00012441498115755146, |
|
"loss": 2.3042, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.00012421968376008115, |
|
"loss": 2.1692, |
|
"step": 2223 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.00012402428818028994, |
|
"loss": 2.3398, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.000123828795210277, |
|
"loss": 2.2435, |
|
"step": 2229 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.00012363320564253637, |
|
"loss": 2.2838, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.0001234375202699535, |
|
"loss": 2.1099, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.00012324173988580235, |
|
"loss": 2.239, |
|
"step": 2238 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.0001230458652837421, |
|
"loss": 2.2111, |
|
"step": 2241 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.00012284989725781377, |
|
"loss": 2.2932, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.0001226538366024371, |
|
"loss": 2.2204, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.00012245768411240737, |
|
"loss": 2.2478, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.00012226144058289216, |
|
"loss": 2.2759, |
|
"step": 2253 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.00012206510680942806, |
|
"loss": 2.297, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.00012186868358791756, |
|
"loss": 2.2437, |
|
"step": 2259 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.00012167217171462566, |
|
"loss": 2.2756, |
|
"step": 2262 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.00012147557198617678, |
|
"loss": 2.2352, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.00012127888519955157, |
|
"loss": 2.2638, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.0001210821121520835, |
|
"loss": 2.1679, |
|
"step": 2271 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.0001208852536414557, |
|
"loss": 2.2499, |
|
"step": 2274 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.00012068831046569789, |
|
"loss": 2.2459, |
|
"step": 2277 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.00012049128342318288, |
|
"loss": 2.1518, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.00012029417331262349, |
|
"loss": 2.246, |
|
"step": 2283 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.00012009698093306936, |
|
"loss": 2.2471, |
|
"step": 2286 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.00011989970708390353, |
|
"loss": 2.2332, |
|
"step": 2289 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.00011970235256483934, |
|
"loss": 2.2554, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.00011950491817591717, |
|
"loss": 2.2454, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.00011930740471750121, |
|
"loss": 2.2378, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.00011910981299027608, |
|
"loss": 2.2978, |
|
"step": 2301 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.00011891214379524375, |
|
"loss": 2.2042, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.0001187143979337203, |
|
"loss": 2.2105, |
|
"step": 2307 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.00011851657620733243, |
|
"loss": 2.2343, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.00011831867941801455, |
|
"loss": 2.2197, |
|
"step": 2313 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.00011812070836800533, |
|
"loss": 2.3094, |
|
"step": 2316 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.00011792266385984433, |
|
"loss": 2.2106, |
|
"step": 2319 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.00011772454669636912, |
|
"loss": 2.3657, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.00011752635768071167, |
|
"loss": 2.3096, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.0001173280976162952, |
|
"loss": 2.1856, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 0.00011712976730683108, |
|
"loss": 2.236, |
|
"step": 2331 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.00011693136755631528, |
|
"loss": 2.2304, |
|
"step": 2334 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 0.00011673289916902539, |
|
"loss": 2.2346, |
|
"step": 2337 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.00011653436294951724, |
|
"loss": 2.2353, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.00011633575970262152, |
|
"loss": 2.2511, |
|
"step": 2343 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 0.0001161370902334408, |
|
"loss": 2.2557, |
|
"step": 2346 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.00011593835534734596, |
|
"loss": 2.2501, |
|
"step": 2349 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.00011573955584997318, |
|
"loss": 2.2231, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.00011554069254722051, |
|
"loss": 2.1851, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.00011534176624524464, |
|
"loss": 2.2573, |
|
"step": 2358 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 0.00011514277775045768, |
|
"loss": 2.2314, |
|
"step": 2361 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.00011494372786952384, |
|
"loss": 2.1805, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.00011474461740935621, |
|
"loss": 2.2441, |
|
"step": 2367 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.0001145454471771134, |
|
"loss": 2.2018, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.0001143462179801964, |
|
"loss": 2.1672, |
|
"step": 2373 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.00011414693062624515, |
|
"loss": 2.2151, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.00011394758592313543, |
|
"loss": 2.2059, |
|
"step": 2379 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 0.00011374818467897541, |
|
"loss": 2.2516, |
|
"step": 2382 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.00011354872770210256, |
|
"loss": 2.1991, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.00011334921580108027, |
|
"loss": 2.2307, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.00011314964978469445, |
|
"loss": 2.1478, |
|
"step": 2391 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.00011295003046195058, |
|
"loss": 2.1494, |
|
"step": 2394 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.00011275035864207017, |
|
"loss": 2.289, |
|
"step": 2397 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.00011255063513448743, |
|
"loss": 2.1709, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.00011235086074884622, |
|
"loss": 2.2586, |
|
"step": 2403 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.00011215103629499661, |
|
"loss": 2.2279, |
|
"step": 2406 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 0.00011195116258299169, |
|
"loss": 2.2403, |
|
"step": 2409 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00011175124042308416, |
|
"loss": 2.2453, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.00011155127062572314, |
|
"loss": 2.2293, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 0.00011135125400155091, |
|
"loss": 2.2598, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 0.00011115119136139951, |
|
"loss": 2.2331, |
|
"step": 2421 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.00011095108351628758, |
|
"loss": 2.2416, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.00011075093127741695, |
|
"loss": 2.1992, |
|
"step": 2427 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 0.00011055073545616952, |
|
"loss": 2.1727, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.0001103504968641037, |
|
"loss": 2.2371, |
|
"step": 2433 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.00011015021631295149, |
|
"loss": 2.23, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.00010994989461461476, |
|
"loss": 2.1677, |
|
"step": 2439 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 0.00010974953258116238, |
|
"loss": 2.2252, |
|
"step": 2442 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.00010954913102482664, |
|
"loss": 2.2119, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.000109348690758, |
|
"loss": 2.2567, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.00010914821259323202, |
|
"loss": 2.2209, |
|
"step": 2451 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.00010894769734322567, |
|
"loss": 2.2701, |
|
"step": 2454 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 0.00010874714582083438, |
|
"loss": 2.1552, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.00010854655883905869, |
|
"loss": 2.1527, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001083459372110427, |
|
"loss": 2.2013, |
|
"step": 2463 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6309924029431118, |
|
"eval_loss": 2.1445603370666504, |
|
"eval_runtime": 16.3353, |
|
"eval_samples_per_second": 134.494, |
|
"eval_steps_per_second": 67.278, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.00010814528175007108, |
|
"loss": 2.22, |
|
"step": 2466 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.0001079445932695657, |
|
"loss": 2.2225, |
|
"step": 2469 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 0.00010774387258308217, |
|
"loss": 2.2667, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 0.00010754312050430668, |
|
"loss": 2.2468, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.00010734233784705276, |
|
"loss": 2.2416, |
|
"step": 2478 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 0.00010714152542525781, |
|
"loss": 2.1588, |
|
"step": 2481 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 0.00010694068405297997, |
|
"loss": 2.2093, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.00010673981454439463, |
|
"loss": 2.2511, |
|
"step": 2487 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 0.00010653891771379134, |
|
"loss": 2.2265, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 0.00010633799437557039, |
|
"loss": 2.2257, |
|
"step": 2493 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.00010613704534423949, |
|
"loss": 2.2588, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.00010593607143441053, |
|
"loss": 2.1435, |
|
"step": 2499 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.00010573507346079625, |
|
"loss": 2.1494, |
|
"step": 2502 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 0.00010553405223820693, |
|
"loss": 2.2067, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.00010533300858154715, |
|
"loss": 2.2174, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.00010513194330581233, |
|
"loss": 2.168, |
|
"step": 2511 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 0.00010493085722608562, |
|
"loss": 2.2395, |
|
"step": 2514 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 0.00010472975115753452, |
|
"loss": 2.1584, |
|
"step": 2517 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 0.00010452862591540742, |
|
"loss": 2.1738, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 0.0001043274823150306, |
|
"loss": 2.2533, |
|
"step": 2523 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.00010412632117180471, |
|
"loss": 2.308, |
|
"step": 2526 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.00010392514330120145, |
|
"loss": 2.1664, |
|
"step": 2529 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 0.00010372394951876043, |
|
"loss": 2.2669, |
|
"step": 2532 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.00010352274064008567, |
|
"loss": 2.1636, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 0.00010332151748084242, |
|
"loss": 2.2729, |
|
"step": 2538 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.00010312028085675391, |
|
"loss": 2.2097, |
|
"step": 2541 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.00010291903158359783, |
|
"loss": 2.2306, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 0.0001027177704772032, |
|
"loss": 2.1675, |
|
"step": 2547 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 0.00010251649835344696, |
|
"loss": 2.266, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 0.0001023152160282508, |
|
"loss": 2.2716, |
|
"step": 2553 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 0.00010211392431757773, |
|
"loss": 2.2013, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 0.00010191262403742878, |
|
"loss": 2.1526, |
|
"step": 2559 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.00010171131600383974, |
|
"loss": 2.1521, |
|
"step": 2562 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.00010151000103287784, |
|
"loss": 2.2781, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 0.00010130867994063839, |
|
"loss": 2.1813, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 0.00010110735354324159, |
|
"loss": 2.2153, |
|
"step": 2571 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 0.00010090602265682906, |
|
"loss": 2.2226, |
|
"step": 2574 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.00010070468809756068, |
|
"loss": 2.2177, |
|
"step": 2577 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 0.00010050335068161123, |
|
"loss": 2.186, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 0.00010030201122516696, |
|
"loss": 2.2026, |
|
"step": 2583 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.00010010067054442251, |
|
"loss": 2.2229, |
|
"step": 2586 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 9.989932945557751e-05, |
|
"loss": 2.2416, |
|
"step": 2589 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 9.969798877483308e-05, |
|
"loss": 2.2854, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 9.949664931838882e-05, |
|
"loss": 2.158, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 9.929531190243932e-05, |
|
"loss": 2.2394, |
|
"step": 2598 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 9.909397734317095e-05, |
|
"loss": 2.1703, |
|
"step": 2601 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 9.889264645675843e-05, |
|
"loss": 2.2031, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 9.869132005936163e-05, |
|
"loss": 2.2224, |
|
"step": 2607 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 9.848999896712217e-05, |
|
"loss": 2.2693, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 9.82886839961603e-05, |
|
"loss": 2.2381, |
|
"step": 2613 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 9.808737596257121e-05, |
|
"loss": 2.1839, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 9.788607568242229e-05, |
|
"loss": 2.129, |
|
"step": 2619 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 9.768478397174922e-05, |
|
"loss": 2.121, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 9.748350164655306e-05, |
|
"loss": 2.2323, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 9.728222952279684e-05, |
|
"loss": 2.2613, |
|
"step": 2628 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 9.708096841640222e-05, |
|
"loss": 2.1113, |
|
"step": 2631 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 9.687971914324607e-05, |
|
"loss": 2.1729, |
|
"step": 2634 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 9.667848251915758e-05, |
|
"loss": 2.2271, |
|
"step": 2637 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 9.647725935991436e-05, |
|
"loss": 2.2319, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 9.627605048123959e-05, |
|
"loss": 2.2244, |
|
"step": 2643 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 9.607485669879857e-05, |
|
"loss": 2.1239, |
|
"step": 2646 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 9.587367882819532e-05, |
|
"loss": 2.2429, |
|
"step": 2649 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 9.567251768496938e-05, |
|
"loss": 2.1936, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 9.547137408459257e-05, |
|
"loss": 2.2038, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 9.52702488424655e-05, |
|
"loss": 2.1978, |
|
"step": 2658 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 9.506914277391439e-05, |
|
"loss": 2.1977, |
|
"step": 2661 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 9.486805669418769e-05, |
|
"loss": 2.1772, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 9.466699141845287e-05, |
|
"loss": 2.1929, |
|
"step": 2667 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 9.446594776179306e-05, |
|
"loss": 2.2712, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 9.426492653920375e-05, |
|
"loss": 2.2606, |
|
"step": 2673 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 9.406392856558949e-05, |
|
"loss": 2.1655, |
|
"step": 2676 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 9.386295465576053e-05, |
|
"loss": 2.1544, |
|
"step": 2679 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 9.366200562442963e-05, |
|
"loss": 2.2016, |
|
"step": 2682 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 9.346108228620868e-05, |
|
"loss": 2.3044, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 9.326018545560542e-05, |
|
"loss": 2.2102, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 9.305931594702007e-05, |
|
"loss": 2.1511, |
|
"step": 2691 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 9.28584745747422e-05, |
|
"loss": 2.212, |
|
"step": 2694 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 9.265766215294725e-05, |
|
"loss": 2.1871, |
|
"step": 2697 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 9.245687949569332e-05, |
|
"loss": 2.226, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 9.225612741691788e-05, |
|
"loss": 2.1323, |
|
"step": 2703 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 9.205540673043434e-05, |
|
"loss": 2.1258, |
|
"step": 2706 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 9.185471824992891e-05, |
|
"loss": 2.1963, |
|
"step": 2709 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 9.165406278895732e-05, |
|
"loss": 2.2423, |
|
"step": 2712 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 9.145344116094134e-05, |
|
"loss": 2.1678, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 9.125285417916563e-05, |
|
"loss": 2.196, |
|
"step": 2718 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 9.105230265677437e-05, |
|
"loss": 2.1637, |
|
"step": 2721 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 9.085178740676803e-05, |
|
"loss": 2.1019, |
|
"step": 2724 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 9.065130924199998e-05, |
|
"loss": 2.1651, |
|
"step": 2727 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 9.045086897517337e-05, |
|
"loss": 2.2656, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 9.025046741883764e-05, |
|
"loss": 2.2224, |
|
"step": 2733 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 9.005010538538527e-05, |
|
"loss": 2.1764, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 8.984978368704855e-05, |
|
"loss": 2.1928, |
|
"step": 2739 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 8.964950313589633e-05, |
|
"loss": 2.111, |
|
"step": 2742 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 8.944926454383049e-05, |
|
"loss": 2.2286, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 8.924906872258306e-05, |
|
"loss": 2.18, |
|
"step": 2748 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 8.904891648371244e-05, |
|
"loss": 2.1869, |
|
"step": 2751 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 8.884880863860051e-05, |
|
"loss": 2.2054, |
|
"step": 2754 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 8.864874599844911e-05, |
|
"loss": 2.1351, |
|
"step": 2757 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 8.84487293742769e-05, |
|
"loss": 2.2412, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 8.824875957691588e-05, |
|
"loss": 2.2394, |
|
"step": 2763 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 8.804883741700833e-05, |
|
"loss": 2.1228, |
|
"step": 2766 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 8.78489637050034e-05, |
|
"loss": 2.1862, |
|
"step": 2769 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 8.764913925115381e-05, |
|
"loss": 2.1463, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6357306777396295, |
|
"eval_loss": 2.106170654296875, |
|
"eval_runtime": 16.2999, |
|
"eval_samples_per_second": 134.786, |
|
"eval_steps_per_second": 67.424, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 8.744936486551262e-05, |
|
"loss": 2.2427, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 8.724964135792988e-05, |
|
"loss": 2.105, |
|
"step": 2778 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 8.70499695380494e-05, |
|
"loss": 2.2257, |
|
"step": 2781 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 8.685035021530554e-05, |
|
"loss": 2.2433, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 8.665078419891977e-05, |
|
"loss": 2.1763, |
|
"step": 2787 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 8.645127229789746e-05, |
|
"loss": 2.2108, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 8.625181532102463e-05, |
|
"loss": 2.115, |
|
"step": 2793 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 8.605241407686462e-05, |
|
"loss": 2.2288, |
|
"step": 2796 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 8.585306937375486e-05, |
|
"loss": 2.1351, |
|
"step": 2799 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 8.565378201980361e-05, |
|
"loss": 2.1875, |
|
"step": 2802 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 8.545455282288661e-05, |
|
"loss": 2.1558, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 8.525538259064381e-05, |
|
"loss": 2.2156, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 8.505627213047617e-05, |
|
"loss": 2.1982, |
|
"step": 2811 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 8.485722224954237e-05, |
|
"loss": 2.1447, |
|
"step": 2814 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 8.465823375475537e-05, |
|
"loss": 2.2311, |
|
"step": 2817 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 8.445930745277953e-05, |
|
"loss": 2.1685, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 8.426044415002684e-05, |
|
"loss": 2.1237, |
|
"step": 2823 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 8.406164465265406e-05, |
|
"loss": 2.1082, |
|
"step": 2826 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 8.386290976655924e-05, |
|
"loss": 2.1352, |
|
"step": 2829 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 8.366424029737853e-05, |
|
"loss": 2.1588, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 8.346563705048277e-05, |
|
"loss": 2.2979, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 8.326710083097462e-05, |
|
"loss": 2.1507, |
|
"step": 2838 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 8.306863244368474e-05, |
|
"loss": 2.127, |
|
"step": 2841 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 8.287023269316894e-05, |
|
"loss": 2.1869, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 8.267190238370482e-05, |
|
"loss": 2.1259, |
|
"step": 2847 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 8.247364231928837e-05, |
|
"loss": 2.2649, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 8.227545330363087e-05, |
|
"loss": 2.1888, |
|
"step": 2853 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 8.207733614015566e-05, |
|
"loss": 2.1949, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 8.18792916319947e-05, |
|
"loss": 2.1379, |
|
"step": 2859 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 8.168132058198546e-05, |
|
"loss": 2.1585, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 8.148342379266759e-05, |
|
"loss": 2.1941, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 8.128560206627974e-05, |
|
"loss": 2.1447, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 8.108785620475624e-05, |
|
"loss": 2.1413, |
|
"step": 2871 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 8.089018700972393e-05, |
|
"loss": 2.25, |
|
"step": 2874 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 8.069259528249882e-05, |
|
"loss": 2.2048, |
|
"step": 2877 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 8.049508182408284e-05, |
|
"loss": 2.1432, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 8.029764743516068e-05, |
|
"loss": 2.2312, |
|
"step": 2883 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 8.01002929160965e-05, |
|
"loss": 2.1929, |
|
"step": 2886 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 7.990301906693069e-05, |
|
"loss": 2.1687, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 7.970582668737652e-05, |
|
"loss": 2.1176, |
|
"step": 2892 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 7.950871657681716e-05, |
|
"loss": 2.1793, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 7.931168953430213e-05, |
|
"loss": 2.1538, |
|
"step": 2898 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 7.91147463585443e-05, |
|
"loss": 2.1519, |
|
"step": 2901 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 7.891788784791655e-05, |
|
"loss": 2.1821, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 7.872111480044847e-05, |
|
"loss": 2.145, |
|
"step": 2907 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 7.852442801382322e-05, |
|
"loss": 2.1574, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 7.832782828537437e-05, |
|
"loss": 2.2808, |
|
"step": 2913 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 7.813131641208245e-05, |
|
"loss": 2.1604, |
|
"step": 2916 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 7.793489319057195e-05, |
|
"loss": 2.1786, |
|
"step": 2919 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 7.773855941710786e-05, |
|
"loss": 2.2453, |
|
"step": 2922 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 7.754231588759265e-05, |
|
"loss": 2.2529, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 7.734616339756291e-05, |
|
"loss": 2.1199, |
|
"step": 2928 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 7.715010274218625e-05, |
|
"loss": 2.2108, |
|
"step": 2931 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 7.695413471625792e-05, |
|
"loss": 2.2136, |
|
"step": 2934 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 7.675826011419766e-05, |
|
"loss": 2.1859, |
|
"step": 2937 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 7.656247973004656e-05, |
|
"loss": 2.1492, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 7.63667943574637e-05, |
|
"loss": 2.2054, |
|
"step": 2943 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 7.617120478972297e-05, |
|
"loss": 2.1295, |
|
"step": 2946 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 7.597571181971006e-05, |
|
"loss": 2.0959, |
|
"step": 2949 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 7.578031623991886e-05, |
|
"loss": 2.2796, |
|
"step": 2952 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 7.558501884244857e-05, |
|
"loss": 2.0984, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 7.538982041900033e-05, |
|
"loss": 2.2107, |
|
"step": 2958 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 7.519472176087414e-05, |
|
"loss": 2.1768, |
|
"step": 2961 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 7.49997236589654e-05, |
|
"loss": 2.1817, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 7.480482690376207e-05, |
|
"loss": 2.1886, |
|
"step": 2967 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 7.461003228534115e-05, |
|
"loss": 2.2048, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 7.441534059336563e-05, |
|
"loss": 2.1702, |
|
"step": 2973 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 7.422075261708125e-05, |
|
"loss": 2.1699, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 7.402626914531328e-05, |
|
"loss": 2.1161, |
|
"step": 2979 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 7.383189096646335e-05, |
|
"loss": 2.1665, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 7.363761886850633e-05, |
|
"loss": 2.196, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 7.344345363898697e-05, |
|
"loss": 2.1785, |
|
"step": 2988 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 7.324939606501685e-05, |
|
"loss": 2.1046, |
|
"step": 2991 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 7.305544693327106e-05, |
|
"loss": 2.1544, |
|
"step": 2994 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 7.286160702998515e-05, |
|
"loss": 2.1705, |
|
"step": 2997 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 7.266787714095182e-05, |
|
"loss": 2.2029, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 7.247425805151788e-05, |
|
"loss": 2.1483, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 7.228075054658096e-05, |
|
"loss": 2.1243, |
|
"step": 3006 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 7.208735541058622e-05, |
|
"loss": 2.0956, |
|
"step": 3009 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 7.189407342752345e-05, |
|
"loss": 2.1788, |
|
"step": 3012 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 7.170090538092367e-05, |
|
"loss": 2.2902, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 7.150785205385596e-05, |
|
"loss": 2.0941, |
|
"step": 3018 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 7.131491422892454e-05, |
|
"loss": 2.1134, |
|
"step": 3021 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 7.112209268826517e-05, |
|
"loss": 2.1195, |
|
"step": 3024 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 7.092938821354238e-05, |
|
"loss": 2.2264, |
|
"step": 3027 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 7.073680158594609e-05, |
|
"loss": 2.205, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 7.054433358618847e-05, |
|
"loss": 2.1962, |
|
"step": 3033 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 7.035198499450071e-05, |
|
"loss": 2.1881, |
|
"step": 3036 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 7.015975659063017e-05, |
|
"loss": 2.1807, |
|
"step": 3039 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 6.996764915383681e-05, |
|
"loss": 2.1038, |
|
"step": 3042 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 6.97756634628902e-05, |
|
"loss": 2.1783, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 6.95838002960665e-05, |
|
"loss": 2.1281, |
|
"step": 3048 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 6.939206043114506e-05, |
|
"loss": 2.1633, |
|
"step": 3051 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 6.920044464540543e-05, |
|
"loss": 2.1237, |
|
"step": 3054 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 6.900895371562419e-05, |
|
"loss": 2.1732, |
|
"step": 3057 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 6.881758841807176e-05, |
|
"loss": 2.139, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 6.862634952850926e-05, |
|
"loss": 2.1296, |
|
"step": 3063 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 6.843523782218534e-05, |
|
"loss": 2.1914, |
|
"step": 3066 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 6.824425407383315e-05, |
|
"loss": 2.1584, |
|
"step": 3069 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 6.805339905766706e-05, |
|
"loss": 2.128, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 6.786267354737955e-05, |
|
"loss": 2.082, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 6.767207831613828e-05, |
|
"loss": 2.0882, |
|
"step": 3078 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.636978362829467, |
|
"eval_loss": 2.084742307662964, |
|
"eval_runtime": 16.3444, |
|
"eval_samples_per_second": 134.419, |
|
"eval_steps_per_second": 67.24, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 6.748161413658256e-05, |
|
"loss": 2.0895, |
|
"step": 3081 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 6.729128178082058e-05, |
|
"loss": 2.143, |
|
"step": 3084 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 6.71010820204261e-05, |
|
"loss": 2.1523, |
|
"step": 3087 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 6.691101562643534e-05, |
|
"loss": 2.1812, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 6.672108336934386e-05, |
|
"loss": 2.1807, |
|
"step": 3093 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 6.653128601910357e-05, |
|
"loss": 2.1916, |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 6.63416243451194e-05, |
|
"loss": 2.2036, |
|
"step": 3099 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"learning_rate": 6.615209911624623e-05, |
|
"loss": 2.1112, |
|
"step": 3102 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 6.596271110078591e-05, |
|
"loss": 2.0984, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 6.577346106648399e-05, |
|
"loss": 2.2862, |
|
"step": 3108 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 6.558434978052667e-05, |
|
"loss": 2.1379, |
|
"step": 3111 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 6.539537800953777e-05, |
|
"loss": 2.1475, |
|
"step": 3114 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 6.520654651957543e-05, |
|
"loss": 2.144, |
|
"step": 3117 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 6.50178560761292e-05, |
|
"loss": 2.1383, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 6.482930744411677e-05, |
|
"loss": 2.0835, |
|
"step": 3123 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 6.464090138788102e-05, |
|
"loss": 2.0889, |
|
"step": 3126 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 6.445263867118679e-05, |
|
"loss": 2.2135, |
|
"step": 3129 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 6.426452005721797e-05, |
|
"loss": 2.1301, |
|
"step": 3132 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 6.407654630857416e-05, |
|
"loss": 2.0498, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 6.388871818726774e-05, |
|
"loss": 2.0766, |
|
"step": 3138 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 6.370103645472072e-05, |
|
"loss": 2.195, |
|
"step": 3141 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 6.351350187176176e-05, |
|
"loss": 2.2205, |
|
"step": 3144 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 6.332611519862284e-05, |
|
"loss": 2.1172, |
|
"step": 3147 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 6.313887719493657e-05, |
|
"loss": 2.1688, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 6.295178861973267e-05, |
|
"loss": 2.0986, |
|
"step": 3153 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 6.27648502314352e-05, |
|
"loss": 2.0658, |
|
"step": 3156 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 6.257806278785937e-05, |
|
"loss": 2.1681, |
|
"step": 3159 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 6.239142704620853e-05, |
|
"loss": 2.2028, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 6.220494376307094e-05, |
|
"loss": 2.1707, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 6.201861369441697e-05, |
|
"loss": 2.2144, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 6.183243759559579e-05, |
|
"loss": 2.2155, |
|
"step": 3171 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 6.164641622133241e-05, |
|
"loss": 2.1628, |
|
"step": 3174 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 6.146055032572466e-05, |
|
"loss": 2.1457, |
|
"step": 3177 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 6.127484066224005e-05, |
|
"loss": 2.079, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 6.108928798371272e-05, |
|
"loss": 2.1211, |
|
"step": 3183 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 6.090389304234052e-05, |
|
"loss": 2.1723, |
|
"step": 3186 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 6.0718656589681764e-05, |
|
"loss": 2.1723, |
|
"step": 3189 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"learning_rate": 6.053357937665237e-05, |
|
"loss": 2.1179, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 6.034866215352262e-05, |
|
"loss": 2.2066, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 6.016390566991429e-05, |
|
"loss": 2.1562, |
|
"step": 3198 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 5.997931067479753e-05, |
|
"loss": 2.1374, |
|
"step": 3201 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 5.979487791648789e-05, |
|
"loss": 2.1595, |
|
"step": 3204 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 5.961060814264321e-05, |
|
"loss": 2.194, |
|
"step": 3207 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 5.942650210026055e-05, |
|
"loss": 2.1749, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 5.9242560535673344e-05, |
|
"loss": 2.207, |
|
"step": 3213 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 5.905878419454821e-05, |
|
"loss": 2.1641, |
|
"step": 3216 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 5.8875173821881904e-05, |
|
"loss": 2.2015, |
|
"step": 3219 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 5.869173016199858e-05, |
|
"loss": 2.0588, |
|
"step": 3222 |
|
}, |
|
{ |
|
"epoch": 10.47, |
|
"learning_rate": 5.850845395854636e-05, |
|
"loss": 2.1809, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 5.8325345954494633e-05, |
|
"loss": 2.0862, |
|
"step": 3228 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 5.814240689213086e-05, |
|
"loss": 2.1122, |
|
"step": 3231 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 5.795963751305777e-05, |
|
"loss": 2.1289, |
|
"step": 3234 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 5.77770385581901e-05, |
|
"loss": 2.1519, |
|
"step": 3237 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 5.759461076775177e-05, |
|
"loss": 2.1731, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 5.7412354881272865e-05, |
|
"loss": 2.1847, |
|
"step": 3243 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 5.7230271637586555e-05, |
|
"loss": 2.2063, |
|
"step": 3246 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 5.7048361774826086e-05, |
|
"loss": 2.1409, |
|
"step": 3249 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 5.686662603042201e-05, |
|
"loss": 2.0635, |
|
"step": 3252 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 5.668506514109887e-05, |
|
"loss": 2.0779, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 5.6503679842872506e-05, |
|
"loss": 2.0536, |
|
"step": 3258 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 5.6322470871046825e-05, |
|
"loss": 2.1569, |
|
"step": 3261 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 5.6141438960211065e-05, |
|
"loss": 2.1513, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 5.596058484423656e-05, |
|
"loss": 2.1937, |
|
"step": 3267 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 5.5779909256274035e-05, |
|
"loss": 2.1962, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 5.559941292875035e-05, |
|
"loss": 2.1182, |
|
"step": 3273 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 5.5419096593365724e-05, |
|
"loss": 2.1865, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 5.523896098109079e-05, |
|
"loss": 2.158, |
|
"step": 3279 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 5.505900682216354e-05, |
|
"loss": 2.0896, |
|
"step": 3282 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 5.487923484608629e-05, |
|
"loss": 2.1242, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 5.469964578162288e-05, |
|
"loss": 2.1423, |
|
"step": 3288 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 5.4520240356795725e-05, |
|
"loss": 2.114, |
|
"step": 3291 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 5.4341019298882656e-05, |
|
"loss": 2.1531, |
|
"step": 3294 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 5.416198333441423e-05, |
|
"loss": 2.1431, |
|
"step": 3297 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 5.3983133189170686e-05, |
|
"loss": 2.0837, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 5.380446958817888e-05, |
|
"loss": 2.1647, |
|
"step": 3303 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 5.362599325570945e-05, |
|
"loss": 2.1104, |
|
"step": 3306 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 5.344770491527402e-05, |
|
"loss": 2.11, |
|
"step": 3309 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 5.3269605289621947e-05, |
|
"loss": 2.1962, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 5.309169510073777e-05, |
|
"loss": 2.213, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 5.291397506983786e-05, |
|
"loss": 2.0556, |
|
"step": 3318 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 5.273644591736793e-05, |
|
"loss": 2.2094, |
|
"step": 3321 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 5.2559108362999796e-05, |
|
"loss": 2.0953, |
|
"step": 3324 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 5.238196312562851e-05, |
|
"loss": 2.2436, |
|
"step": 3327 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 5.220501092336966e-05, |
|
"loss": 2.1752, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 5.2028252473556226e-05, |
|
"loss": 2.1757, |
|
"step": 3333 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 5.1851688492735705e-05, |
|
"loss": 2.193, |
|
"step": 3336 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 5.167531969666735e-05, |
|
"loss": 2.1306, |
|
"step": 3339 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 5.149914680031909e-05, |
|
"loss": 2.1043, |
|
"step": 3342 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 5.132317051786468e-05, |
|
"loss": 2.1032, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 5.114739156268094e-05, |
|
"loss": 2.1205, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 5.097181064734475e-05, |
|
"loss": 2.1494, |
|
"step": 3351 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 5.0796428483630074e-05, |
|
"loss": 2.1382, |
|
"step": 3354 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 5.062124578250529e-05, |
|
"loss": 2.0946, |
|
"step": 3357 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 5.04462632541301e-05, |
|
"loss": 2.0986, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 5.027148160785273e-05, |
|
"loss": 2.1342, |
|
"step": 3363 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 5.009690155220715e-05, |
|
"loss": 2.1049, |
|
"step": 3366 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 4.992252379491012e-05, |
|
"loss": 2.2269, |
|
"step": 3369 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 4.974834904285822e-05, |
|
"loss": 2.1275, |
|
"step": 3372 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 4.957437800212512e-05, |
|
"loss": 2.0984, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 4.940061137795876e-05, |
|
"loss": 2.1488, |
|
"step": 3378 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 4.9227049874778306e-05, |
|
"loss": 2.1061, |
|
"step": 3381 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 4.905369419617137e-05, |
|
"loss": 2.2105, |
|
"step": 3384 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.888054504489142e-05, |
|
"loss": 2.1669, |
|
"step": 3387 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6398645281002108, |
|
"eval_loss": 2.068744421005249, |
|
"eval_runtime": 16.3144, |
|
"eval_samples_per_second": 134.666, |
|
"eval_steps_per_second": 67.364, |
|
"step": 3388 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.870760312285445e-05, |
|
"loss": 2.1265, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 4.853486913113644e-05, |
|
"loss": 2.2385, |
|
"step": 3393 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 4.836234376997056e-05, |
|
"loss": 2.0393, |
|
"step": 3396 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 4.8190027738744134e-05, |
|
"loss": 2.0961, |
|
"step": 3399 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 4.801792173599586e-05, |
|
"loss": 2.0916, |
|
"step": 3402 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 4.784602645941314e-05, |
|
"loss": 2.1191, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 4.7674342605829094e-05, |
|
"loss": 2.0921, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 4.7502870871219675e-05, |
|
"loss": 2.0888, |
|
"step": 3411 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 4.7331611950701096e-05, |
|
"loss": 2.1485, |
|
"step": 3414 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 4.716056653852672e-05, |
|
"loss": 2.1266, |
|
"step": 3417 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 4.698973532808443e-05, |
|
"loss": 2.1702, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 4.6819119011893805e-05, |
|
"loss": 2.1296, |
|
"step": 3423 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 4.664871828160331e-05, |
|
"loss": 2.0754, |
|
"step": 3426 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 4.647853382798736e-05, |
|
"loss": 2.1102, |
|
"step": 3429 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 4.630856634094366e-05, |
|
"loss": 2.1222, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 4.613881650949044e-05, |
|
"loss": 2.1703, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"learning_rate": 4.596928502176349e-05, |
|
"loss": 2.1288, |
|
"step": 3438 |
|
}, |
|
{ |
|
"epoch": 11.17, |
|
"learning_rate": 4.579997256501355e-05, |
|
"loss": 2.0522, |
|
"step": 3441 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"learning_rate": 4.563087982560345e-05, |
|
"loss": 2.1225, |
|
"step": 3444 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 4.546200748900525e-05, |
|
"loss": 2.068, |
|
"step": 3447 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.529335623979757e-05, |
|
"loss": 2.0433, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 4.512492676166283e-05, |
|
"loss": 2.0679, |
|
"step": 3453 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 4.49567197373844e-05, |
|
"loss": 2.1568, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 4.478873584884378e-05, |
|
"loss": 2.0446, |
|
"step": 3459 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 4.4620975777018034e-05, |
|
"loss": 2.1532, |
|
"step": 3462 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 4.44534402019769e-05, |
|
"loss": 2.1659, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 4.428612980287996e-05, |
|
"loss": 2.1492, |
|
"step": 3468 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 4.411904525797408e-05, |
|
"loss": 2.1019, |
|
"step": 3471 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 4.395218724459047e-05, |
|
"loss": 2.0283, |
|
"step": 3474 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"learning_rate": 4.3785556439142005e-05, |
|
"loss": 2.1538, |
|
"step": 3477 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.361915351712059e-05, |
|
"loss": 2.1303, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 4.345297915309432e-05, |
|
"loss": 2.0312, |
|
"step": 3483 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 4.3287034020704684e-05, |
|
"loss": 2.1296, |
|
"step": 3486 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 4.3121318792663914e-05, |
|
"loss": 2.1551, |
|
"step": 3489 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 4.295583414075234e-05, |
|
"loss": 2.1197, |
|
"step": 3492 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 4.279058073581544e-05, |
|
"loss": 2.2175, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 4.2625559247761394e-05, |
|
"loss": 2.1445, |
|
"step": 3498 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 4.246077034555819e-05, |
|
"loss": 2.1581, |
|
"step": 3501 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"learning_rate": 4.229621469723091e-05, |
|
"loss": 2.0796, |
|
"step": 3504 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"learning_rate": 4.2131892969859054e-05, |
|
"loss": 2.1374, |
|
"step": 3507 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 4.196780582957396e-05, |
|
"loss": 2.1188, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 4.180395394155584e-05, |
|
"loss": 2.0809, |
|
"step": 3513 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"learning_rate": 4.1640337970031384e-05, |
|
"loss": 2.1211, |
|
"step": 3516 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 4.1476958578270783e-05, |
|
"loss": 2.1566, |
|
"step": 3519 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 4.1313816428585316e-05, |
|
"loss": 2.1824, |
|
"step": 3522 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 4.1150912182324396e-05, |
|
"loss": 2.1873, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 4.098824649987304e-05, |
|
"loss": 2.1329, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 11.46, |
|
"learning_rate": 4.0825820040649246e-05, |
|
"loss": 2.1283, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 4.06636334631012e-05, |
|
"loss": 2.1378, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 4.0501687424704613e-05, |
|
"loss": 2.1189, |
|
"step": 3537 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 4.033998258196019e-05, |
|
"loss": 2.1541, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 4.017851959039075e-05, |
|
"loss": 2.1264, |
|
"step": 3543 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"learning_rate": 4.001729910453872e-05, |
|
"loss": 2.0864, |
|
"step": 3546 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 3.985632177796353e-05, |
|
"loss": 2.0457, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"learning_rate": 3.9695588263238847e-05, |
|
"loss": 2.1076, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 3.953509921194991e-05, |
|
"loss": 2.1368, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 3.9374855274691035e-05, |
|
"loss": 2.1462, |
|
"step": 3558 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 3.921485710106283e-05, |
|
"loss": 2.1003, |
|
"step": 3561 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"learning_rate": 3.9055105339669595e-05, |
|
"loss": 2.1287, |
|
"step": 3564 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 3.889560063811679e-05, |
|
"loss": 2.144, |
|
"step": 3567 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 3.873634364300835e-05, |
|
"loss": 2.1544, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 3.857733499994397e-05, |
|
"loss": 2.0732, |
|
"step": 3573 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 3.841857535351657e-05, |
|
"loss": 2.1219, |
|
"step": 3576 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 3.82600653473098e-05, |
|
"loss": 2.1629, |
|
"step": 3579 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 3.810180562389519e-05, |
|
"loss": 2.1042, |
|
"step": 3582 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 3.794379682482965e-05, |
|
"loss": 2.1244, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 3.7786039590653076e-05, |
|
"loss": 2.18, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 3.762853456088538e-05, |
|
"loss": 2.1475, |
|
"step": 3591 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 3.747128237402409e-05, |
|
"loss": 2.0909, |
|
"step": 3594 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 3.7314283667541885e-05, |
|
"loss": 2.123, |
|
"step": 3597 |
|
}, |
|
{ |
|
"epoch": 11.69, |
|
"learning_rate": 3.715753907788374e-05, |
|
"loss": 2.121, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 3.700104924046452e-05, |
|
"loss": 2.0345, |
|
"step": 3603 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 3.6844814789666436e-05, |
|
"loss": 2.0413, |
|
"step": 3606 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 3.6688836358836386e-05, |
|
"loss": 2.1818, |
|
"step": 3609 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 3.6533114580283315e-05, |
|
"loss": 2.1043, |
|
"step": 3612 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 3.6377650085275874e-05, |
|
"loss": 2.0476, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 3.622244350403965e-05, |
|
"loss": 2.124, |
|
"step": 3618 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 3.6067495465754666e-05, |
|
"loss": 2.0856, |
|
"step": 3621 |
|
}, |
|
{ |
|
"epoch": 11.77, |
|
"learning_rate": 3.591280659855296e-05, |
|
"loss": 2.1257, |
|
"step": 3624 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 3.575837752951591e-05, |
|
"loss": 2.1757, |
|
"step": 3627 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 3.5604208884671645e-05, |
|
"loss": 2.0124, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 3.5450301288992596e-05, |
|
"loss": 2.0324, |
|
"step": 3633 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 3.529665536639305e-05, |
|
"loss": 2.1634, |
|
"step": 3636 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 3.514327173972638e-05, |
|
"loss": 2.1465, |
|
"step": 3639 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 3.4990151030782744e-05, |
|
"loss": 2.0668, |
|
"step": 3642 |
|
}, |
|
{ |
|
"epoch": 11.83, |
|
"learning_rate": 3.483729386028651e-05, |
|
"loss": 2.1991, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 3.468470084789359e-05, |
|
"loss": 2.0814, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 3.4532372612189104e-05, |
|
"loss": 2.1976, |
|
"step": 3651 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 3.438030977068487e-05, |
|
"loss": 2.1935, |
|
"step": 3654 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 3.422851293981676e-05, |
|
"loss": 2.1086, |
|
"step": 3657 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 3.4076982734942296e-05, |
|
"loss": 2.1479, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 11.89, |
|
"learning_rate": 3.392571977033819e-05, |
|
"loss": 2.1281, |
|
"step": 3663 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 3.377472465919784e-05, |
|
"loss": 2.1517, |
|
"step": 3666 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 3.3623998013628675e-05, |
|
"loss": 2.1178, |
|
"step": 3669 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"learning_rate": 3.347354044464997e-05, |
|
"loss": 2.0988, |
|
"step": 3672 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 3.332335256219012e-05, |
|
"loss": 2.2034, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"learning_rate": 3.317343497508424e-05, |
|
"loss": 2.1123, |
|
"step": 3678 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 3.302378829107178e-05, |
|
"loss": 2.1258, |
|
"step": 3681 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 3.2874413116794e-05, |
|
"loss": 2.1263, |
|
"step": 3684 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 3.2725310057791456e-05, |
|
"loss": 2.0565, |
|
"step": 3687 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 3.2576479718501584e-05, |
|
"loss": 2.0825, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 3.242792270225635e-05, |
|
"loss": 2.1262, |
|
"step": 3693 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.227963961127961e-05, |
|
"loss": 2.0983, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6422703241176929, |
|
"eval_loss": 2.062872886657715, |
|
"eval_runtime": 16.2609, |
|
"eval_samples_per_second": 135.109, |
|
"eval_steps_per_second": 67.585, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 3.213163104668485e-05, |
|
"loss": 2.0955, |
|
"step": 3699 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 3.19838976084727e-05, |
|
"loss": 2.1043, |
|
"step": 3702 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 3.18364398955284e-05, |
|
"loss": 2.1251, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"learning_rate": 3.168925850561943e-05, |
|
"loss": 2.1206, |
|
"step": 3708 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 3.154235403539323e-05, |
|
"loss": 2.0734, |
|
"step": 3711 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 3.1395727080374505e-05, |
|
"loss": 2.178, |
|
"step": 3714 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 3.12493782349631e-05, |
|
"loss": 2.0629, |
|
"step": 3717 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"learning_rate": 3.110330809243134e-05, |
|
"loss": 2.1535, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"learning_rate": 3.095751724492185e-05, |
|
"loss": 2.1331, |
|
"step": 3723 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 3.081200628344494e-05, |
|
"loss": 2.0902, |
|
"step": 3726 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 3.066677579787631e-05, |
|
"loss": 2.0434, |
|
"step": 3729 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 3.0521826376954755e-05, |
|
"loss": 2.1005, |
|
"step": 3732 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 3.0377158608279655e-05, |
|
"loss": 2.1764, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 3.0232773078308517e-05, |
|
"loss": 2.1099, |
|
"step": 3738 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 3.0088670372354877e-05, |
|
"loss": 2.1211, |
|
"step": 3741 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 2.99448510745856e-05, |
|
"loss": 2.1546, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 2.9801315768018688e-05, |
|
"loss": 2.1664, |
|
"step": 3747 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 2.9658065034520978e-05, |
|
"loss": 2.0983, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 12.19, |
|
"learning_rate": 2.9515099454805663e-05, |
|
"loss": 2.0519, |
|
"step": 3753 |
|
}, |
|
{ |
|
"epoch": 12.19, |
|
"learning_rate": 2.93724196084299e-05, |
|
"loss": 2.1333, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 2.923002607379265e-05, |
|
"loss": 2.0304, |
|
"step": 3759 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 2.9087919428132114e-05, |
|
"loss": 2.1549, |
|
"step": 3762 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 2.8946100247523533e-05, |
|
"loss": 2.1191, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 2.8804569106876832e-05, |
|
"loss": 2.1154, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 2.8663326579934292e-05, |
|
"loss": 2.175, |
|
"step": 3771 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 2.8522373239268152e-05, |
|
"loss": 2.1378, |
|
"step": 3774 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 2.8381709656278333e-05, |
|
"loss": 2.0927, |
|
"step": 3777 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 2.8241336401190222e-05, |
|
"loss": 2.1146, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"learning_rate": 2.810125404305216e-05, |
|
"loss": 2.0147, |
|
"step": 3783 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 2.796146314973325e-05, |
|
"loss": 2.1068, |
|
"step": 3786 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 2.7821964287921197e-05, |
|
"loss": 2.1693, |
|
"step": 3789 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 2.7682758023119694e-05, |
|
"loss": 2.1336, |
|
"step": 3792 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 2.7543844919646323e-05, |
|
"loss": 2.0793, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 2.740522554063033e-05, |
|
"loss": 2.0712, |
|
"step": 3798 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 2.726690044801018e-05, |
|
"loss": 2.0706, |
|
"step": 3801 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 2.7128870202531343e-05, |
|
"loss": 2.0728, |
|
"step": 3804 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 2.6991135363744068e-05, |
|
"loss": 2.1108, |
|
"step": 3807 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 2.6853696490001112e-05, |
|
"loss": 2.104, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"learning_rate": 2.6716554138455353e-05, |
|
"loss": 2.0752, |
|
"step": 3813 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 2.6579708865057694e-05, |
|
"loss": 2.154, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 2.6443161224554704e-05, |
|
"loss": 2.0717, |
|
"step": 3819 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 2.6306911770486353e-05, |
|
"loss": 2.1225, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"learning_rate": 2.6170961055183906e-05, |
|
"loss": 2.1377, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 12.43, |
|
"learning_rate": 2.6035309629767603e-05, |
|
"loss": 2.1614, |
|
"step": 3828 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"learning_rate": 2.5899958044144302e-05, |
|
"loss": 2.1486, |
|
"step": 3831 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 2.576490684700542e-05, |
|
"loss": 2.1206, |
|
"step": 3834 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 2.5630156585824727e-05, |
|
"loss": 2.0882, |
|
"step": 3837 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"learning_rate": 2.5495707806855938e-05, |
|
"loss": 2.1787, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 2.536156105513062e-05, |
|
"loss": 2.0932, |
|
"step": 3843 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 2.522771687445612e-05, |
|
"loss": 2.1471, |
|
"step": 3846 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 2.5094175807413055e-05, |
|
"loss": 2.1226, |
|
"step": 3849 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"learning_rate": 2.4960938395353296e-05, |
|
"loss": 2.1666, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 2.4828005178397838e-05, |
|
"loss": 2.0437, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 2.4695376695434448e-05, |
|
"loss": 2.0396, |
|
"step": 3858 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 2.456305348411554e-05, |
|
"loss": 2.09, |
|
"step": 3861 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 2.4431036080856073e-05, |
|
"loss": 2.0419, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 12.56, |
|
"learning_rate": 2.429932502083132e-05, |
|
"loss": 2.0626, |
|
"step": 3867 |
|
}, |
|
{ |
|
"epoch": 12.56, |
|
"learning_rate": 2.41679208379746e-05, |
|
"loss": 2.1798, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 12.57, |
|
"learning_rate": 2.4036824064975317e-05, |
|
"loss": 2.1082, |
|
"step": 3873 |
|
}, |
|
{ |
|
"epoch": 12.58, |
|
"learning_rate": 2.3906035233276614e-05, |
|
"loss": 2.0504, |
|
"step": 3876 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 2.3775554873073292e-05, |
|
"loss": 2.0439, |
|
"step": 3879 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 2.3645383513309704e-05, |
|
"loss": 2.1104, |
|
"step": 3882 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 2.351552168167761e-05, |
|
"loss": 2.088, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 2.338596990461388e-05, |
|
"loss": 2.0038, |
|
"step": 3888 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 2.3256728707298546e-05, |
|
"loss": 2.043, |
|
"step": 3891 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 2.312779861365263e-05, |
|
"loss": 2.0785, |
|
"step": 3894 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 2.299918014633592e-05, |
|
"loss": 2.1406, |
|
"step": 3897 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 2.2870873826744988e-05, |
|
"loss": 2.1155, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 2.2742880175011028e-05, |
|
"loss": 2.1258, |
|
"step": 3903 |
|
}, |
|
{ |
|
"epoch": 12.68, |
|
"learning_rate": 2.261519970999768e-05, |
|
"loss": 2.1664, |
|
"step": 3906 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"learning_rate": 2.248783294929897e-05, |
|
"loss": 2.0733, |
|
"step": 3909 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 2.2360780409237294e-05, |
|
"loss": 2.135, |
|
"step": 3912 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 2.2234042604861182e-05, |
|
"loss": 2.1826, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 2.2107620049943346e-05, |
|
"loss": 2.0611, |
|
"step": 3918 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 2.1981513256978458e-05, |
|
"loss": 2.0883, |
|
"step": 3921 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 2.185572273718124e-05, |
|
"loss": 2.0715, |
|
"step": 3924 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 2.1730249000484203e-05, |
|
"loss": 2.0608, |
|
"step": 3927 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 2.1605092555535712e-05, |
|
"loss": 2.0845, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 2.14802539096979e-05, |
|
"loss": 2.0893, |
|
"step": 3933 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 2.1355733569044635e-05, |
|
"loss": 2.0895, |
|
"step": 3936 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 2.1231532038359326e-05, |
|
"loss": 2.1653, |
|
"step": 3939 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 2.11076498211331e-05, |
|
"loss": 2.0523, |
|
"step": 3942 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 2.098408741956256e-05, |
|
"loss": 2.056, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 2.086084533454784e-05, |
|
"loss": 2.1268, |
|
"step": 3948 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 2.0737924065690606e-05, |
|
"loss": 2.0818, |
|
"step": 3951 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"learning_rate": 2.0615324111292013e-05, |
|
"loss": 2.038, |
|
"step": 3954 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 2.0493045968350567e-05, |
|
"loss": 2.124, |
|
"step": 3957 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 2.0371090132560322e-05, |
|
"loss": 2.0462, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"learning_rate": 2.0249457098308665e-05, |
|
"loss": 2.1401, |
|
"step": 3963 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 2.012814735867442e-05, |
|
"loss": 2.1156, |
|
"step": 3966 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"learning_rate": 2.0007161405425866e-05, |
|
"loss": 2.1056, |
|
"step": 3969 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 1.9886499729018737e-05, |
|
"loss": 2.093, |
|
"step": 3972 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 1.9766162818594114e-05, |
|
"loss": 2.1043, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 1.9646151161976556e-05, |
|
"loss": 2.0714, |
|
"step": 3978 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 1.9526465245672187e-05, |
|
"loss": 2.1165, |
|
"step": 3981 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"learning_rate": 1.9407105554866557e-05, |
|
"loss": 2.0959, |
|
"step": 3984 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"learning_rate": 1.92880725734227e-05, |
|
"loss": 2.1551, |
|
"step": 3987 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"learning_rate": 1.9169366783879428e-05, |
|
"loss": 2.0905, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 1.9050988667448977e-05, |
|
"loss": 2.1042, |
|
"step": 3993 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 1.8932938704015314e-05, |
|
"loss": 2.1607, |
|
"step": 3996 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 1.8815217372132198e-05, |
|
"loss": 2.0262, |
|
"step": 3999 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 1.8697825149021086e-05, |
|
"loss": 2.1215, |
|
"step": 4002 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6475752433152033, |
|
"eval_loss": 2.025885820388794, |
|
"eval_runtime": 16.3422, |
|
"eval_samples_per_second": 134.437, |
|
"eval_steps_per_second": 67.249, |
|
"step": 4004 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.8580762510569295e-05, |
|
"loss": 2.1667, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 1.846402993132811e-05, |
|
"loss": 2.1035, |
|
"step": 4008 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 1.8347627884510832e-05, |
|
"loss": 2.1239, |
|
"step": 4011 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 1.823155684199074e-05, |
|
"loss": 2.1166, |
|
"step": 4014 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 1.8115817274299396e-05, |
|
"loss": 2.1101, |
|
"step": 4017 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 1.800040965062455e-05, |
|
"loss": 2.0607, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 1.7885334438808287e-05, |
|
"loss": 2.0964, |
|
"step": 4023 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 1.777059210534524e-05, |
|
"loss": 2.1132, |
|
"step": 4026 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"learning_rate": 1.7656183115380577e-05, |
|
"loss": 2.0793, |
|
"step": 4029 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 1.754210793270812e-05, |
|
"loss": 2.1091, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 1.742836701976849e-05, |
|
"loss": 2.0793, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 1.7314960837647297e-05, |
|
"loss": 2.0164, |
|
"step": 4038 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 1.7201889846073183e-05, |
|
"loss": 2.0697, |
|
"step": 4041 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 1.7089154503415895e-05, |
|
"loss": 2.0631, |
|
"step": 4044 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 1.697675526668473e-05, |
|
"loss": 2.0714, |
|
"step": 4047 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 1.6864692591526278e-05, |
|
"loss": 2.1084, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 1.6752966932222826e-05, |
|
"loss": 2.1333, |
|
"step": 4053 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 1.664157874169049e-05, |
|
"loss": 2.1206, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 1.6530528471477326e-05, |
|
"loss": 2.1727, |
|
"step": 4059 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 1.6419816571761482e-05, |
|
"loss": 2.1396, |
|
"step": 4062 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 1.6309443491349475e-05, |
|
"loss": 2.0632, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 1.6199409677674314e-05, |
|
"loss": 2.0968, |
|
"step": 4068 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 1.6089715576793584e-05, |
|
"loss": 2.0538, |
|
"step": 4071 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"learning_rate": 1.5980361633387853e-05, |
|
"loss": 2.1114, |
|
"step": 4074 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 1.587134829075867e-05, |
|
"loss": 2.1656, |
|
"step": 4077 |
|
}, |
|
{ |
|
"epoch": 13.25, |
|
"learning_rate": 1.576267599082686e-05, |
|
"loss": 2.0781, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 1.5654345174130756e-05, |
|
"loss": 2.1749, |
|
"step": 4083 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"learning_rate": 1.5546356279824382e-05, |
|
"loss": 2.0654, |
|
"step": 4086 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 1.5438709745675606e-05, |
|
"loss": 2.0904, |
|
"step": 4089 |
|
}, |
|
{ |
|
"epoch": 13.29, |
|
"learning_rate": 1.5331406008064475e-05, |
|
"loss": 2.0368, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 1.522444550198141e-05, |
|
"loss": 2.0759, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 1.511782866102539e-05, |
|
"loss": 2.0462, |
|
"step": 4098 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 1.5011555917402265e-05, |
|
"loss": 2.0873, |
|
"step": 4101 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 1.4905627701923009e-05, |
|
"loss": 2.0913, |
|
"step": 4104 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 1.480004444400187e-05, |
|
"loss": 2.0516, |
|
"step": 4107 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 1.4694806571654696e-05, |
|
"loss": 2.1136, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 13.35, |
|
"learning_rate": 1.4589914511497305e-05, |
|
"loss": 2.1294, |
|
"step": 4113 |
|
}, |
|
{ |
|
"epoch": 13.36, |
|
"learning_rate": 1.4485368688743527e-05, |
|
"loss": 2.068, |
|
"step": 4116 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 1.4381169527203719e-05, |
|
"loss": 2.0402, |
|
"step": 4119 |
|
}, |
|
{ |
|
"epoch": 13.38, |
|
"learning_rate": 1.4277317449282834e-05, |
|
"loss": 2.048, |
|
"step": 4122 |
|
}, |
|
{ |
|
"epoch": 13.39, |
|
"learning_rate": 1.4173812875978886e-05, |
|
"loss": 2.0875, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 1.407065622688113e-05, |
|
"loss": 2.1008, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 1.3967847920168386e-05, |
|
"loss": 2.1113, |
|
"step": 4131 |
|
}, |
|
{ |
|
"epoch": 13.42, |
|
"learning_rate": 1.386538837260738e-05, |
|
"loss": 2.0277, |
|
"step": 4134 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 1.376327799955105e-05, |
|
"loss": 2.1696, |
|
"step": 4137 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 1.3661517214936782e-05, |
|
"loss": 2.1531, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 1.356010643128487e-05, |
|
"loss": 2.1222, |
|
"step": 4143 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 1.345904605969669e-05, |
|
"loss": 2.0299, |
|
"step": 4146 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 1.3358336509853131e-05, |
|
"loss": 2.1065, |
|
"step": 4149 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 1.3257978190012931e-05, |
|
"loss": 2.0989, |
|
"step": 4152 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 1.3157971507011036e-05, |
|
"loss": 2.0679, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 1.3058316866256826e-05, |
|
"loss": 2.1828, |
|
"step": 4158 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 1.295901467173265e-05, |
|
"loss": 2.057, |
|
"step": 4161 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 1.2860065325992066e-05, |
|
"loss": 2.0964, |
|
"step": 4164 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 1.2761469230158208e-05, |
|
"loss": 2.1366, |
|
"step": 4167 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"learning_rate": 1.2663226783922266e-05, |
|
"loss": 2.0889, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 13.55, |
|
"learning_rate": 1.2565338385541792e-05, |
|
"loss": 2.0918, |
|
"step": 4173 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 1.2467804431839037e-05, |
|
"loss": 2.1852, |
|
"step": 4176 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 1.2370625318199414e-05, |
|
"loss": 2.0561, |
|
"step": 4179 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"learning_rate": 1.2273801438569932e-05, |
|
"loss": 2.0864, |
|
"step": 4182 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 1.2177333185457474e-05, |
|
"loss": 2.125, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 1.2081220949927252e-05, |
|
"loss": 2.0829, |
|
"step": 4188 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 1.1985465121601392e-05, |
|
"loss": 2.0291, |
|
"step": 4191 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 1.189006608865707e-05, |
|
"loss": 2.1237, |
|
"step": 4194 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 1.1795024237825092e-05, |
|
"loss": 2.1423, |
|
"step": 4197 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 1.1700339954388384e-05, |
|
"loss": 2.1, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 1.1606013622180278e-05, |
|
"loss": 2.0367, |
|
"step": 4203 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"learning_rate": 1.1512045623583068e-05, |
|
"loss": 2.0967, |
|
"step": 4206 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 1.1418436339526429e-05, |
|
"loss": 2.0585, |
|
"step": 4209 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 1.1325186149485889e-05, |
|
"loss": 2.1754, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"learning_rate": 1.1232295431481222e-05, |
|
"loss": 2.0563, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"learning_rate": 1.1139764562075017e-05, |
|
"loss": 2.1228, |
|
"step": 4218 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 1.104759391637108e-05, |
|
"loss": 2.013, |
|
"step": 4221 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 1.0955783868012892e-05, |
|
"loss": 2.1053, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 1.0864334789182218e-05, |
|
"loss": 2.0723, |
|
"step": 4227 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 1.0773247050597468e-05, |
|
"loss": 2.142, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 1.0682521021512249e-05, |
|
"loss": 2.0928, |
|
"step": 4233 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 1.0592157069713826e-05, |
|
"loss": 2.0371, |
|
"step": 4236 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 1.0502155561521766e-05, |
|
"loss": 2.1179, |
|
"step": 4239 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 1.0412516861786236e-05, |
|
"loss": 2.0816, |
|
"step": 4242 |
|
}, |
|
{ |
|
"epoch": 13.78, |
|
"learning_rate": 1.032324133388668e-05, |
|
"loss": 2.0207, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 1.0234329339730398e-05, |
|
"loss": 2.0805, |
|
"step": 4248 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 1.0145781239750863e-05, |
|
"loss": 2.1022, |
|
"step": 4251 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 1.0057597392906414e-05, |
|
"loss": 2.1438, |
|
"step": 4254 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"learning_rate": 9.969778156678854e-06, |
|
"loss": 2.097, |
|
"step": 4257 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 9.88232388707182e-06, |
|
"loss": 2.0942, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 13.84, |
|
"learning_rate": 9.795234938609466e-06, |
|
"loss": 2.0325, |
|
"step": 4263 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 9.708511664335029e-06, |
|
"loss": 2.1505, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 9.62215441580936e-06, |
|
"loss": 2.05, |
|
"step": 4269 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 9.536163543109488e-06, |
|
"loss": 2.0526, |
|
"step": 4272 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 9.450539394827185e-06, |
|
"loss": 2.0956, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 9.365282318067681e-06, |
|
"loss": 2.0701, |
|
"step": 4278 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 9.280392658448078e-06, |
|
"loss": 2.1114, |
|
"step": 4281 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 9.19587076009607e-06, |
|
"loss": 2.0833, |
|
"step": 4284 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"learning_rate": 9.11171696564853e-06, |
|
"loss": 2.1273, |
|
"step": 4287 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"learning_rate": 9.027931616250063e-06, |
|
"loss": 2.0479, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 8.94451505155165e-06, |
|
"loss": 2.0574, |
|
"step": 4293 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 8.861467609709373e-06, |
|
"loss": 2.0643, |
|
"step": 4296 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 8.778789627382833e-06, |
|
"loss": 2.1623, |
|
"step": 4299 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 8.696481439734017e-06, |
|
"loss": 2.0858, |
|
"step": 4302 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"learning_rate": 8.614543380425766e-06, |
|
"loss": 2.1034, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 8.532975781620512e-06, |
|
"loss": 2.1097, |
|
"step": 4308 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 8.451778973978874e-06, |
|
"loss": 2.1255, |
|
"step": 4311 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6460723583804651, |
|
"eval_loss": 2.037827730178833, |
|
"eval_runtime": 16.2998, |
|
"eval_samples_per_second": 134.787, |
|
"eval_steps_per_second": 67.424, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 8.370953286658389e-06, |
|
"loss": 2.0412, |
|
"step": 4314 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 8.290499047312106e-06, |
|
"loss": 2.1136, |
|
"step": 4317 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 8.210416582087332e-06, |
|
"loss": 2.1369, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 8.130706215624195e-06, |
|
"loss": 2.0917, |
|
"step": 4323 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 8.051368271054493e-06, |
|
"loss": 2.1272, |
|
"step": 4326 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 7.972403070000222e-06, |
|
"loss": 2.1439, |
|
"step": 4329 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 7.893810932572333e-06, |
|
"loss": 2.0715, |
|
"step": 4332 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 7.815592177369502e-06, |
|
"loss": 2.0255, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 7.737747121476757e-06, |
|
"loss": 2.0631, |
|
"step": 4338 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 7.66027608046419e-06, |
|
"loss": 2.1339, |
|
"step": 4341 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 7.58317936838574e-06, |
|
"loss": 2.0685, |
|
"step": 4344 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 7.506457297777847e-06, |
|
"loss": 2.0141, |
|
"step": 4347 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 7.4301101796582225e-06, |
|
"loss": 2.0722, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 7.354138323524617e-06, |
|
"loss": 2.0758, |
|
"step": 4353 |
|
}, |
|
{ |
|
"epoch": 14.14, |
|
"learning_rate": 7.278542037353542e-06, |
|
"loss": 2.0993, |
|
"step": 4356 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 7.203321627598947e-06, |
|
"loss": 2.114, |
|
"step": 4359 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 7.128477399191136e-06, |
|
"loss": 2.114, |
|
"step": 4362 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 7.054009655535354e-06, |
|
"loss": 2.0214, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 6.979918698510701e-06, |
|
"loss": 2.0729, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 6.906204828468821e-06, |
|
"loss": 1.9927, |
|
"step": 4371 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 6.832868344232757e-06, |
|
"loss": 2.0514, |
|
"step": 4374 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 6.759909543095632e-06, |
|
"loss": 2.1031, |
|
"step": 4377 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 6.687328720819552e-06, |
|
"loss": 2.0984, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 6.615126171634367e-06, |
|
"loss": 2.0636, |
|
"step": 4383 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 6.543302188236445e-06, |
|
"loss": 2.0285, |
|
"step": 4386 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 6.471857061787501e-06, |
|
"loss": 2.0266, |
|
"step": 4389 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"learning_rate": 6.400791081913538e-06, |
|
"loss": 2.1057, |
|
"step": 4392 |
|
}, |
|
{ |
|
"epoch": 14.27, |
|
"learning_rate": 6.33010453670343e-06, |
|
"loss": 2.0328, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 6.25979771270796e-06, |
|
"loss": 2.0908, |
|
"step": 4398 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 6.189870894938587e-06, |
|
"loss": 2.067, |
|
"step": 4401 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"learning_rate": 6.120324366866281e-06, |
|
"loss": 2.0099, |
|
"step": 4404 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 6.051158410420355e-06, |
|
"loss": 2.1048, |
|
"step": 4407 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"learning_rate": 5.98237330598741e-06, |
|
"loss": 2.0286, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 5.91396933241013e-06, |
|
"loss": 2.0796, |
|
"step": 4413 |
|
}, |
|
{ |
|
"epoch": 14.34, |
|
"learning_rate": 5.845946766986099e-06, |
|
"loss": 2.0391, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 5.778305885466828e-06, |
|
"loss": 2.0937, |
|
"step": 4419 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 5.711046962056488e-06, |
|
"loss": 2.1198, |
|
"step": 4422 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 5.644170269410853e-06, |
|
"loss": 2.1277, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 5.577676078636251e-06, |
|
"loss": 2.1039, |
|
"step": 4428 |
|
}, |
|
{ |
|
"epoch": 14.39, |
|
"learning_rate": 5.511564659288404e-06, |
|
"loss": 2.0803, |
|
"step": 4431 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 5.445836279371308e-06, |
|
"loss": 2.0594, |
|
"step": 4434 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 5.380491205336202e-06, |
|
"loss": 2.131, |
|
"step": 4437 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 5.315529702080491e-06, |
|
"loss": 2.0799, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 14.43, |
|
"learning_rate": 5.250952032946643e-06, |
|
"loss": 2.0678, |
|
"step": 4443 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 5.186758459721075e-06, |
|
"loss": 2.115, |
|
"step": 4446 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 5.122949242633279e-06, |
|
"loss": 2.0485, |
|
"step": 4449 |
|
}, |
|
{ |
|
"epoch": 14.45, |
|
"learning_rate": 5.059524640354496e-06, |
|
"loss": 2.1301, |
|
"step": 4452 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 4.996484909996868e-06, |
|
"loss": 2.0767, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 4.933830307112353e-06, |
|
"loss": 2.1214, |
|
"step": 4458 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 4.871561085691634e-06, |
|
"loss": 2.0206, |
|
"step": 4461 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 4.8096774981631235e-06, |
|
"loss": 2.1629, |
|
"step": 4464 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 4.7481797953919605e-06, |
|
"loss": 2.0928, |
|
"step": 4467 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 4.687068226679004e-06, |
|
"loss": 2.1403, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"learning_rate": 4.6263430397597395e-06, |
|
"loss": 2.0604, |
|
"step": 4473 |
|
}, |
|
{ |
|
"epoch": 14.53, |
|
"learning_rate": 4.566004480803332e-06, |
|
"loss": 2.1267, |
|
"step": 4476 |
|
}, |
|
{ |
|
"epoch": 14.54, |
|
"learning_rate": 4.5060527944116856e-06, |
|
"loss": 2.1578, |
|
"step": 4479 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 4.446488223618306e-06, |
|
"loss": 2.1332, |
|
"step": 4482 |
|
}, |
|
{ |
|
"epoch": 14.56, |
|
"learning_rate": 4.387311009887463e-06, |
|
"loss": 1.9903, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 4.328521393113149e-06, |
|
"loss": 2.2028, |
|
"step": 4488 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 4.270119611618073e-06, |
|
"loss": 2.0242, |
|
"step": 4491 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 4.21210590215273e-06, |
|
"loss": 2.0528, |
|
"step": 4494 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 4.1544804998944756e-06, |
|
"loss": 2.1233, |
|
"step": 4497 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 4.097243638446502e-06, |
|
"loss": 2.1108, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 4.040395549836928e-06, |
|
"loss": 2.0646, |
|
"step": 4503 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 3.983936464517901e-06, |
|
"loss": 2.1225, |
|
"step": 4506 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 3.9278666113645615e-06, |
|
"loss": 2.0553, |
|
"step": 4509 |
|
}, |
|
{ |
|
"epoch": 14.65, |
|
"learning_rate": 3.872186217674167e-06, |
|
"loss": 2.1065, |
|
"step": 4512 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"learning_rate": 3.816895509165252e-06, |
|
"loss": 2.0563, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 3.7619947099765353e-06, |
|
"loss": 2.1165, |
|
"step": 4518 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 3.707484042666198e-06, |
|
"loss": 2.0359, |
|
"step": 4521 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 3.6533637282108347e-06, |
|
"loss": 2.0946, |
|
"step": 4524 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 3.599633986004669e-06, |
|
"loss": 2.0738, |
|
"step": 4527 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 3.5462950338585597e-06, |
|
"loss": 2.0815, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 3.4933470879992104e-06, |
|
"loss": 2.05, |
|
"step": 4533 |
|
}, |
|
{ |
|
"epoch": 14.73, |
|
"learning_rate": 3.440790363068247e-06, |
|
"loss": 2.0606, |
|
"step": 4536 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 3.3886250721213544e-06, |
|
"loss": 2.1675, |
|
"step": 4539 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 3.3368514266273964e-06, |
|
"loss": 2.0499, |
|
"step": 4542 |
|
}, |
|
{ |
|
"epoch": 14.76, |
|
"learning_rate": 3.2854696364675974e-06, |
|
"loss": 2.1578, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 3.2344799099346733e-06, |
|
"loss": 2.0859, |
|
"step": 4548 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 3.1838824537319456e-06, |
|
"loss": 2.1324, |
|
"step": 4551 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 3.1336774729725736e-06, |
|
"loss": 2.117, |
|
"step": 4554 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 3.0838651711787013e-06, |
|
"loss": 2.0503, |
|
"step": 4557 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 3.034445750280579e-06, |
|
"loss": 2.0449, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 2.985419410615831e-06, |
|
"loss": 2.1285, |
|
"step": 4563 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"learning_rate": 2.9367863509285775e-06, |
|
"loss": 2.0391, |
|
"step": 4566 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 2.8885467683686497e-06, |
|
"loss": 2.0469, |
|
"step": 4569 |
|
}, |
|
{ |
|
"epoch": 14.84, |
|
"learning_rate": 2.840700858490786e-06, |
|
"loss": 2.1386, |
|
"step": 4572 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 2.7932488152538794e-06, |
|
"loss": 2.1428, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"learning_rate": 2.7461908310201123e-06, |
|
"loss": 2.0901, |
|
"step": 4578 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"learning_rate": 2.6995270965542554e-06, |
|
"loss": 2.0583, |
|
"step": 4581 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 2.653257801022835e-06, |
|
"loss": 2.0573, |
|
"step": 4584 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 2.607383131993424e-06, |
|
"loss": 2.035, |
|
"step": 4587 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 2.561903275433797e-06, |
|
"loss": 2.1873, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 2.5168184157113084e-06, |
|
"loss": 2.0505, |
|
"step": 4593 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"learning_rate": 2.472128735591983e-06, |
|
"loss": 2.0236, |
|
"step": 4596 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 2.4278344162398935e-06, |
|
"loss": 2.022, |
|
"step": 4599 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"learning_rate": 2.3839356372164056e-06, |
|
"loss": 1.9994, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 2.3404325764794012e-06, |
|
"loss": 1.9757, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 2.2973254103826e-06, |
|
"loss": 2.0497, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 2.254614313674863e-06, |
|
"loss": 2.1178, |
|
"step": 4611 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"learning_rate": 2.2122994594994227e-06, |
|
"loss": 2.1794, |
|
"step": 4614 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 2.1703810193932307e-06, |
|
"loss": 2.0673, |
|
"step": 4617 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.1288591632862343e-06, |
|
"loss": 2.1751, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6458123953098828, |
|
"eval_loss": 2.0256659984588623, |
|
"eval_runtime": 16.3182, |
|
"eval_samples_per_second": 134.635, |
|
"eval_steps_per_second": 67.348, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 2.087734059500712e-06, |
|
"loss": 2.0922, |
|
"step": 4623 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 2.0470058747505516e-06, |
|
"loss": 2.1413, |
|
"step": 4626 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 2.006674774140638e-06, |
|
"loss": 2.0214, |
|
"step": 4629 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 1.9667409211661437e-06, |
|
"loss": 2.1027, |
|
"step": 4632 |
|
}, |
|
{ |
|
"epoch": 15.05, |
|
"learning_rate": 1.9272044777118524e-06, |
|
"loss": 2.0475, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"learning_rate": 1.8880656040514921e-06, |
|
"loss": 2.0842, |
|
"step": 4638 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"learning_rate": 1.8493244588471793e-06, |
|
"loss": 2.0245, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 1.8109811991486646e-06, |
|
"loss": 2.0969, |
|
"step": 4644 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 1.7730359803927343e-06, |
|
"loss": 2.1304, |
|
"step": 4647 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 1.735488956402631e-06, |
|
"loss": 2.001, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 1.698340279387356e-06, |
|
"loss": 2.1577, |
|
"step": 4653 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 1.6615900999410683e-06, |
|
"loss": 2.058, |
|
"step": 4656 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 1.6252385670425307e-06, |
|
"loss": 2.0714, |
|
"step": 4659 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 1.589285828054421e-06, |
|
"loss": 2.0709, |
|
"step": 4662 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 1.5537320287227764e-06, |
|
"loss": 2.0754, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 1.5185773131764502e-06, |
|
"loss": 2.1037, |
|
"step": 4668 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 1.4838218239264456e-06, |
|
"loss": 2.1344, |
|
"step": 4671 |
|
}, |
|
{ |
|
"epoch": 15.18, |
|
"learning_rate": 1.4494657018653823e-06, |
|
"loss": 2.0933, |
|
"step": 4674 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 1.4155090862668863e-06, |
|
"loss": 2.0771, |
|
"step": 4677 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 1.3819521147851123e-06, |
|
"loss": 2.0867, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 1.3487949234540664e-06, |
|
"loss": 2.1519, |
|
"step": 4683 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"learning_rate": 1.3160376466871739e-06, |
|
"loss": 2.0861, |
|
"step": 4686 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 1.2836804172766449e-06, |
|
"loss": 2.0644, |
|
"step": 4689 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 1.2517233663929651e-06, |
|
"loss": 2.1133, |
|
"step": 4692 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 1.2201666235843735e-06, |
|
"loss": 2.0873, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 1.18901031677634e-06, |
|
"loss": 2.1127, |
|
"step": 4698 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 1.1582545722710225e-06, |
|
"loss": 2.0917, |
|
"step": 4701 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 1.1278995147467885e-06, |
|
"loss": 2.0339, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 15.28, |
|
"learning_rate": 1.0979452672576718e-06, |
|
"loss": 2.0649, |
|
"step": 4707 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 1.0683919512329166e-06, |
|
"loss": 2.0376, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 1.0392396864764231e-06, |
|
"loss": 2.0764, |
|
"step": 4713 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 1.0104885911663474e-06, |
|
"loss": 2.1247, |
|
"step": 4716 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 9.821387818545358e-07, |
|
"loss": 2.1067, |
|
"step": 4719 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 9.54190373466113e-07, |
|
"loss": 2.0267, |
|
"step": 4722 |
|
}, |
|
{ |
|
"epoch": 15.34, |
|
"learning_rate": 9.266434792989942e-07, |
|
"loss": 2.0377, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 15.35, |
|
"learning_rate": 8.994982110234307e-07, |
|
"loss": 2.0895, |
|
"step": 4728 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 8.727546786815421e-07, |
|
"loss": 2.027, |
|
"step": 4731 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 8.464129906868734e-07, |
|
"loss": 2.0593, |
|
"step": 4734 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 8.204732538239835e-07, |
|
"loss": 1.9569, |
|
"step": 4737 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 7.949355732479902e-07, |
|
"loss": 2.0478, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 7.698000524841376e-07, |
|
"loss": 2.0132, |
|
"step": 4743 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 7.450667934273958e-07, |
|
"loss": 2.1406, |
|
"step": 4746 |
|
}, |
|
{ |
|
"epoch": 15.42, |
|
"learning_rate": 7.207358963420063e-07, |
|
"loss": 2.0989, |
|
"step": 4749 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 6.968074598611484e-07, |
|
"loss": 2.1065, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 15.44, |
|
"learning_rate": 6.732815809864734e-07, |
|
"loss": 2.0832, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 6.501583550877488e-07, |
|
"loss": 2.1096, |
|
"step": 4758 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"learning_rate": 6.274378759024257e-07, |
|
"loss": 2.0831, |
|
"step": 4761 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 6.051202355353392e-07, |
|
"loss": 2.0592, |
|
"step": 4764 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 5.832055244582524e-07, |
|
"loss": 2.1122, |
|
"step": 4767 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 5.616938315095243e-07, |
|
"loss": 2.1232, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 5.405852438937764e-07, |
|
"loss": 2.0721, |
|
"step": 4773 |
|
}, |
|
{ |
|
"epoch": 15.51, |
|
"learning_rate": 5.198798471814814e-07, |
|
"loss": 2.1295, |
|
"step": 4776 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 4.995777253086753e-07, |
|
"loss": 2.0643, |
|
"step": 4779 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 4.796789605765573e-07, |
|
"loss": 2.0741, |
|
"step": 4782 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 4.601836336512233e-07, |
|
"loss": 2.0587, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 15.55, |
|
"learning_rate": 4.4109182356327774e-07, |
|
"loss": 2.1124, |
|
"step": 4788 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 4.2240360770753327e-07, |
|
"loss": 2.1365, |
|
"step": 4791 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 4.0411906184273376e-07, |
|
"loss": 2.1643, |
|
"step": 4794 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"learning_rate": 3.8623826009120955e-07, |
|
"loss": 2.066, |
|
"step": 4797 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 3.6876127493854495e-07, |
|
"loss": 2.0562, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"learning_rate": 3.516881772333669e-07, |
|
"loss": 2.1947, |
|
"step": 4803 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 3.35019036187012e-07, |
|
"loss": 2.0512, |
|
"step": 4806 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 3.187539193732048e-07, |
|
"loss": 2.0424, |
|
"step": 4809 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 3.028928927278685e-07, |
|
"loss": 2.101, |
|
"step": 4812 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 2.874360205488258e-07, |
|
"loss": 2.1028, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 2.723833654954655e-07, |
|
"loss": 2.0677, |
|
"step": 4818 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 2.577349885886315e-07, |
|
"loss": 2.0715, |
|
"step": 4821 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 2.434909492102455e-07, |
|
"loss": 2.106, |
|
"step": 4824 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 2.2965130510310685e-07, |
|
"loss": 2.0889, |
|
"step": 4827 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 2.1621611237071516e-07, |
|
"loss": 2.0497, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 2.031854254769594e-07, |
|
"loss": 2.0797, |
|
"step": 4833 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 1.9055929724595134e-07, |
|
"loss": 2.0286, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 1.7833777886175907e-07, |
|
"loss": 1.9975, |
|
"step": 4839 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 1.66520919868296e-07, |
|
"loss": 2.0781, |
|
"step": 4842 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 1.5510876816898778e-07, |
|
"loss": 2.1324, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 15.74, |
|
"learning_rate": 1.4410137002670575e-07, |
|
"loss": 2.0367, |
|
"step": 4848 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 1.334987700634893e-07, |
|
"loss": 2.1207, |
|
"step": 4851 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 1.233010112604016e-07, |
|
"loss": 2.1331, |
|
"step": 4854 |
|
}, |
|
{ |
|
"epoch": 15.77, |
|
"learning_rate": 1.1350813495737411e-07, |
|
"loss": 2.1653, |
|
"step": 4857 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"learning_rate": 1.0412018085297348e-07, |
|
"loss": 2.0604, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 9.513718700432384e-08, |
|
"loss": 2.0798, |
|
"step": 4863 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 8.655918982689581e-08, |
|
"loss": 2.04, |
|
"step": 4866 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 7.838622409436225e-08, |
|
"loss": 2.017, |
|
"step": 4869 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 7.061832293849823e-08, |
|
"loss": 2.0456, |
|
"step": 4872 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 6.325551784900352e-08, |
|
"loss": 2.1483, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 5.629783867336924e-08, |
|
"loss": 2.0091, |
|
"step": 4878 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 4.9745313616822445e-08, |
|
"loss": 2.1577, |
|
"step": 4881 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 4.3597969242126225e-08, |
|
"loss": 2.0558, |
|
"step": 4884 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 3.7855830469535334e-08, |
|
"loss": 2.0223, |
|
"step": 4887 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 3.2518920576662945e-08, |
|
"loss": 2.0815, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 2.7587261198414038e-08, |
|
"loss": 2.0999, |
|
"step": 4893 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 2.3060872326841066e-08, |
|
"loss": 2.1187, |
|
"step": 4896 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 1.8939772311143967e-08, |
|
"loss": 2.0901, |
|
"step": 4899 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"learning_rate": 1.522397785752583e-08, |
|
"loss": 2.1402, |
|
"step": 4902 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 1.1913504029159583e-08, |
|
"loss": 2.1058, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 9.008364246121393e-09, |
|
"loss": 2.0656, |
|
"step": 4908 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 6.508570285346238e-09, |
|
"loss": 2.1138, |
|
"step": 4911 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"learning_rate": 4.414132280550209e-09, |
|
"loss": 2.0542, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"learning_rate": 2.7250587222082957e-09, |
|
"loss": 1.972, |
|
"step": 4917 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 1.4413564575432858e-09, |
|
"loss": 2.069, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 5.630306904369498e-10, |
|
"loss": 2.1005, |
|
"step": 4923 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 9.008498147444755e-11, |
|
"loss": 1.9516, |
|
"step": 4926 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6450172750488208, |
|
"eval_loss": 2.037081241607666, |
|
"eval_runtime": 16.3187, |
|
"eval_samples_per_second": 134.631, |
|
"eval_steps_per_second": 67.346, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 4928, |
|
"total_flos": 1.0508396840353792e+16, |
|
"train_loss": 2.4320973860366, |
|
"train_runtime": 3195.9564, |
|
"train_samples_per_second": 98.68, |
|
"train_steps_per_second": 1.542 |
|
} |
|
], |
|
"max_steps": 4928, |
|
"num_train_epochs": 16, |
|
"total_flos": 1.0508396840353792e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|