{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0726161106939827,
  "eval_steps": 200,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.021452322213879653,
      "eval_loss": Infinity,
      "eval_runtime": 109.0854,
      "eval_samples_per_second": 35.761,
      "eval_steps_per_second": 0.559,
      "eval_wer": 0.5591701685746027,
      "step": 200
    },
    {
      "epoch": 0.042904644427759306,
      "eval_loss": Infinity,
      "eval_runtime": 105.5961,
      "eval_samples_per_second": 36.943,
      "eval_steps_per_second": 0.578,
      "eval_wer": 0.4289474955320485,
      "step": 400
    },
    {
      "epoch": 0.05363080553469913,
      "grad_norm": 2.6033225059509277,
      "learning_rate": 0.0001494,
      "loss": 2.1964,
      "step": 500
    },
    {
      "epoch": 0.06435696664163895,
      "eval_loss": Infinity,
      "eval_runtime": 104.8581,
      "eval_samples_per_second": 37.203,
      "eval_steps_per_second": 0.582,
      "eval_wer": 0.43744867893542,
      "step": 600
    },
    {
      "epoch": 0.08580928885551861,
      "eval_loss": Infinity,
      "eval_runtime": 104.5471,
      "eval_samples_per_second": 37.313,
      "eval_steps_per_second": 0.583,
      "eval_wer": 0.4944452494807516,
      "step": 800
    },
    {
      "epoch": 0.10726161106939826,
      "grad_norm": 4.917770862579346,
      "learning_rate": 0.0002988,
      "loss": 0.8327,
      "step": 1000
    },
    {
      "epoch": 0.10726161106939826,
      "eval_loss": Infinity,
      "eval_runtime": 104.2475,
      "eval_samples_per_second": 37.421,
      "eval_steps_per_second": 0.585,
      "eval_wer": 0.5149736753127566,
      "step": 1000
    },
    {
      "epoch": 0.1287139332832779,
      "eval_loss": Infinity,
      "eval_runtime": 104.0588,
      "eval_samples_per_second": 37.488,
      "eval_steps_per_second": 0.586,
      "eval_wer": 0.5633966091870743,
      "step": 1200
    },
    {
      "epoch": 0.15016625549715756,
      "eval_loss": Infinity,
      "eval_runtime": 104.6247,
      "eval_samples_per_second": 37.286,
      "eval_steps_per_second": 0.583,
      "eval_wer": 0.5355021011447616,
      "step": 1400
    },
    {
      "epoch": 0.1608924166040974,
      "grad_norm": 2.821734666824341,
      "learning_rate": 0.00028346666666666665,
      "loss": 0.91,
      "step": 1500
    },
    {
      "epoch": 0.17161857771103722,
      "eval_loss": Infinity,
      "eval_runtime": 104.8778,
      "eval_samples_per_second": 37.196,
      "eval_steps_per_second": 0.582,
      "eval_wer": 0.515239337294112,
      "step": 1600
    },
    {
      "epoch": 0.19307089992491688,
      "eval_loss": Infinity,
      "eval_runtime": 105.4509,
      "eval_samples_per_second": 36.994,
      "eval_steps_per_second": 0.578,
      "eval_wer": 0.5594599816451722,
      "step": 1800
    },
    {
      "epoch": 0.21452322213879652,
      "grad_norm": 9.015162467956543,
      "learning_rate": 0.0002668,
      "loss": 0.8721,
      "step": 2000
    },
    {
      "epoch": 0.21452322213879652,
      "eval_loss": Infinity,
      "eval_runtime": 105.3779,
      "eval_samples_per_second": 37.019,
      "eval_steps_per_second": 0.579,
      "eval_wer": 0.5056513548761049,
      "step": 2000
    },
    {
      "epoch": 0.23597554435267618,
      "eval_loss": Infinity,
      "eval_runtime": 105.6302,
      "eval_samples_per_second": 36.931,
      "eval_steps_per_second": 0.577,
      "eval_wer": 0.5041298362556151,
      "step": 2200
    },
    {
      "epoch": 0.2574278665665558,
      "eval_loss": Infinity,
      "eval_runtime": 105.5892,
      "eval_samples_per_second": 36.945,
      "eval_steps_per_second": 0.578,
      "eval_wer": 0.5145631067961165,
      "step": 2400
    },
    {
      "epoch": 0.26815402767349567,
      "grad_norm": 5.016167163848877,
      "learning_rate": 0.0002501333333333333,
      "loss": 0.8218,
      "step": 2500
    },
    {
      "epoch": 0.27888018878043547,
      "eval_loss": Infinity,
      "eval_runtime": 106.4951,
      "eval_samples_per_second": 36.631,
      "eval_steps_per_second": 0.573,
      "eval_wer": 0.5018113316910593,
      "step": 2600
    },
    {
      "epoch": 0.3003325109943151,
      "eval_loss": Infinity,
      "eval_runtime": 106.2902,
      "eval_samples_per_second": 36.701,
      "eval_steps_per_second": 0.574,
      "eval_wer": 0.5090566584552964,
      "step": 2800
    },
    {
      "epoch": 0.3217848332081948,
      "grad_norm": 2.5943267345428467,
      "learning_rate": 0.00023346666666666666,
      "loss": 0.8469,
      "step": 3000
    },
    {
      "epoch": 0.3217848332081948,
      "eval_loss": Infinity,
      "eval_runtime": 105.8685,
      "eval_samples_per_second": 36.848,
      "eval_steps_per_second": 0.576,
      "eval_wer": 0.5036709655605468,
      "step": 3000
    },
    {
      "epoch": 0.34323715542207445,
      "eval_loss": Infinity,
      "eval_runtime": 105.9311,
      "eval_samples_per_second": 36.826,
      "eval_steps_per_second": 0.576,
      "eval_wer": 0.4703183113558421,
      "step": 3200
    },
    {
      "epoch": 0.3646894776359541,
      "eval_loss": Infinity,
      "eval_runtime": 105.4279,
      "eval_samples_per_second": 37.002,
      "eval_steps_per_second": 0.579,
      "eval_wer": 0.47951987634642323,
      "step": 3400
    },
    {
      "epoch": 0.3754156387428939,
      "grad_norm": 4.555402755737305,
      "learning_rate": 0.0002168333333333333,
      "loss": 0.8142,
      "step": 3500
    },
    {
      "epoch": 0.38614179984983377,
      "eval_loss": Infinity,
      "eval_runtime": 105.6085,
      "eval_samples_per_second": 36.938,
      "eval_steps_per_second": 0.578,
      "eval_wer": 0.4714051103704777,
      "step": 3600
    },
    {
      "epoch": 0.40759412206371337,
      "eval_loss": Infinity,
      "eval_runtime": 105.5848,
      "eval_samples_per_second": 36.947,
      "eval_steps_per_second": 0.578,
      "eval_wer": 0.4553929382215138,
      "step": 3800
    },
    {
      "epoch": 0.42904644427759303,
      "grad_norm": 15.551188468933105,
      "learning_rate": 0.0002002,
      "loss": 0.8085,
      "step": 4000
    },
    {
      "epoch": 0.42904644427759303,
      "eval_loss": Infinity,
      "eval_runtime": 105.8874,
      "eval_samples_per_second": 36.841,
      "eval_steps_per_second": 0.576,
      "eval_wer": 0.4505868714679032,
      "step": 4000
    },
    {
      "epoch": 0.4504987664914727,
      "eval_loss": Infinity,
      "eval_runtime": 107.6738,
      "eval_samples_per_second": 36.23,
      "eval_steps_per_second": 0.567,
      "eval_wer": 0.4457566536250785,
      "step": 4200
    },
    {
      "epoch": 0.47195108870535235,
      "eval_loss": Infinity,
      "eval_runtime": 105.5778,
      "eval_samples_per_second": 36.949,
      "eval_steps_per_second": 0.578,
      "eval_wer": 0.43669999516978214,
      "step": 4400
    },
    {
      "epoch": 0.4826772498122922,
      "grad_norm": 4.841684818267822,
      "learning_rate": 0.0001835333333333333,
      "loss": 0.7802,
      "step": 4500
    },
    {
      "epoch": 0.493403410919232,
      "eval_loss": Infinity,
      "eval_runtime": 105.9573,
      "eval_samples_per_second": 36.817,
      "eval_steps_per_second": 0.576,
      "eval_wer": 0.4401052987489736,
      "step": 4600
    },
    {
      "epoch": 0.5148557331331116,
      "eval_loss": Infinity,
      "eval_runtime": 105.3523,
      "eval_samples_per_second": 37.028,
      "eval_steps_per_second": 0.579,
      "eval_wer": 0.43336714485823313,
      "step": 4800
    },
    {
      "epoch": 0.5363080553469913,
      "grad_norm": 7.372885227203369,
      "learning_rate": 0.0001669,
      "loss": 0.7493,
      "step": 5000
    },
    {
      "epoch": 0.5363080553469913,
      "eval_loss": Infinity,
      "eval_runtime": 107.6356,
      "eval_samples_per_second": 36.243,
      "eval_steps_per_second": 0.567,
      "eval_wer": 0.4224267014442351,
      "step": 5000
    },
    {
      "epoch": 0.5577603775608709,
      "eval_loss": Infinity,
      "eval_runtime": 107.4854,
      "eval_samples_per_second": 36.293,
      "eval_steps_per_second": 0.568,
      "eval_wer": 0.43278751871709414,
      "step": 5200
    },
    {
      "epoch": 0.5792126997747507,
      "eval_loss": Infinity,
      "eval_runtime": 105.275,
      "eval_samples_per_second": 37.055,
      "eval_steps_per_second": 0.579,
      "eval_wer": 0.41764478577983866,
      "step": 5400
    },
    {
      "epoch": 0.5899388608816905,
      "grad_norm": 2.7961230278015137,
      "learning_rate": 0.00015023333333333332,
      "loss": 0.7668,
      "step": 5500
    },
    {
      "epoch": 0.6006650219886303,
      "eval_loss": Infinity,
      "eval_runtime": 105.464,
      "eval_samples_per_second": 36.989,
      "eval_steps_per_second": 0.578,
      "eval_wer": 0.41829686518862,
      "step": 5600
    },
    {
      "epoch": 0.62211734420251,
      "eval_loss": Infinity,
      "eval_runtime": 104.96,
      "eval_samples_per_second": 37.167,
      "eval_steps_per_second": 0.581,
      "eval_wer": 0.40296092353765156,
      "step": 5800
    },
    {
      "epoch": 0.6435696664163896,
      "grad_norm": 6.007960319519043,
      "learning_rate": 0.0001336,
      "loss": 0.6999,
      "step": 6000
    },
    {
      "epoch": 0.6435696664163896,
      "eval_loss": Infinity,
      "eval_runtime": 104.9116,
      "eval_samples_per_second": 37.184,
      "eval_steps_per_second": 0.581,
      "eval_wer": 0.4124523015988021,
      "step": 6000
    },
    {
      "epoch": 0.6650219886302692,
      "eval_loss": Infinity,
      "eval_runtime": 108.5507,
      "eval_samples_per_second": 35.937,
      "eval_steps_per_second": 0.562,
      "eval_wer": 0.40759793266676325,
      "step": 6200
    },
    {
      "epoch": 0.6864743108441489,
      "eval_loss": Infinity,
      "eval_runtime": 104.9858,
      "eval_samples_per_second": 37.157,
      "eval_steps_per_second": 0.581,
      "eval_wer": 0.39170651596386996,
      "step": 6400
    },
    {
      "epoch": 0.6972004719510887,
      "grad_norm": 44.30250549316406,
      "learning_rate": 0.00011693333333333332,
      "loss": 0.6918,
      "step": 6500
    },
    {
      "epoch": 0.7079266330580285,
      "eval_loss": Infinity,
      "eval_runtime": 106.5414,
      "eval_samples_per_second": 36.615,
      "eval_steps_per_second": 0.573,
      "eval_wer": 0.4004250591701686,
      "step": 6600
    },
    {
      "epoch": 0.7293789552719082,
      "eval_loss": Infinity,
      "eval_runtime": 104.9171,
      "eval_samples_per_second": 37.182,
      "eval_steps_per_second": 0.581,
      "eval_wer": 0.38653818287204755,
      "step": 6800
    },
    {
      "epoch": 0.7508312774857878,
      "grad_norm": 3.788344144821167,
      "learning_rate": 0.00010029999999999998,
      "loss": 0.6888,
      "step": 7000
    },
    {
      "epoch": 0.7508312774857878,
      "eval_loss": Infinity,
      "eval_runtime": 105.3057,
      "eval_samples_per_second": 37.045,
      "eval_steps_per_second": 0.579,
      "eval_wer": 0.3785200212529585,
      "step": 7000
    },
    {
      "epoch": 0.7722835996996675,
      "eval_loss": Infinity,
      "eval_runtime": 104.7325,
      "eval_samples_per_second": 37.247,
      "eval_steps_per_second": 0.582,
      "eval_wer": 0.3824083466164324,
      "step": 7200
    },
    {
      "epoch": 0.7937359219135471,
      "eval_loss": Infinity,
      "eval_runtime": 105.0488,
      "eval_samples_per_second": 37.135,
      "eval_steps_per_second": 0.581,
      "eval_wer": 0.37426942955127274,
      "step": 7400
    },
    {
      "epoch": 0.8044620830204869,
      "grad_norm": 5.486635684967041,
      "learning_rate": 8.363333333333332e-05,
      "loss": 0.646,
      "step": 7500
    },
    {
      "epoch": 0.8151882441274267,
      "eval_loss": Infinity,
      "eval_runtime": 106.6481,
      "eval_samples_per_second": 36.578,
      "eval_steps_per_second": 0.572,
      "eval_wer": 0.3673139158576052,
      "step": 7600
    },
    {
      "epoch": 0.8366405663413065,
      "eval_loss": Infinity,
      "eval_runtime": 104.8533,
      "eval_samples_per_second": 37.204,
      "eval_steps_per_second": 0.582,
      "eval_wer": 0.36668598753803794,
      "step": 7800
    },
    {
      "epoch": 0.8580928885551861,
      "grad_norm": 3.9184212684631348,
      "learning_rate": 6.696666666666666e-05,
      "loss": 0.6324,
      "step": 8000
    },
    {
      "epoch": 0.8580928885551861,
      "eval_loss": Infinity,
      "eval_runtime": 105.6572,
      "eval_samples_per_second": 36.921,
      "eval_steps_per_second": 0.577,
      "eval_wer": 0.3661546635753272,
      "step": 8000
    },
    {
      "epoch": 0.8795452107690658,
      "eval_loss": Infinity,
      "eval_runtime": 104.6274,
      "eval_samples_per_second": 37.285,
      "eval_steps_per_second": 0.583,
      "eval_wer": 0.36009274018258225,
      "step": 8200
    },
    {
      "epoch": 0.9009975329829454,
      "eval_loss": Infinity,
      "eval_runtime": 104.9439,
      "eval_samples_per_second": 37.172,
      "eval_steps_per_second": 0.581,
      "eval_wer": 0.35345119064869823,
      "step": 8400
    },
    {
      "epoch": 0.9117236940898852,
      "grad_norm": 3.5586395263671875,
      "learning_rate": 5.0299999999999996e-05,
      "loss": 0.6221,
      "step": 8500
    },
    {
      "epoch": 0.9224498551968251,
      "eval_loss": Infinity,
      "eval_runtime": 105.157,
      "eval_samples_per_second": 37.097,
      "eval_steps_per_second": 0.58,
      "eval_wer": 0.35258175143698983,
      "step": 8600
    },
    {
      "epoch": 0.9439021774107047,
      "eval_loss": Infinity,
      "eval_runtime": 105.1867,
      "eval_samples_per_second": 37.086,
      "eval_steps_per_second": 0.58,
      "eval_wer": 0.34874172825194416,
      "step": 8800
    },
    {
      "epoch": 0.9653544996245844,
      "grad_norm": 3.914166212081909,
      "learning_rate": 3.363333333333333e-05,
      "loss": 0.6215,
      "step": 9000
    },
    {
      "epoch": 0.9653544996245844,
      "eval_loss": Infinity,
      "eval_runtime": 105.9335,
      "eval_samples_per_second": 36.825,
      "eval_steps_per_second": 0.576,
      "eval_wer": 0.34811379993237695,
      "step": 9000
    },
    {
      "epoch": 0.986806821838464,
      "eval_loss": Infinity,
      "eval_runtime": 105.4283,
      "eval_samples_per_second": 37.001,
      "eval_steps_per_second": 0.579,
      "eval_wer": 0.3447084963531855,
      "step": 9200
    },
    {
      "epoch": 1.0082591440523436,
      "eval_loss": Infinity,
      "eval_runtime": 105.2731,
      "eval_samples_per_second": 37.056,
      "eval_steps_per_second": 0.579,
      "eval_wer": 0.34103753079263877,
      "step": 9400
    },
    {
      "epoch": 1.0189853051592834,
      "grad_norm": 3.3699042797088623,
      "learning_rate": 1.6966666666666665e-05,
      "loss": 0.5603,
      "step": 9500
    },
    {
      "epoch": 1.0297114662662232,
      "eval_loss": Infinity,
      "eval_runtime": 105.4958,
      "eval_samples_per_second": 36.978,
      "eval_steps_per_second": 0.578,
      "eval_wer": 0.34053035791914216,
      "step": 9600
    },
    {
      "epoch": 1.051163788480103,
      "eval_loss": Infinity,
      "eval_runtime": 105.0872,
      "eval_samples_per_second": 37.122,
      "eval_steps_per_second": 0.58,
      "eval_wer": 0.34120658841713764,
      "step": 9800
    },
    {
      "epoch": 1.0726161106939827,
      "grad_norm": 6.575745582580566,
      "learning_rate": 3.333333333333333e-07,
      "loss": 0.5284,
      "step": 10000
    },
    {
      "epoch": 1.0726161106939827,
      "eval_loss": Infinity,
      "eval_runtime": 108.9867,
      "eval_samples_per_second": 35.793,
      "eval_steps_per_second": 0.56,
      "eval_wer": 0.3401922426701444,
      "step": 10000
    },
    {
      "epoch": 1.0726161106939827,
      "step": 10000,
      "total_flos": 4.496412338111517e+18,
      "train_loss": 0.8045109680175782,
      "train_runtime": 7510.9356,
      "train_samples_per_second": 5.326,
      "train_steps_per_second": 1.331
    }
  ],
  "logging_steps": 500,
  "max_steps": 10000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.496412338111517e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}