{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.12499782989878648, "eval_steps": 1000, "global_step": 7200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.736080970816479e-05, "grad_norm": 10.5625, "learning_rate": 2e-06, "loss": 1.0, "step": 1 }, { "epoch": 0.001736080970816479, "grad_norm": 0.1513671875, "learning_rate": 0.0002, "loss": 0.3311, "step": 100 }, { "epoch": 0.003472161941632958, "grad_norm": 0.16796875, "learning_rate": 0.0004, "loss": 0.2169, "step": 200 }, { "epoch": 0.005208242912449436, "grad_norm": 0.10400390625, "learning_rate": 0.0006, "loss": 0.2032, "step": 300 }, { "epoch": 0.006944323883265916, "grad_norm": 0.11279296875, "learning_rate": 0.0008, "loss": 0.188, "step": 400 }, { "epoch": 0.008680404854082394, "grad_norm": 0.10107421875, "learning_rate": 0.001, "loss": 0.1758, "step": 500 }, { "epoch": 0.010416485824898873, "grad_norm": 0.09521484375, "learning_rate": 0.0012, "loss": 0.1637, "step": 600 }, { "epoch": 0.012152566795715351, "grad_norm": 0.08154296875, "learning_rate": 0.0014, "loss": 0.1518, "step": 700 }, { "epoch": 0.013888647766531832, "grad_norm": 0.08642578125, "learning_rate": 0.0016, "loss": 0.1485, "step": 800 }, { "epoch": 0.01562472873734831, "grad_norm": 0.1044921875, "learning_rate": 0.0018000000000000002, "loss": 0.1433, "step": 900 }, { "epoch": 0.01736080970816479, "grad_norm": 0.05419921875, "learning_rate": 0.002, "loss": 0.139, "step": 1000 }, { "epoch": 0.01736080970816479, "eval_covost2-en-de_loss": 1.896493673324585, "eval_covost2-en-de_model_preparation_time": 0.0057, "eval_covost2-en-de_runtime": 9.8697, "eval_covost2-en-de_samples_per_second": 6.485, "eval_covost2-en-de_steps_per_second": 0.811, "step": 1000 }, { "epoch": 0.01736080970816479, "eval_covost2-zh-en_loss": 3.1452860832214355, "eval_covost2-zh-en_model_preparation_time": 0.0057, "eval_covost2-zh-en_runtime": 8.3732, "eval_covost2-zh-en_samples_per_second": 7.643, "eval_covost2-zh-en_steps_per_second": 0.955, "step": 1000 }, { "epoch": 0.01736080970816479, "eval_peoplespeech-clean-transcription_loss": 3.2206106185913086, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057, "eval_peoplespeech-clean-transcription_runtime": 9.6941, "eval_peoplespeech-clean-transcription_samples_per_second": 6.602, "eval_peoplespeech-clean-transcription_steps_per_second": 0.825, "step": 1000 }, { "epoch": 0.01909689067898127, "grad_norm": 0.059814453125, "learning_rate": 0.001999725185109816, "loss": 0.1334, "step": 1100 }, { "epoch": 0.020832971649797746, "grad_norm": 0.07373046875, "learning_rate": 0.0019989008914857113, "loss": 0.1288, "step": 1200 }, { "epoch": 0.022569052620614226, "grad_norm": 0.049560546875, "learning_rate": 0.00199752757218401, "loss": 0.1262, "step": 1300 }, { "epoch": 0.024305133591430703, "grad_norm": 0.0517578125, "learning_rate": 0.001995605982021898, "loss": 0.1222, "step": 1400 }, { "epoch": 0.026041214562247183, "grad_norm": 0.058349609375, "learning_rate": 0.0019931371771625545, "loss": 0.1193, "step": 1500 }, { "epoch": 0.027777295533063663, "grad_norm": 0.0498046875, "learning_rate": 0.001990122514534651, "loss": 0.1196, "step": 1600 }, { "epoch": 0.02951337650388014, "grad_norm": 0.05517578125, "learning_rate": 0.0019865636510865464, "loss": 0.115, "step": 1700 }, { "epoch": 0.03124945747469662, "grad_norm": 0.044677734375, "learning_rate": 0.001982462542875576, "loss": 0.115, "step": 1800 }, { "epoch": 0.0329855384455131, "grad_norm": 0.05419921875, "learning_rate": 0.001977821443992945, "loss": 0.1125, "step": 1900 }, { "epoch": 0.03472161941632958, "grad_norm": 0.047119140625, "learning_rate": 0.001972642905324813, "loss": 0.1094, "step": 2000 }, { "epoch": 0.03472161941632958, "eval_covost2-en-de_loss": 1.6700351238250732, "eval_covost2-en-de_model_preparation_time": 0.0057, "eval_covost2-en-de_runtime": 8.1279, "eval_covost2-en-de_samples_per_second": 7.874, "eval_covost2-en-de_steps_per_second": 0.984, "step": 2000 }, { "epoch": 0.03472161941632958, "eval_covost2-zh-en_loss": 3.093877077102661, "eval_covost2-zh-en_model_preparation_time": 0.0057, "eval_covost2-zh-en_runtime": 8.1488, "eval_covost2-zh-en_samples_per_second": 7.854, "eval_covost2-zh-en_steps_per_second": 0.982, "step": 2000 }, { "epoch": 0.03472161941632958, "eval_peoplespeech-clean-transcription_loss": 2.478968620300293, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057, "eval_peoplespeech-clean-transcription_runtime": 9.5507, "eval_peoplespeech-clean-transcription_samples_per_second": 6.701, "eval_peoplespeech-clean-transcription_steps_per_second": 0.838, "step": 2000 }, { "epoch": 0.036457700387146054, "grad_norm": 0.048583984375, "learning_rate": 0.0019669297731502505, "loss": 0.1077, "step": 2100 }, { "epoch": 0.03819378135796254, "grad_norm": 0.054443359375, "learning_rate": 0.00196068518757684, "loss": 0.1069, "step": 2200 }, { "epoch": 0.039929862328779014, "grad_norm": 0.047119140625, "learning_rate": 0.001953912580814779, "loss": 0.1043, "step": 2300 }, { "epoch": 0.04166594329959549, "grad_norm": 0.044921875, "learning_rate": 0.0019466156752904343, "loss": 0.1035, "step": 2400 }, { "epoch": 0.043402024270411975, "grad_norm": 0.050537109375, "learning_rate": 0.0019387984816003866, "loss": 0.1033, "step": 2500 }, { "epoch": 0.04513810524122845, "grad_norm": 0.056396484375, "learning_rate": 0.0019304652963070869, "loss": 0.102, "step": 2600 }, { "epoch": 0.04687418621204493, "grad_norm": 0.046875, "learning_rate": 0.0019216206995773372, "loss": 0.0998, "step": 2700 }, { "epoch": 0.048610267182861405, "grad_norm": 0.042236328125, "learning_rate": 0.0019122695526648968, "loss": 0.1002, "step": 2800 }, { "epoch": 0.05034634815367789, "grad_norm": 0.04638671875, "learning_rate": 0.0019024169952385887, "loss": 0.0978, "step": 2900 }, { "epoch": 0.052082429124494366, "grad_norm": 0.05126953125, "learning_rate": 0.0018920684425573864, "loss": 0.097, "step": 3000 }, { "epoch": 0.052082429124494366, "eval_covost2-en-de_loss": 1.749150276184082, "eval_covost2-en-de_model_preparation_time": 0.0057, "eval_covost2-en-de_runtime": 8.1948, "eval_covost2-en-de_samples_per_second": 7.81, "eval_covost2-en-de_steps_per_second": 0.976, "step": 3000 }, { "epoch": 0.052082429124494366, "eval_covost2-zh-en_loss": 3.198117971420288, "eval_covost2-zh-en_model_preparation_time": 0.0057, "eval_covost2-zh-en_runtime": 8.1979, "eval_covost2-zh-en_samples_per_second": 7.807, "eval_covost2-zh-en_steps_per_second": 0.976, "step": 3000 }, { "epoch": 0.052082429124494366, "eval_peoplespeech-clean-transcription_loss": 2.345036506652832, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057, "eval_peoplespeech-clean-transcription_runtime": 11.4402, "eval_peoplespeech-clean-transcription_samples_per_second": 5.594, "eval_peoplespeech-clean-transcription_steps_per_second": 0.699, "step": 3000 }, { "epoch": 0.05381851009531084, "grad_norm": 0.06494140625, "learning_rate": 0.0018812295824940284, "loss": 0.0955, "step": 3100 }, { "epoch": 0.055554591066127326, "grad_norm": 0.044677734375, "learning_rate": 0.0018699063724087904, "loss": 0.0951, "step": 3200 }, { "epoch": 0.0572906720369438, "grad_norm": 0.0390625, "learning_rate": 0.0018581050358751443, "loss": 0.0947, "step": 3300 }, { "epoch": 0.05902675300776028, "grad_norm": 0.056396484375, "learning_rate": 0.0018458320592590974, "loss": 0.0939, "step": 3400 }, { "epoch": 0.060762833978576763, "grad_norm": 0.047119140625, "learning_rate": 0.0018330941881540914, "loss": 0.0941, "step": 3500 }, { "epoch": 0.06249891494939324, "grad_norm": 0.046630859375, "learning_rate": 0.0018198984236734246, "loss": 0.0927, "step": 3600 }, { "epoch": 0.06423499592020972, "grad_norm": 0.055419921875, "learning_rate": 0.0018062520186022297, "loss": 0.0948, "step": 3700 }, { "epoch": 0.0659710768910262, "grad_norm": 0.046142578125, "learning_rate": 0.0017921624734111292, "loss": 0.09, "step": 3800 }, { "epoch": 0.06770715786184267, "grad_norm": 0.04736328125, "learning_rate": 0.001777637532133752, "loss": 0.0926, "step": 3900 }, { "epoch": 0.06944323883265915, "grad_norm": 0.048828125, "learning_rate": 0.0017626851781103819, "loss": 0.0906, "step": 4000 }, { "epoch": 0.06944323883265915, "eval_covost2-en-de_loss": 1.7936017513275146, "eval_covost2-en-de_model_preparation_time": 0.0057, "eval_covost2-en-de_runtime": 8.0356, "eval_covost2-en-de_samples_per_second": 7.965, "eval_covost2-en-de_steps_per_second": 0.996, "step": 4000 }, { "epoch": 0.06944323883265915, "eval_covost2-zh-en_loss": 3.2699265480041504, "eval_covost2-zh-en_model_preparation_time": 0.0057, "eval_covost2-zh-en_runtime": 9.5779, "eval_covost2-zh-en_samples_per_second": 6.682, "eval_covost2-zh-en_steps_per_second": 0.835, "step": 4000 }, { "epoch": 0.06944323883265915, "eval_peoplespeech-clean-transcription_loss": 2.3380110263824463, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057, "eval_peoplespeech-clean-transcription_runtime": 9.5943, "eval_peoplespeech-clean-transcription_samples_per_second": 6.671, "eval_peoplespeech-clean-transcription_steps_per_second": 0.834, "step": 4000 }, { "epoch": 0.07117931980347564, "grad_norm": 0.041259765625, "learning_rate": 0.001747313629600077, "loss": 0.0926, "step": 4100 }, { "epoch": 0.07291540077429211, "grad_norm": 0.05322265625, "learning_rate": 0.001731531335263669, "loss": 0.0907, "step": 4200 }, { "epoch": 0.07465148174510859, "grad_norm": 0.05126953125, "learning_rate": 0.0017153469695201276, "loss": 0.0898, "step": 4300 }, { "epoch": 0.07638756271592508, "grad_norm": 0.061767578125, "learning_rate": 0.0016987694277788418, "loss": 0.0876, "step": 4400 }, { "epoch": 0.07812364368674155, "grad_norm": 0.042724609375, "learning_rate": 0.001681807821550438, "loss": 0.0874, "step": 4500 }, { "epoch": 0.07985972465755803, "grad_norm": 0.05126953125, "learning_rate": 0.0016644714734388218, "loss": 0.0865, "step": 4600 }, { "epoch": 0.08159580562837451, "grad_norm": 0.042724609375, "learning_rate": 0.0016467699120171987, "loss": 0.0866, "step": 4700 }, { "epoch": 0.08333188659919098, "grad_norm": 0.0419921875, "learning_rate": 0.001628712866590885, "loss": 0.0864, "step": 4800 }, { "epoch": 0.08506796757000747, "grad_norm": 0.051513671875, "learning_rate": 0.0016103102618497923, "loss": 0.0862, "step": 4900 }, { "epoch": 0.08680404854082395, "grad_norm": 0.052734375, "learning_rate": 0.0015915722124135226, "loss": 0.0855, "step": 5000 }, { "epoch": 0.08680404854082395, "eval_covost2-en-de_loss": 1.7862941026687622, "eval_covost2-en-de_model_preparation_time": 0.0057, "eval_covost2-en-de_runtime": 8.2861, "eval_covost2-en-de_samples_per_second": 7.724, "eval_covost2-en-de_steps_per_second": 0.965, "step": 5000 }, { "epoch": 0.08680404854082395, "eval_covost2-zh-en_loss": 3.33290433883667, "eval_covost2-zh-en_model_preparation_time": 0.0057, "eval_covost2-zh-en_runtime": 8.4063, "eval_covost2-zh-en_samples_per_second": 7.613, "eval_covost2-zh-en_steps_per_second": 0.952, "step": 5000 }, { "epoch": 0.08680404854082395, "eval_peoplespeech-clean-transcription_loss": 2.2601113319396973, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057, "eval_peoplespeech-clean-transcription_runtime": 9.4946, "eval_peoplespeech-clean-transcription_samples_per_second": 6.741, "eval_peoplespeech-clean-transcription_steps_per_second": 0.843, "step": 5000 }, { "epoch": 0.08854012951164042, "grad_norm": 0.053466796875, "learning_rate": 0.001572509017272072, "loss": 0.0872, "step": 5100 }, { "epoch": 0.0902762104824569, "grad_norm": 0.044189453125, "learning_rate": 0.0015531311541251993, "loss": 0.0859, "step": 5200 }, { "epoch": 0.09201229145327339, "grad_norm": 0.052978515625, "learning_rate": 0.0015334492736235703, "loss": 0.085, "step": 5300 }, { "epoch": 0.09374837242408986, "grad_norm": 0.04833984375, "learning_rate": 0.0015134741935148419, "loss": 0.0844, "step": 5400 }, { "epoch": 0.09548445339490634, "grad_norm": 0.047119140625, "learning_rate": 0.0014932168926979072, "loss": 0.0844, "step": 5500 }, { "epoch": 0.09722053436572281, "grad_norm": 0.05029296875, "learning_rate": 0.0014726885051885652, "loss": 0.0856, "step": 5600 }, { "epoch": 0.0989566153365393, "grad_norm": 0.049560546875, "learning_rate": 0.0014519003139999338, "loss": 0.0841, "step": 5700 }, { "epoch": 0.10069269630735578, "grad_norm": 0.056884765625, "learning_rate": 0.0014308637449409706, "loss": 0.0841, "step": 5800 }, { "epoch": 0.10242877727817225, "grad_norm": 0.041015625, "learning_rate": 0.0014095903603365066, "loss": 0.0825, "step": 5900 }, { "epoch": 0.10416485824898873, "grad_norm": 0.048583984375, "learning_rate": 0.0013880918526722496, "loss": 0.0828, "step": 6000 }, { "epoch": 0.10416485824898873, "eval_covost2-en-de_loss": 1.8097732067108154, "eval_covost2-en-de_model_preparation_time": 0.0057, "eval_covost2-en-de_runtime": 8.2052, "eval_covost2-en-de_samples_per_second": 7.8, "eval_covost2-en-de_steps_per_second": 0.975, "step": 6000 }, { "epoch": 0.10416485824898873, "eval_covost2-zh-en_loss": 3.331326961517334, "eval_covost2-zh-en_model_preparation_time": 0.0057, "eval_covost2-zh-en_runtime": 8.2653, "eval_covost2-zh-en_samples_per_second": 7.743, "eval_covost2-zh-en_steps_per_second": 0.968, "step": 6000 }, { "epoch": 0.10416485824898873, "eval_peoplespeech-clean-transcription_loss": 2.250232219696045, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057, "eval_peoplespeech-clean-transcription_runtime": 9.4708, "eval_peoplespeech-clean-transcription_samples_per_second": 6.758, "eval_peoplespeech-clean-transcription_steps_per_second": 0.845, "step": 6000 }, { "epoch": 0.10590093921980522, "grad_norm": 0.04443359375, "learning_rate": 0.0013663800381682463, "loss": 0.0819, "step": 6100 }, { "epoch": 0.10763702019062169, "grad_norm": 0.05419921875, "learning_rate": 0.0013444668502843329, "loss": 0.08, "step": 6200 }, { "epoch": 0.10937310116143817, "grad_norm": 0.0478515625, "learning_rate": 0.0013223643331611537, "loss": 0.0805, "step": 6300 }, { "epoch": 0.11110918213225465, "grad_norm": 0.051513671875, "learning_rate": 0.001300084635000341, "loss": 0.0799, "step": 6400 }, { "epoch": 0.11284526310307112, "grad_norm": 0.0498046875, "learning_rate": 0.0012776400013875004, "loss": 0.0807, "step": 6500 }, { "epoch": 0.1145813440738876, "grad_norm": 0.050537109375, "learning_rate": 0.0012550427685616766, "loss": 0.0799, "step": 6600 }, { "epoch": 0.11631742504470409, "grad_norm": 0.05029296875, "learning_rate": 0.0012323053566349834, "loss": 0.0802, "step": 6700 }, { "epoch": 0.11805350601552056, "grad_norm": 0.047119140625, "learning_rate": 0.0012094402627661448, "loss": 0.0796, "step": 6800 }, { "epoch": 0.11978958698633704, "grad_norm": 0.044677734375, "learning_rate": 0.0011864600542916813, "loss": 0.0784, "step": 6900 }, { "epoch": 0.12152566795715353, "grad_norm": 0.0478515625, "learning_rate": 0.0011633773618185302, "loss": 0.0808, "step": 7000 }, { "epoch": 0.12152566795715353, "eval_covost2-en-de_loss": 1.7786378860473633, "eval_covost2-en-de_model_preparation_time": 0.0057, "eval_covost2-en-de_runtime": 8.0291, "eval_covost2-en-de_samples_per_second": 7.971, "eval_covost2-en-de_steps_per_second": 0.996, "step": 7000 }, { "epoch": 0.12152566795715353, "eval_covost2-zh-en_loss": 3.273571252822876, "eval_covost2-zh-en_model_preparation_time": 0.0057, "eval_covost2-zh-en_runtime": 8.3234, "eval_covost2-zh-en_samples_per_second": 7.689, "eval_covost2-zh-en_steps_per_second": 0.961, "step": 7000 }, { "epoch": 0.12152566795715353, "eval_peoplespeech-clean-transcription_loss": 2.2290830612182617, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0057, "eval_peoplespeech-clean-transcription_runtime": 9.7693, "eval_peoplespeech-clean-transcription_samples_per_second": 6.551, "eval_peoplespeech-clean-transcription_steps_per_second": 0.819, "step": 7000 }, { "epoch": 0.12326174892797, "grad_norm": 0.0478515625, "learning_rate": 0.0011402048722818862, "loss": 0.0786, "step": 7100 }, { "epoch": 0.12499782989878648, "grad_norm": 0.049560546875, "learning_rate": 0.0011169553219720827, "loss": 0.0795, "step": 7200 } ], "logging_steps": 100, "max_steps": 14400, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3600, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.3101190265194086e+17, "train_batch_size": 24, "trial_name": null, "trial_params": null }