|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 8400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5.88e-05, |
|
"loss": 6.951, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.0001188, |
|
"loss": 3.1694, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.00017879999999999998, |
|
"loss": 3.0357, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.0002388, |
|
"loss": 2.8568, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.0002988, |
|
"loss": 1.8805, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_loss": 0.7547174692153931, |
|
"eval_runtime": 47.8349, |
|
"eval_samples_per_second": 25.609, |
|
"eval_steps_per_second": 0.815, |
|
"eval_wer": 0.8438120450033091, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.0002962784810126582, |
|
"loss": 1.5197, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.0002924810126582278, |
|
"loss": 1.3887, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 0.00028868354430379743, |
|
"loss": 1.3124, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 0.00028488607594936706, |
|
"loss": 1.2433, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 0.0002810886075949367, |
|
"loss": 1.2123, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"eval_loss": 0.47322335839271545, |
|
"eval_runtime": 48.1093, |
|
"eval_samples_per_second": 25.463, |
|
"eval_steps_per_second": 0.811, |
|
"eval_wer": 0.6542025148908008, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 0.0002772911392405063, |
|
"loss": 1.1794, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 0.0002734936708860759, |
|
"loss": 1.1509, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 0.0002696962025316455, |
|
"loss": 1.1197, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 0.00026589873417721515, |
|
"loss": 1.0924, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 0.0002621012658227848, |
|
"loss": 1.0822, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"eval_loss": 0.402739018201828, |
|
"eval_runtime": 47.5558, |
|
"eval_samples_per_second": 25.759, |
|
"eval_steps_per_second": 0.82, |
|
"eval_wer": 0.5777630708140304, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 0.0002583037974683544, |
|
"loss": 1.0552, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 20.24, |
|
"learning_rate": 0.000254506329113924, |
|
"loss": 1.0453, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"learning_rate": 0.00025070886075949367, |
|
"loss": 1.0218, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"learning_rate": 0.0002469113924050633, |
|
"loss": 1.024, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 23.81, |
|
"learning_rate": 0.00024311392405063287, |
|
"loss": 0.9938, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 23.81, |
|
"eval_loss": 0.38469693064689636, |
|
"eval_runtime": 47.562, |
|
"eval_samples_per_second": 25.756, |
|
"eval_steps_per_second": 0.82, |
|
"eval_wer": 0.5523935583498787, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 0.0002393164556962025, |
|
"loss": 0.9897, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 26.19, |
|
"learning_rate": 0.00023551898734177216, |
|
"loss": 0.9857, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 27.38, |
|
"learning_rate": 0.00023172151898734174, |
|
"loss": 0.9537, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"learning_rate": 0.0002279240506329114, |
|
"loss": 0.941, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 29.76, |
|
"learning_rate": 0.000224126582278481, |
|
"loss": 0.9383, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 29.76, |
|
"eval_loss": 0.3845490515232086, |
|
"eval_runtime": 47.7826, |
|
"eval_samples_per_second": 25.637, |
|
"eval_steps_per_second": 0.816, |
|
"eval_wer": 0.5204059121994264, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 30.95, |
|
"learning_rate": 0.00022032911392405062, |
|
"loss": 0.9304, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 32.14, |
|
"learning_rate": 0.00021653164556962025, |
|
"loss": 0.925, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 0.00021273417721518986, |
|
"loss": 0.9119, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 34.52, |
|
"learning_rate": 0.00020893670886075949, |
|
"loss": 0.8988, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"learning_rate": 0.0002051392405063291, |
|
"loss": 0.8932, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"eval_loss": 0.3832888603210449, |
|
"eval_runtime": 47.4524, |
|
"eval_samples_per_second": 25.815, |
|
"eval_steps_per_second": 0.822, |
|
"eval_wer": 0.529671299360247, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"learning_rate": 0.00020134177215189872, |
|
"loss": 0.8709, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 38.1, |
|
"learning_rate": 0.00019754430379746835, |
|
"loss": 0.8767, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 39.29, |
|
"learning_rate": 0.00019374683544303795, |
|
"loss": 0.8677, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 40.48, |
|
"learning_rate": 0.00018994936708860758, |
|
"loss": 0.8642, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.00018615189873417718, |
|
"loss": 0.8495, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"eval_loss": 0.37586063146591187, |
|
"eval_runtime": 47.6167, |
|
"eval_samples_per_second": 25.726, |
|
"eval_steps_per_second": 0.819, |
|
"eval_wer": 0.5036399735274653, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"learning_rate": 0.0001823544303797468, |
|
"loss": 0.8546, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 44.05, |
|
"learning_rate": 0.00017855696202531644, |
|
"loss": 0.8455, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 45.24, |
|
"learning_rate": 0.00017475949367088604, |
|
"loss": 0.8308, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 46.43, |
|
"learning_rate": 0.0001709620253164557, |
|
"loss": 0.8179, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 47.62, |
|
"learning_rate": 0.00016716455696202527, |
|
"loss": 0.8201, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 47.62, |
|
"eval_loss": 0.36158040165901184, |
|
"eval_runtime": 47.7093, |
|
"eval_samples_per_second": 25.676, |
|
"eval_steps_per_second": 0.817, |
|
"eval_wer": 0.48588131480255903, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 48.81, |
|
"learning_rate": 0.00016340506329113924, |
|
"loss": 0.8052, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.00015960759493670884, |
|
"loss": 0.8005, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 51.19, |
|
"learning_rate": 0.00015581012658227847, |
|
"loss": 0.7943, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 52.38, |
|
"learning_rate": 0.00015201265822784808, |
|
"loss": 0.7811, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 53.57, |
|
"learning_rate": 0.0001482151898734177, |
|
"loss": 0.7794, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 53.57, |
|
"eval_loss": 0.38744866847991943, |
|
"eval_runtime": 47.4049, |
|
"eval_samples_per_second": 25.841, |
|
"eval_steps_per_second": 0.823, |
|
"eval_wer": 0.49382307522611957, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 54.76, |
|
"learning_rate": 0.00014441772151898733, |
|
"loss": 0.7793, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 55.95, |
|
"learning_rate": 0.00014062025316455696, |
|
"loss": 0.7729, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"learning_rate": 0.00013682278481012657, |
|
"loss": 0.76, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"learning_rate": 0.0001330253164556962, |
|
"loss": 0.7593, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 59.52, |
|
"learning_rate": 0.0001292278481012658, |
|
"loss": 0.735, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 59.52, |
|
"eval_loss": 0.374796986579895, |
|
"eval_runtime": 47.4755, |
|
"eval_samples_per_second": 25.803, |
|
"eval_steps_per_second": 0.821, |
|
"eval_wer": 0.47816015883520846, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 60.71, |
|
"learning_rate": 0.00012543037974683543, |
|
"loss": 0.7415, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 61.9, |
|
"learning_rate": 0.00012163291139240506, |
|
"loss": 0.7206, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 63.1, |
|
"learning_rate": 0.00011783544303797467, |
|
"loss": 0.7117, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 64.29, |
|
"learning_rate": 0.00011403797468354429, |
|
"loss": 0.7213, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 65.48, |
|
"learning_rate": 0.0001102405063291139, |
|
"loss": 0.7082, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 65.48, |
|
"eval_loss": 0.3615438938140869, |
|
"eval_runtime": 48.0099, |
|
"eval_samples_per_second": 25.516, |
|
"eval_steps_per_second": 0.812, |
|
"eval_wer": 0.4674608427090227, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 0.00010644303797468355, |
|
"loss": 0.705, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 67.86, |
|
"learning_rate": 0.00010264556962025316, |
|
"loss": 0.6942, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 69.05, |
|
"learning_rate": 9.884810126582278e-05, |
|
"loss": 0.6894, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 70.24, |
|
"learning_rate": 9.50506329113924e-05, |
|
"loss": 0.6807, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"learning_rate": 9.129113924050632e-05, |
|
"loss": 0.669, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"eval_loss": 0.3796931207180023, |
|
"eval_runtime": 46.7914, |
|
"eval_samples_per_second": 26.18, |
|
"eval_steps_per_second": 0.833, |
|
"eval_wer": 0.4600705934259872, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 72.62, |
|
"learning_rate": 8.749367088607594e-05, |
|
"loss": 0.6803, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 73.81, |
|
"learning_rate": 8.369620253164557e-05, |
|
"loss": 0.6706, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 7.989873417721518e-05, |
|
"loss": 0.6615, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 76.19, |
|
"learning_rate": 7.61012658227848e-05, |
|
"loss": 0.6541, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 77.38, |
|
"learning_rate": 7.230379746835442e-05, |
|
"loss": 0.6457, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 77.38, |
|
"eval_loss": 0.38117340207099915, |
|
"eval_runtime": 46.7869, |
|
"eval_samples_per_second": 26.183, |
|
"eval_steps_per_second": 0.834, |
|
"eval_wer": 0.4514670196337966, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 78.57, |
|
"learning_rate": 6.850632911392405e-05, |
|
"loss": 0.6379, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 79.76, |
|
"learning_rate": 6.470886075949366e-05, |
|
"loss": 0.6387, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 80.95, |
|
"learning_rate": 6.091139240506329e-05, |
|
"loss": 0.6186, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 82.14, |
|
"learning_rate": 5.711392405063291e-05, |
|
"loss": 0.6095, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 5.331645569620252e-05, |
|
"loss": 0.6098, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"eval_loss": 0.3660217821598053, |
|
"eval_runtime": 46.6679, |
|
"eval_samples_per_second": 26.249, |
|
"eval_steps_per_second": 0.836, |
|
"eval_wer": 0.4342598720494154, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 84.52, |
|
"learning_rate": 4.9518987341772145e-05, |
|
"loss": 0.6124, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 85.71, |
|
"learning_rate": 4.572151898734177e-05, |
|
"loss": 0.5954, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 86.9, |
|
"learning_rate": 4.192405063291139e-05, |
|
"loss": 0.5993, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 88.1, |
|
"learning_rate": 3.812658227848101e-05, |
|
"loss": 0.5861, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 89.29, |
|
"learning_rate": 3.432911392405063e-05, |
|
"loss": 0.5874, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 89.29, |
|
"eval_loss": 0.3640037775039673, |
|
"eval_runtime": 47.0, |
|
"eval_samples_per_second": 26.064, |
|
"eval_steps_per_second": 0.83, |
|
"eval_wer": 0.4256562982572248, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 90.48, |
|
"learning_rate": 3.053164556962025e-05, |
|
"loss": 0.5818, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"learning_rate": 2.673417721518987e-05, |
|
"loss": 0.5755, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 92.86, |
|
"learning_rate": 2.293670886075949e-05, |
|
"loss": 0.5837, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 94.05, |
|
"learning_rate": 1.9139240506329114e-05, |
|
"loss": 0.5727, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"learning_rate": 1.5341772151898733e-05, |
|
"loss": 0.5627, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"eval_loss": 0.3660779297351837, |
|
"eval_runtime": 46.6417, |
|
"eval_samples_per_second": 26.264, |
|
"eval_steps_per_second": 0.836, |
|
"eval_wer": 0.4238914626075447, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 96.43, |
|
"learning_rate": 1.1582278481012656e-05, |
|
"loss": 0.5602, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 97.62, |
|
"learning_rate": 7.784810126582278e-06, |
|
"loss": 0.5564, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 98.81, |
|
"learning_rate": 3.987341772151899e-06, |
|
"loss": 0.5714, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 1.8987341772151897e-07, |
|
"loss": 0.5538, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 8400, |
|
"total_flos": 4.26333780649278e+19, |
|
"train_loss": 0.9825477345784506, |
|
"train_runtime": 16451.1257, |
|
"train_samples_per_second": 16.327, |
|
"train_steps_per_second": 0.511 |
|
} |
|
], |
|
"max_steps": 8400, |
|
"num_train_epochs": 100, |
|
"total_flos": 4.26333780649278e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|