|
{ |
|
"best_metric": 0.6012922525405884, |
|
"best_model_checkpoint": "./wav2vec2-large-xlsr-53-common_voice-ja-demo-kana-only/checkpoint-2600", |
|
"epoch": 15.0, |
|
"eval_steps": 100, |
|
"global_step": 5640, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26595744680851063, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 6.830887317657471, |
|
"eval_runtime": 235.9339, |
|
"eval_samples_per_second": 21.027, |
|
"eval_steps_per_second": 2.632, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"eval_cer": 0.9998627637760951, |
|
"eval_loss": 4.129940986633301, |
|
"eval_runtime": 235.1597, |
|
"eval_samples_per_second": 21.096, |
|
"eval_steps_per_second": 2.641, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7978723404255319, |
|
"eval_cer": 0.9869302678528182, |
|
"eval_loss": 3.993030309677124, |
|
"eval_runtime": 234.1893, |
|
"eval_samples_per_second": 21.184, |
|
"eval_steps_per_second": 2.652, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"eval_cer": 0.5876132198847216, |
|
"eval_loss": 2.0400278568267822, |
|
"eval_runtime": 237.5888, |
|
"eval_samples_per_second": 20.881, |
|
"eval_steps_per_second": 2.614, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"grad_norm": 3.4516775608062744, |
|
"learning_rate": 0.00029759999999999997, |
|
"loss": 7.1041, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"eval_cer": 0.43090559762339153, |
|
"eval_loss": 1.0610116720199585, |
|
"eval_runtime": 241.7004, |
|
"eval_samples_per_second": 20.525, |
|
"eval_steps_per_second": 2.569, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5957446808510638, |
|
"eval_cer": 0.39546636097970517, |
|
"eval_loss": 0.8836929798126221, |
|
"eval_runtime": 237.4265, |
|
"eval_samples_per_second": 20.895, |
|
"eval_steps_per_second": 2.616, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8617021276595744, |
|
"eval_cer": 0.3790545231444855, |
|
"eval_loss": 0.7705639004707336, |
|
"eval_runtime": 238.309, |
|
"eval_samples_per_second": 20.818, |
|
"eval_steps_per_second": 2.606, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"eval_cer": 0.3815893569272002, |
|
"eval_loss": 0.766192615032196, |
|
"eval_runtime": 235.61, |
|
"eval_samples_per_second": 21.056, |
|
"eval_steps_per_second": 2.636, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.393617021276596, |
|
"eval_cer": 0.37896572323489996, |
|
"eval_loss": 0.7620505094528198, |
|
"eval_runtime": 237.8852, |
|
"eval_samples_per_second": 20.855, |
|
"eval_steps_per_second": 2.611, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"grad_norm": 2.764026165008545, |
|
"learning_rate": 0.00027105058365758754, |
|
"loss": 0.803, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"eval_cer": 0.36259424899494647, |
|
"eval_loss": 0.6968890428543091, |
|
"eval_runtime": 238.5042, |
|
"eval_samples_per_second": 20.8, |
|
"eval_steps_per_second": 2.604, |
|
"eval_wer": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.925531914893617, |
|
"eval_cer": 0.3572904725769734, |
|
"eval_loss": 0.6735507249832153, |
|
"eval_runtime": 242.6929, |
|
"eval_samples_per_second": 20.441, |
|
"eval_steps_per_second": 2.559, |
|
"eval_wer": 1.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"eval_cer": 0.3543762209987568, |
|
"eval_loss": 0.6823433637619019, |
|
"eval_runtime": 250.863, |
|
"eval_samples_per_second": 19.776, |
|
"eval_steps_per_second": 2.475, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.4574468085106385, |
|
"eval_cer": 0.3460209567786622, |
|
"eval_loss": 0.6359558701515198, |
|
"eval_runtime": 237.6152, |
|
"eval_samples_per_second": 20.878, |
|
"eval_steps_per_second": 2.613, |
|
"eval_wer": 1.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.723404255319149, |
|
"eval_cer": 0.34429339490127064, |
|
"eval_loss": 0.6503622531890869, |
|
"eval_runtime": 235.7171, |
|
"eval_samples_per_second": 21.046, |
|
"eval_steps_per_second": 2.635, |
|
"eval_wer": 1.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"grad_norm": 1.9848517179489136, |
|
"learning_rate": 0.00024186770428015562, |
|
"loss": 0.5675, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"eval_cer": 0.34139528876116054, |
|
"eval_loss": 0.6246533393859863, |
|
"eval_runtime": 239.1195, |
|
"eval_samples_per_second": 20.747, |
|
"eval_steps_per_second": 2.597, |
|
"eval_wer": 1.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"eval_cer": 0.3424851058333468, |
|
"eval_loss": 0.6397080421447754, |
|
"eval_runtime": 242.7125, |
|
"eval_samples_per_second": 20.44, |
|
"eval_steps_per_second": 2.559, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.5212765957446805, |
|
"eval_cer": 0.3439462679819817, |
|
"eval_loss": 0.6589427590370178, |
|
"eval_runtime": 243.7764, |
|
"eval_samples_per_second": 20.351, |
|
"eval_steps_per_second": 2.547, |
|
"eval_wer": 1.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.787234042553192, |
|
"eval_cer": 0.3449472851445824, |
|
"eval_loss": 0.6345330476760864, |
|
"eval_runtime": 237.3086, |
|
"eval_samples_per_second": 20.905, |
|
"eval_steps_per_second": 2.617, |
|
"eval_wer": 1.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.053191489361702, |
|
"eval_cer": 0.3379805286016436, |
|
"eval_loss": 0.652181088924408, |
|
"eval_runtime": 240.2379, |
|
"eval_samples_per_second": 20.65, |
|
"eval_steps_per_second": 2.585, |
|
"eval_wer": 0.999596855472687, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 2.756148099899292, |
|
"learning_rate": 0.00021268482490272372, |
|
"loss": 0.4421, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"eval_cer": 0.3371732566963205, |
|
"eval_loss": 0.6292724609375, |
|
"eval_runtime": 240.5588, |
|
"eval_samples_per_second": 20.623, |
|
"eval_steps_per_second": 2.581, |
|
"eval_wer": 1.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.585106382978723, |
|
"eval_cer": 0.3342428596799974, |
|
"eval_loss": 0.6095719337463379, |
|
"eval_runtime": 241.1797, |
|
"eval_samples_per_second": 20.57, |
|
"eval_steps_per_second": 2.575, |
|
"eval_wer": 1.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.851063829787234, |
|
"eval_cer": 0.3320712982546781, |
|
"eval_loss": 0.6107529401779175, |
|
"eval_runtime": 238.2079, |
|
"eval_samples_per_second": 20.826, |
|
"eval_steps_per_second": 2.607, |
|
"eval_wer": 1.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.117021276595745, |
|
"eval_cer": 0.3354295493808224, |
|
"eval_loss": 0.6200144290924072, |
|
"eval_runtime": 235.9894, |
|
"eval_samples_per_second": 21.022, |
|
"eval_steps_per_second": 2.631, |
|
"eval_wer": 1.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.382978723404255, |
|
"eval_cer": 0.33407333257987953, |
|
"eval_loss": 0.6413326263427734, |
|
"eval_runtime": 236.6024, |
|
"eval_samples_per_second": 20.968, |
|
"eval_steps_per_second": 2.625, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.648936170212766, |
|
"grad_norm": 4.183356761932373, |
|
"learning_rate": 0.00018350194552529183, |
|
"loss": 0.3699, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.648936170212766, |
|
"eval_cer": 0.3359462034002293, |
|
"eval_loss": 0.6303403973579407, |
|
"eval_runtime": 237.052, |
|
"eval_samples_per_second": 20.928, |
|
"eval_steps_per_second": 2.62, |
|
"eval_wer": 0.999596855472687, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.914893617021277, |
|
"eval_cer": 0.3307796632061611, |
|
"eval_loss": 0.6012922525405884, |
|
"eval_runtime": 233.4251, |
|
"eval_samples_per_second": 21.253, |
|
"eval_steps_per_second": 2.66, |
|
"eval_wer": 1.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.180851063829787, |
|
"eval_cer": 0.32859195634273536, |
|
"eval_loss": 0.634281575679779, |
|
"eval_runtime": 232.2994, |
|
"eval_samples_per_second": 21.356, |
|
"eval_steps_per_second": 2.673, |
|
"eval_wer": 1.0, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.446808510638298, |
|
"eval_cer": 0.3260086862457013, |
|
"eval_loss": 0.6208234429359436, |
|
"eval_runtime": 235.1613, |
|
"eval_samples_per_second": 21.096, |
|
"eval_steps_per_second": 2.641, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.712765957446808, |
|
"eval_cer": 0.3287291925666403, |
|
"eval_loss": 0.6095162034034729, |
|
"eval_runtime": 241.6677, |
|
"eval_samples_per_second": 20.528, |
|
"eval_steps_per_second": 2.57, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"grad_norm": 1.701768159866333, |
|
"learning_rate": 0.0001543190661478599, |
|
"loss": 0.3146, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"eval_cer": 0.3265818492984807, |
|
"eval_loss": 0.6058351397514343, |
|
"eval_runtime": 235.659, |
|
"eval_samples_per_second": 21.052, |
|
"eval_steps_per_second": 2.635, |
|
"eval_wer": 0.999596855472687, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.24468085106383, |
|
"eval_cer": 0.3251368325879523, |
|
"eval_loss": 0.6612707376480103, |
|
"eval_runtime": 233.8878, |
|
"eval_samples_per_second": 21.211, |
|
"eval_steps_per_second": 2.655, |
|
"eval_wer": 0.999596855472687, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.51063829787234, |
|
"eval_cer": 0.3244022151541082, |
|
"eval_loss": 0.6538846492767334, |
|
"eval_runtime": 235.5702, |
|
"eval_samples_per_second": 21.06, |
|
"eval_steps_per_second": 2.636, |
|
"eval_wer": 1.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.77659574468085, |
|
"eval_cer": 0.32638810404120316, |
|
"eval_loss": 0.6331284046173096, |
|
"eval_runtime": 236.8675, |
|
"eval_samples_per_second": 20.944, |
|
"eval_steps_per_second": 2.622, |
|
"eval_wer": 1.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.042553191489361, |
|
"eval_cer": 0.32281996221967485, |
|
"eval_loss": 0.6436325907707214, |
|
"eval_runtime": 236.1998, |
|
"eval_samples_per_second": 21.003, |
|
"eval_steps_per_second": 2.629, |
|
"eval_wer": 1.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.308510638297872, |
|
"grad_norm": 1.067522406578064, |
|
"learning_rate": 0.000125136186770428, |
|
"loss": 0.2576, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.308510638297872, |
|
"eval_cer": 0.32351421605825276, |
|
"eval_loss": 0.6328682899475098, |
|
"eval_runtime": 234.2706, |
|
"eval_samples_per_second": 21.176, |
|
"eval_steps_per_second": 2.651, |
|
"eval_wer": 1.0, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.574468085106384, |
|
"eval_cer": 0.3196958199460742, |
|
"eval_loss": 0.6314510703086853, |
|
"eval_runtime": 234.3665, |
|
"eval_samples_per_second": 21.168, |
|
"eval_steps_per_second": 2.65, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.840425531914894, |
|
"eval_cer": 0.32033356475127955, |
|
"eval_loss": 0.6280702352523804, |
|
"eval_runtime": 236.442, |
|
"eval_samples_per_second": 20.982, |
|
"eval_steps_per_second": 2.626, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 10.106382978723405, |
|
"eval_cer": 0.3196473836317548, |
|
"eval_loss": 0.6696433424949646, |
|
"eval_runtime": 232.6849, |
|
"eval_samples_per_second": 21.321, |
|
"eval_steps_per_second": 2.669, |
|
"eval_wer": 0.999596855472687, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 10.372340425531915, |
|
"eval_cer": 0.3199218560795647, |
|
"eval_loss": 0.6629988551139832, |
|
"eval_runtime": 233.1379, |
|
"eval_samples_per_second": 21.279, |
|
"eval_steps_per_second": 2.664, |
|
"eval_wer": 0.999596855472687, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"grad_norm": 1.6601381301879883, |
|
"learning_rate": 9.595330739299609e-05, |
|
"loss": 0.2201, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"eval_cer": 0.3203093465941198, |
|
"eval_loss": 0.6781216859817505, |
|
"eval_runtime": 232.2432, |
|
"eval_samples_per_second": 21.361, |
|
"eval_steps_per_second": 2.674, |
|
"eval_wer": 1.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.904255319148936, |
|
"eval_cer": 0.31962316547459513, |
|
"eval_loss": 0.6531046628952026, |
|
"eval_runtime": 239.9955, |
|
"eval_samples_per_second": 20.671, |
|
"eval_steps_per_second": 2.588, |
|
"eval_wer": 1.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 11.170212765957446, |
|
"eval_cer": 0.3192598931171997, |
|
"eval_loss": 0.6762946248054504, |
|
"eval_runtime": 235.4369, |
|
"eval_samples_per_second": 21.071, |
|
"eval_steps_per_second": 2.638, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 11.436170212765958, |
|
"eval_cer": 0.3184364757737701, |
|
"eval_loss": 0.6785323023796082, |
|
"eval_runtime": 232.8132, |
|
"eval_samples_per_second": 21.309, |
|
"eval_steps_per_second": 2.667, |
|
"eval_wer": 1.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 11.702127659574469, |
|
"eval_cer": 0.31793596719246975, |
|
"eval_loss": 0.666408360004425, |
|
"eval_runtime": 232.0593, |
|
"eval_samples_per_second": 21.378, |
|
"eval_steps_per_second": 2.676, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 11.96808510638298, |
|
"grad_norm": 0.7692495584487915, |
|
"learning_rate": 6.67704280155642e-05, |
|
"loss": 0.1931, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.96808510638298, |
|
"eval_cer": 0.31842033033566364, |
|
"eval_loss": 0.6682071089744568, |
|
"eval_runtime": 235.6644, |
|
"eval_samples_per_second": 21.051, |
|
"eval_steps_per_second": 2.635, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.23404255319149, |
|
"eval_cer": 0.3168219319631238, |
|
"eval_loss": 0.6799555420875549, |
|
"eval_runtime": 235.3483, |
|
"eval_samples_per_second": 21.079, |
|
"eval_steps_per_second": 2.639, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_cer": 0.3162245507531847, |
|
"eval_loss": 0.6925452351570129, |
|
"eval_runtime": 235.4523, |
|
"eval_samples_per_second": 21.07, |
|
"eval_steps_per_second": 2.637, |
|
"eval_wer": 1.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 12.76595744680851, |
|
"eval_cer": 0.3144646979995802, |
|
"eval_loss": 0.7046905159950256, |
|
"eval_runtime": 235.1343, |
|
"eval_samples_per_second": 21.099, |
|
"eval_steps_per_second": 2.641, |
|
"eval_wer": 1.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 13.03191489361702, |
|
"eval_cer": 0.3147230250092836, |
|
"eval_loss": 0.6918711066246033, |
|
"eval_runtime": 238.6326, |
|
"eval_samples_per_second": 20.789, |
|
"eval_steps_per_second": 2.602, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 13.297872340425531, |
|
"grad_norm": 1.052759051322937, |
|
"learning_rate": 3.758754863813229e-05, |
|
"loss": 0.1694, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.297872340425531, |
|
"eval_cer": 0.31423866186608973, |
|
"eval_loss": 0.699897289276123, |
|
"eval_runtime": 256.2343, |
|
"eval_samples_per_second": 19.361, |
|
"eval_steps_per_second": 2.424, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.563829787234042, |
|
"eval_cer": 0.3134394626798198, |
|
"eval_loss": 0.6994884610176086, |
|
"eval_runtime": 236.0872, |
|
"eval_samples_per_second": 21.013, |
|
"eval_steps_per_second": 2.63, |
|
"eval_wer": 1.0, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 13.829787234042554, |
|
"eval_cer": 0.3133829536464472, |
|
"eval_loss": 0.6916852593421936, |
|
"eval_runtime": 234.7745, |
|
"eval_samples_per_second": 21.131, |
|
"eval_steps_per_second": 2.645, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 14.095744680851064, |
|
"eval_cer": 0.3128501541889339, |
|
"eval_loss": 0.6962644457817078, |
|
"eval_runtime": 235.7153, |
|
"eval_samples_per_second": 21.047, |
|
"eval_steps_per_second": 2.635, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 14.361702127659575, |
|
"eval_cer": 0.3128259360317742, |
|
"eval_loss": 0.6961241960525513, |
|
"eval_runtime": 245.639, |
|
"eval_samples_per_second": 20.196, |
|
"eval_steps_per_second": 2.528, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 14.627659574468085, |
|
"grad_norm": 2.195582389831543, |
|
"learning_rate": 8.404669260700388e-06, |
|
"loss": 0.1548, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.627659574468085, |
|
"eval_cer": 0.31290666322230654, |
|
"eval_loss": 0.6963828802108765, |
|
"eval_runtime": 237.6797, |
|
"eval_samples_per_second": 20.873, |
|
"eval_steps_per_second": 2.613, |
|
"eval_wer": 1.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.893617021276595, |
|
"eval_cer": 0.3126725543697628, |
|
"eval_loss": 0.6983669400215149, |
|
"eval_runtime": 245.8138, |
|
"eval_samples_per_second": 20.182, |
|
"eval_steps_per_second": 2.526, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 5640, |
|
"total_flos": 2.6348448154740388e+19, |
|
"train_loss": 0.9431738045198698, |
|
"train_runtime": 36296.3273, |
|
"train_samples_per_second": 4.972, |
|
"train_steps_per_second": 0.155 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5640, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.6348448154740388e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|