|
{ |
|
"best_metric": 23.35348393254852, |
|
"best_model_checkpoint": "whisper3/checkpoint-240", |
|
"epoch": 8.333333333333334, |
|
"eval_steps": 10, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1388888888888889, |
|
"grad_norm": 46.06148147583008, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 3.9402, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 43.4765625, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 3.8281, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"eval_loss": 3.7929115295410156, |
|
"eval_runtime": 253.0403, |
|
"eval_samples_per_second": 1.976, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 80.40089086859689, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 40.57815933227539, |
|
"learning_rate": 3e-06, |
|
"loss": 3.5929, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 39.72583770751953, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 3.209, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"eval_loss": 3.0014312267303467, |
|
"eval_runtime": 246.2101, |
|
"eval_samples_per_second": 2.031, |
|
"eval_steps_per_second": 0.256, |
|
"eval_wer": 68.37416481069042, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.6944444444444444, |
|
"grad_norm": 39.53627395629883, |
|
"learning_rate": 5e-06, |
|
"loss": 2.7486, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 30.079750061035156, |
|
"learning_rate": 6e-06, |
|
"loss": 2.1066, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"eval_loss": 1.761271595954895, |
|
"eval_runtime": 245.5315, |
|
"eval_samples_per_second": 2.036, |
|
"eval_steps_per_second": 0.257, |
|
"eval_wer": 63.91982182628062, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.9722222222222222, |
|
"grad_norm": 19.831071853637695, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 1.5134, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 9.755999565124512, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.9963, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"eval_loss": 0.8740884065628052, |
|
"eval_runtime": 246.6146, |
|
"eval_samples_per_second": 2.027, |
|
"eval_steps_per_second": 0.255, |
|
"eval_wer": 52.43398027362392, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 6.842897891998291, |
|
"learning_rate": 9e-06, |
|
"loss": 0.786, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 5.720729351043701, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6922, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"eval_loss": 0.7008740901947021, |
|
"eval_runtime": 245.5713, |
|
"eval_samples_per_second": 2.036, |
|
"eval_steps_per_second": 0.257, |
|
"eval_wer": 35.82564428889596, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.5277777777777777, |
|
"grad_norm": 4.806775093078613, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.6427, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 5.128376483917236, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.5816, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"eval_loss": 0.6238442659378052, |
|
"eval_runtime": 245.679, |
|
"eval_samples_per_second": 2.035, |
|
"eval_steps_per_second": 0.256, |
|
"eval_wer": 31.148584155265667, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.8055555555555556, |
|
"grad_norm": 4.993675231933594, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.5805, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"grad_norm": 4.856825351715088, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.5684, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"eval_loss": 0.5697694420814514, |
|
"eval_runtime": 245.5413, |
|
"eval_samples_per_second": 2.036, |
|
"eval_steps_per_second": 0.257, |
|
"eval_wer": 35.47566019726376, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 4.464582443237305, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.4534, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 4.251033306121826, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.427, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"eval_loss": 0.5380394458770752, |
|
"eval_runtime": 244.4819, |
|
"eval_samples_per_second": 2.045, |
|
"eval_steps_per_second": 0.258, |
|
"eval_wer": 27.266942411708563, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.361111111111111, |
|
"grad_norm": 4.489510536193848, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.3929, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 4.552371025085449, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.4395, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.5162410140037537, |
|
"eval_runtime": 245.2373, |
|
"eval_samples_per_second": 2.039, |
|
"eval_steps_per_second": 0.257, |
|
"eval_wer": 32.73942093541203, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.638888888888889, |
|
"grad_norm": 4.691618919372559, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.3825, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 4.219367027282715, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3861, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"eval_loss": 0.495292991399765, |
|
"eval_runtime": 243.4193, |
|
"eval_samples_per_second": 2.054, |
|
"eval_steps_per_second": 0.259, |
|
"eval_wer": 24.530703149856826, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.9166666666666665, |
|
"grad_norm": 4.323045253753662, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.3669, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 3.0555555555555554, |
|
"grad_norm": 3.2159509658813477, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.3745, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.0555555555555554, |
|
"eval_loss": 0.4837464392185211, |
|
"eval_runtime": 244.5759, |
|
"eval_samples_per_second": 2.044, |
|
"eval_steps_per_second": 0.258, |
|
"eval_wer": 24.626153356665608, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.1944444444444446, |
|
"grad_norm": 3.675457000732422, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.257, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 2.8939876556396484, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.2487, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"eval_loss": 0.4732927978038788, |
|
"eval_runtime": 244.6891, |
|
"eval_samples_per_second": 2.043, |
|
"eval_steps_per_second": 0.257, |
|
"eval_wer": 23.57620108176901, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.4722222222222223, |
|
"grad_norm": 3.4589827060699463, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.253, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 3.611111111111111, |
|
"grad_norm": 3.1798577308654785, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.2343, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.611111111111111, |
|
"eval_loss": 0.46519017219543457, |
|
"eval_runtime": 244.3925, |
|
"eval_samples_per_second": 2.046, |
|
"eval_steps_per_second": 0.258, |
|
"eval_wer": 24.94432071269488, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 4.061887741088867, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.2354, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 4.474591255187988, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.2429, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"eval_loss": 0.4581267833709717, |
|
"eval_runtime": 244.836, |
|
"eval_samples_per_second": 2.042, |
|
"eval_steps_per_second": 0.257, |
|
"eval_wer": 24.085268851415844, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.027777777777778, |
|
"grad_norm": 2.3235318660736084, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.2728, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"grad_norm": 2.3824808597564697, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1286, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"eval_loss": 0.46725359559059143, |
|
"eval_runtime": 245.6982, |
|
"eval_samples_per_second": 2.035, |
|
"eval_steps_per_second": 0.256, |
|
"eval_wer": 24.276169265033406, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.305555555555555, |
|
"grad_norm": 2.5686404705047607, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.1301, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 2.7436068058013916, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.1304, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"eval_loss": 0.46984970569610596, |
|
"eval_runtime": 245.0991, |
|
"eval_samples_per_second": 2.04, |
|
"eval_steps_per_second": 0.257, |
|
"eval_wer": 31.72128539611836, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.583333333333333, |
|
"grad_norm": 2.83823823928833, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.1408, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 4.722222222222222, |
|
"grad_norm": 2.7204811573028564, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.1361, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.722222222222222, |
|
"eval_loss": 0.4690161943435669, |
|
"eval_runtime": 246.5232, |
|
"eval_samples_per_second": 2.028, |
|
"eval_steps_per_second": 0.256, |
|
"eval_wer": 33.08940502704423, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.861111111111111, |
|
"grad_norm": 3.671097993850708, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1511, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 6.484060764312744, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.1447, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.4811546802520752, |
|
"eval_runtime": 244.9356, |
|
"eval_samples_per_second": 2.041, |
|
"eval_steps_per_second": 0.257, |
|
"eval_wer": 24.657970092268535, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.138888888888889, |
|
"grad_norm": 1.9667352437973022, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.063, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 5.277777777777778, |
|
"grad_norm": 2.1828482151031494, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.0617, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.277777777777778, |
|
"eval_loss": 0.48713362216949463, |
|
"eval_runtime": 244.9851, |
|
"eval_samples_per_second": 2.041, |
|
"eval_steps_per_second": 0.257, |
|
"eval_wer": 29.939548202354437, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.416666666666667, |
|
"grad_norm": 1.8774911165237427, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.0606, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 1.8562583923339844, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0617, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"eval_loss": 0.488438218832016, |
|
"eval_runtime": 244.9014, |
|
"eval_samples_per_second": 2.042, |
|
"eval_steps_per_second": 0.257, |
|
"eval_wer": 24.848870505886094, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.694444444444445, |
|
"grad_norm": 1.9106348752975464, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.0617, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 5.833333333333333, |
|
"grad_norm": 1.8114972114562988, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.0577, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.833333333333333, |
|
"eval_loss": 0.4998014569282532, |
|
"eval_runtime": 244.1029, |
|
"eval_samples_per_second": 2.048, |
|
"eval_steps_per_second": 0.258, |
|
"eval_wer": 26.853324848870507, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.972222222222222, |
|
"grad_norm": 5.00437593460083, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.078, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 6.111111111111111, |
|
"grad_norm": 1.4013047218322754, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.038, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.111111111111111, |
|
"eval_loss": 0.500673770904541, |
|
"eval_runtime": 247.5538, |
|
"eval_samples_per_second": 2.02, |
|
"eval_steps_per_second": 0.254, |
|
"eval_wer": 24.848870505886094, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 1.4778488874435425, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.0243, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 6.388888888888889, |
|
"grad_norm": 1.3681198358535767, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.0269, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.388888888888889, |
|
"eval_loss": 0.5122880935668945, |
|
"eval_runtime": 243.6648, |
|
"eval_samples_per_second": 2.052, |
|
"eval_steps_per_second": 0.259, |
|
"eval_wer": 27.139675469296847, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.527777777777778, |
|
"grad_norm": 1.450726866722107, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.0297, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 1.4052125215530396, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.0321, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"eval_loss": 0.500522792339325, |
|
"eval_runtime": 247.602, |
|
"eval_samples_per_second": 2.019, |
|
"eval_steps_per_second": 0.254, |
|
"eval_wer": 23.35348393254852, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.805555555555555, |
|
"grad_norm": 1.2223644256591797, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.0291, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"grad_norm": 1.463398814201355, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0296, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"eval_loss": 0.5332342386245728, |
|
"eval_runtime": 246.3422, |
|
"eval_samples_per_second": 2.03, |
|
"eval_steps_per_second": 0.256, |
|
"eval_wer": 31.880369074132993, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 7.083333333333333, |
|
"grad_norm": 4.257472991943359, |
|
"learning_rate": 5.1000000000000006e-05, |
|
"loss": 0.027, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 7.222222222222222, |
|
"grad_norm": 2.294562339782715, |
|
"learning_rate": 5.2000000000000004e-05, |
|
"loss": 0.0207, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 7.222222222222222, |
|
"eval_loss": 0.5236981511116028, |
|
"eval_runtime": 244.1894, |
|
"eval_samples_per_second": 2.048, |
|
"eval_steps_per_second": 0.258, |
|
"eval_wer": 30.066815144766146, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 7.361111111111111, |
|
"grad_norm": 1.2468712329864502, |
|
"learning_rate": 5.300000000000001e-05, |
|
"loss": 0.0228, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 1.8487240076065063, |
|
"learning_rate": 5.4000000000000005e-05, |
|
"loss": 0.0215, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 0.5222529768943787, |
|
"eval_runtime": 243.6778, |
|
"eval_samples_per_second": 2.052, |
|
"eval_steps_per_second": 0.259, |
|
"eval_wer": 25.548838689150493, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 7.638888888888889, |
|
"grad_norm": 1.1909741163253784, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 0.0201, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"grad_norm": 1.6141778230667114, |
|
"learning_rate": 5.6000000000000006e-05, |
|
"loss": 0.0198, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"eval_loss": 0.5157026648521423, |
|
"eval_runtime": 244.0734, |
|
"eval_samples_per_second": 2.049, |
|
"eval_steps_per_second": 0.258, |
|
"eval_wer": 30.194082087177854, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.916666666666667, |
|
"grad_norm": 1.1372332572937012, |
|
"learning_rate": 5.6999999999999996e-05, |
|
"loss": 0.0193, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 8.055555555555555, |
|
"grad_norm": 2.210016965866089, |
|
"learning_rate": 5.8e-05, |
|
"loss": 0.0273, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 8.055555555555555, |
|
"eval_loss": 0.5289562940597534, |
|
"eval_runtime": 243.9152, |
|
"eval_samples_per_second": 2.05, |
|
"eval_steps_per_second": 0.258, |
|
"eval_wer": 27.553293032134903, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 8.194444444444445, |
|
"grad_norm": 1.942575454711914, |
|
"learning_rate": 5.9e-05, |
|
"loss": 0.0201, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 1.3640440702438354, |
|
"learning_rate": 6e-05, |
|
"loss": 0.0197, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"eval_loss": 0.5509196519851685, |
|
"eval_runtime": 243.9508, |
|
"eval_samples_per_second": 2.05, |
|
"eval_steps_per_second": 0.258, |
|
"eval_wer": 26.948775055679285, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"step": 300, |
|
"total_flos": 9.2409447186432e+17, |
|
"train_loss": 0.5431244759509961, |
|
"train_runtime": 10016.0212, |
|
"train_samples_per_second": 3.834, |
|
"train_steps_per_second": 0.03 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 10, |
|
"total_flos": 9.2409447186432e+17, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|