|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.196406364440918, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5417, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7543859649122807, |
|
"eval_f1": 0.6730602006688964, |
|
"eval_loss": 0.47316503524780273, |
|
"eval_precision": 0.7027985359158151, |
|
"eval_recall": 0.6612111292962357, |
|
"eval_runtime": 1.7797, |
|
"eval_samples_per_second": 224.194, |
|
"eval_steps_per_second": 28.094, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.380220651626587, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4395, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7919799498746867, |
|
"eval_f1": 0.7705259948585406, |
|
"eval_loss": 0.4128379225730896, |
|
"eval_precision": 0.7612839958158996, |
|
"eval_recall": 0.8028277868703401, |
|
"eval_runtime": 1.7767, |
|
"eval_samples_per_second": 224.574, |
|
"eval_steps_per_second": 28.142, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.2583518028259277, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3319, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8315033783783784, |
|
"eval_loss": 0.32298392057418823, |
|
"eval_precision": 0.8438775510204082, |
|
"eval_recall": 0.8217403164211674, |
|
"eval_runtime": 1.7811, |
|
"eval_samples_per_second": 224.023, |
|
"eval_steps_per_second": 28.073, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.8704787492752075, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2873, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8237962290701417, |
|
"eval_loss": 0.322201669216156, |
|
"eval_precision": 0.8201159969225307, |
|
"eval_recall": 0.8278777959629023, |
|
"eval_runtime": 1.7743, |
|
"eval_samples_per_second": 224.873, |
|
"eval_steps_per_second": 28.18, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.8079779148101807, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2571, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8484099018899409, |
|
"eval_loss": 0.29681602120399475, |
|
"eval_precision": 0.8430645161290322, |
|
"eval_recall": 0.8545644662665939, |
|
"eval_runtime": 1.7796, |
|
"eval_samples_per_second": 224.208, |
|
"eval_steps_per_second": 28.096, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.8642373085021973, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2443, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8466330637850383, |
|
"eval_loss": 0.29177311062812805, |
|
"eval_precision": 0.8353276671885485, |
|
"eval_recall": 0.8635206401163849, |
|
"eval_runtime": 1.7765, |
|
"eval_samples_per_second": 224.594, |
|
"eval_steps_per_second": 28.145, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.3272399306297302, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2256, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8447157518450185, |
|
"eval_loss": 0.2981509566307068, |
|
"eval_precision": 0.8325401217487549, |
|
"eval_recall": 0.864248045099109, |
|
"eval_runtime": 1.7799, |
|
"eval_samples_per_second": 224.168, |
|
"eval_steps_per_second": 28.091, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.9290239810943604, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2172, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8646934961080748, |
|
"eval_loss": 0.2722471356391907, |
|
"eval_precision": 0.882551000198059, |
|
"eval_recall": 0.8512456810329151, |
|
"eval_runtime": 1.7818, |
|
"eval_samples_per_second": 223.935, |
|
"eval_steps_per_second": 28.062, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.677186012268066, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2049, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8748655913978494, |
|
"eval_loss": 0.264840692281723, |
|
"eval_precision": 0.86983032873807, |
|
"eval_recall": 0.8805237315875614, |
|
"eval_runtime": 1.7819, |
|
"eval_samples_per_second": 223.922, |
|
"eval_steps_per_second": 28.06, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.770735502243042, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1914, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8848664457009163, |
|
"eval_loss": 0.2680298984050751, |
|
"eval_precision": 0.8977236138837015, |
|
"eval_recall": 0.8743862520458265, |
|
"eval_runtime": 1.7909, |
|
"eval_samples_per_second": 222.787, |
|
"eval_steps_per_second": 27.918, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.3618270754814148, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1724, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8808243727598566, |
|
"eval_loss": 0.264539897441864, |
|
"eval_precision": 0.875706963591375, |
|
"eval_recall": 0.8865702855064557, |
|
"eval_runtime": 1.7802, |
|
"eval_samples_per_second": 224.138, |
|
"eval_steps_per_second": 28.087, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 2.054783582687378, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1689, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.881931703852755, |
|
"eval_loss": 0.27462852001190186, |
|
"eval_precision": 0.8740012737378415, |
|
"eval_recall": 0.8915711947626841, |
|
"eval_runtime": 1.778, |
|
"eval_samples_per_second": 224.405, |
|
"eval_steps_per_second": 28.121, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.6106524467468262, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1473, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8800854160075926, |
|
"eval_loss": 0.28370755910873413, |
|
"eval_precision": 0.9002425410326267, |
|
"eval_recall": 0.8651118385160939, |
|
"eval_runtime": 1.785, |
|
"eval_samples_per_second": 223.535, |
|
"eval_steps_per_second": 28.012, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 4.3967108726501465, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1577, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.884617951284618, |
|
"eval_loss": 0.2892190217971802, |
|
"eval_precision": 0.8772893772893773, |
|
"eval_recall": 0.8933442444080741, |
|
"eval_runtime": 1.7818, |
|
"eval_samples_per_second": 223.936, |
|
"eval_steps_per_second": 28.062, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.48143357038497925, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1468, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8829621606985718, |
|
"eval_loss": 0.27894169092178345, |
|
"eval_precision": 0.8802419354838709, |
|
"eval_recall": 0.8858428805237315, |
|
"eval_runtime": 1.7862, |
|
"eval_samples_per_second": 223.376, |
|
"eval_steps_per_second": 27.992, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.6658376455307007, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1473, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8775533117267087, |
|
"eval_loss": 0.28521186113357544, |
|
"eval_precision": 0.873246730188791, |
|
"eval_recall": 0.8822967812329514, |
|
"eval_runtime": 1.778, |
|
"eval_samples_per_second": 224.41, |
|
"eval_steps_per_second": 28.121, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.4181182384490967, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1274, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8856836962422341, |
|
"eval_loss": 0.28584179282188416, |
|
"eval_precision": 0.8838235294117647, |
|
"eval_recall": 0.8876159301691217, |
|
"eval_runtime": 1.7792, |
|
"eval_samples_per_second": 224.253, |
|
"eval_steps_per_second": 28.102, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 5.91610050201416, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1318, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8802521008403361, |
|
"eval_loss": 0.29269006848335266, |
|
"eval_precision": 0.8767168083714847, |
|
"eval_recall": 0.8840698308783415, |
|
"eval_runtime": 1.7862, |
|
"eval_samples_per_second": 223.383, |
|
"eval_steps_per_second": 27.993, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 6.210901737213135, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1355, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8884169154604891, |
|
"eval_loss": 0.2884277403354645, |
|
"eval_precision": 0.8874630556728391, |
|
"eval_recall": 0.8893889798145117, |
|
"eval_runtime": 1.7806, |
|
"eval_samples_per_second": 224.08, |
|
"eval_steps_per_second": 28.08, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.5748217105865479, |
|
"learning_rate": 0.0, |
|
"loss": 0.1367, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8884169154604891, |
|
"eval_loss": 0.2881532311439514, |
|
"eval_precision": 0.8874630556728391, |
|
"eval_recall": 0.8893889798145117, |
|
"eval_runtime": 1.7811, |
|
"eval_samples_per_second": 224.016, |
|
"eval_steps_per_second": 28.072, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.2206378909408069, |
|
"train_runtime": 624.8021, |
|
"train_samples_per_second": 116.453, |
|
"train_steps_per_second": 3.905 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|