|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.458889961242676, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5533, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7268170426065163, |
|
"eval_f1": 0.6326680574676724, |
|
"eval_loss": 0.5133728384971619, |
|
"eval_precision": 0.6605799373040753, |
|
"eval_recall": 0.6242044008001455, |
|
"eval_runtime": 1.774, |
|
"eval_samples_per_second": 224.915, |
|
"eval_steps_per_second": 28.185, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.8158535957336426, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4779, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7418546365914787, |
|
"eval_f1": 0.7122401394791937, |
|
"eval_loss": 0.4949621260166168, |
|
"eval_precision": 0.7053803339517626, |
|
"eval_recall": 0.734860883797054, |
|
"eval_runtime": 1.7746, |
|
"eval_samples_per_second": 224.844, |
|
"eval_steps_per_second": 28.176, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.156679630279541, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4097, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8245614035087719, |
|
"eval_f1": 0.7664715719063545, |
|
"eval_loss": 0.3772188425064087, |
|
"eval_precision": 0.8092877840475827, |
|
"eval_recall": 0.7458628841607565, |
|
"eval_runtime": 1.8118, |
|
"eval_samples_per_second": 220.228, |
|
"eval_steps_per_second": 27.597, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.239713191986084, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3451, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8169941409717701, |
|
"eval_loss": 0.3511227071285248, |
|
"eval_precision": 0.8104735988883742, |
|
"eval_recall": 0.8250591016548463, |
|
"eval_runtime": 1.8097, |
|
"eval_samples_per_second": 220.479, |
|
"eval_steps_per_second": 27.629, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.155226707458496, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2959, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8255172205802521, |
|
"eval_loss": 0.32013869285583496, |
|
"eval_precision": 0.8239495798319327, |
|
"eval_recall": 0.8271503909801782, |
|
"eval_runtime": 1.81, |
|
"eval_samples_per_second": 220.445, |
|
"eval_steps_per_second": 27.625, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.97943115234375, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2727, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8447157518450185, |
|
"eval_loss": 0.3176342844963074, |
|
"eval_precision": 0.8325401217487549, |
|
"eval_recall": 0.864248045099109, |
|
"eval_runtime": 1.8128, |
|
"eval_samples_per_second": 220.097, |
|
"eval_steps_per_second": 27.581, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.0954539775848389, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2595, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8524146298159436, |
|
"eval_loss": 0.2958522439002991, |
|
"eval_precision": 0.8451250578971746, |
|
"eval_recall": 0.8613384251682124, |
|
"eval_runtime": 1.8171, |
|
"eval_samples_per_second": 219.582, |
|
"eval_steps_per_second": 27.516, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 6.516312122344971, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2409, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8649122807017544, |
|
"eval_loss": 0.28329744935035706, |
|
"eval_precision": 0.8710116366366366, |
|
"eval_recall": 0.8594744498999818, |
|
"eval_runtime": 1.8171, |
|
"eval_samples_per_second": 219.577, |
|
"eval_steps_per_second": 27.516, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.709987163543701, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2298, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.850729517396184, |
|
"eval_loss": 0.2893889546394348, |
|
"eval_precision": 0.8535087719298247, |
|
"eval_recall": 0.8481087470449173, |
|
"eval_runtime": 1.8128, |
|
"eval_samples_per_second": 220.099, |
|
"eval_steps_per_second": 27.581, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.345912933349609, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2221, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8614765038536611, |
|
"eval_loss": 0.2884393632411957, |
|
"eval_precision": 0.8686536646744258, |
|
"eval_recall": 0.8552009456264775, |
|
"eval_runtime": 1.8154, |
|
"eval_samples_per_second": 219.786, |
|
"eval_steps_per_second": 27.542, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.3872387409210205, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1986, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8587719298245614, |
|
"eval_loss": 0.2855367362499237, |
|
"eval_precision": 0.864771021021021, |
|
"eval_recall": 0.8534278959810875, |
|
"eval_runtime": 1.8165, |
|
"eval_samples_per_second": 219.656, |
|
"eval_steps_per_second": 27.526, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 5.007177829742432, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1964, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8521068445832446, |
|
"eval_loss": 0.29210031032562256, |
|
"eval_precision": 0.8693800752624282, |
|
"eval_recall": 0.8391525731951264, |
|
"eval_runtime": 1.8152, |
|
"eval_samples_per_second": 219.815, |
|
"eval_steps_per_second": 27.546, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.4582099914550781, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1783, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8649122807017544, |
|
"eval_loss": 0.3103856146335602, |
|
"eval_precision": 0.8710116366366366, |
|
"eval_recall": 0.8594744498999818, |
|
"eval_runtime": 1.8138, |
|
"eval_samples_per_second": 219.982, |
|
"eval_steps_per_second": 27.567, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.738508701324463, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1788, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8689068100358424, |
|
"eval_loss": 0.3015482425689697, |
|
"eval_precision": 0.863953693884765, |
|
"eval_recall": 0.8744771776686671, |
|
"eval_runtime": 1.8152, |
|
"eval_samples_per_second": 219.806, |
|
"eval_steps_per_second": 27.545, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 2.6257522106170654, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.172, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8595070422535211, |
|
"eval_loss": 0.3011764883995056, |
|
"eval_precision": 0.8633733523114054, |
|
"eval_recall": 0.8559283506092017, |
|
"eval_runtime": 1.8131, |
|
"eval_samples_per_second": 220.062, |
|
"eval_steps_per_second": 27.577, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 4.563363552093506, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1563, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8695225637671682, |
|
"eval_loss": 0.3159307837486267, |
|
"eval_precision": 0.8631532846715328, |
|
"eval_recall": 0.8769776322967813, |
|
"eval_runtime": 1.8172, |
|
"eval_samples_per_second": 219.574, |
|
"eval_steps_per_second": 27.516, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.206107258796692, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1512, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8572517421602788, |
|
"eval_loss": 0.32489535212516785, |
|
"eval_precision": 0.8679426449878376, |
|
"eval_recall": 0.8484269867248591, |
|
"eval_runtime": 1.8131, |
|
"eval_samples_per_second": 220.063, |
|
"eval_steps_per_second": 27.577, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.8656744956970215, |
|
"learning_rate": 5e-06, |
|
"loss": 0.151, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.855319904024935, |
|
"eval_loss": 0.3245084285736084, |
|
"eval_precision": 0.862378106322743, |
|
"eval_recall": 0.8491543917075832, |
|
"eval_runtime": 1.8159, |
|
"eval_samples_per_second": 219.726, |
|
"eval_steps_per_second": 27.535, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.600020170211792, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1461, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8614765038536611, |
|
"eval_loss": 0.328171044588089, |
|
"eval_precision": 0.8686536646744258, |
|
"eval_recall": 0.8552009456264775, |
|
"eval_runtime": 1.8138, |
|
"eval_samples_per_second": 219.976, |
|
"eval_steps_per_second": 27.566, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.394913673400879, |
|
"learning_rate": 0.0, |
|
"loss": 0.1555, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8622036668943447, |
|
"eval_loss": 0.32483023405075073, |
|
"eval_precision": 0.8671602787456446, |
|
"eval_recall": 0.8577014002545917, |
|
"eval_runtime": 1.8125, |
|
"eval_samples_per_second": 220.138, |
|
"eval_steps_per_second": 27.586, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.2495564101172275, |
|
"train_runtime": 624.4298, |
|
"train_samples_per_second": 116.522, |
|
"train_steps_per_second": 3.908 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|