{
"best_metric": 0.6202898550724638,
"best_model_checkpoint": "outputs/xlm-roberta-large-reddit-indonesia-sarcastic/checkpoint-1545",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 2472,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 9.997627677617663e-06,
"loss": 0.5177,
"step": 309
},
{
"epoch": 1.0,
"eval_accuracy": 0.7866761162296244,
"eval_f1": 0.48013816925734026,
"eval_loss": 0.46185678243637085,
"eval_precision": 0.6150442477876106,
"eval_recall": 0.3937677053824363,
"eval_runtime": 17.7197,
"eval_samples_per_second": 79.629,
"eval_steps_per_second": 1.298,
"step": 309
},
{
"epoch": 2.0,
"learning_rate": 9.990324230393262e-06,
"loss": 0.4158,
"step": 618
},
{
"epoch": 2.0,
"eval_accuracy": 0.8143160878809355,
"eval_f1": 0.5704918032786885,
"eval_loss": 0.4047999382019043,
"eval_precision": 0.6770428015564203,
"eval_recall": 0.49291784702549574,
"eval_runtime": 17.7187,
"eval_samples_per_second": 79.634,
"eval_steps_per_second": 1.298,
"step": 618
},
{
"epoch": 3.0,
"learning_rate": 9.978095935642609e-06,
"loss": 0.3535,
"step": 927
},
{
"epoch": 3.0,
"eval_accuracy": 0.8051027639971652,
"eval_f1": 0.4741873804971319,
"eval_loss": 0.47256213426589966,
"eval_precision": 0.7294117647058823,
"eval_recall": 0.35127478753541075,
"eval_runtime": 17.7187,
"eval_samples_per_second": 79.633,
"eval_steps_per_second": 1.298,
"step": 927
},
{
"epoch": 4.0,
"learning_rate": 9.960954861216283e-06,
"loss": 0.2983,
"step": 1236
},
{
"epoch": 4.0,
"eval_accuracy": 0.8065201984408221,
"eval_f1": 0.5806451612903226,
"eval_loss": 0.505993664264679,
"eval_precision": 0.6342281879194631,
"eval_recall": 0.5354107648725213,
"eval_runtime": 17.7201,
"eval_samples_per_second": 79.627,
"eval_steps_per_second": 1.298,
"step": 1236
},
{
"epoch": 5.0,
"learning_rate": 9.938917923285274e-06,
"loss": 0.2439,
"step": 1545
},
{
"epoch": 5.0,
"eval_accuracy": 0.8143160878809355,
"eval_f1": 0.6202898550724638,
"eval_loss": 0.45980918407440186,
"eval_precision": 0.6350148367952523,
"eval_recall": 0.6062322946175638,
"eval_runtime": 17.7319,
"eval_samples_per_second": 79.574,
"eval_steps_per_second": 1.297,
"step": 1545
},
{
"epoch": 6.0,
"learning_rate": 9.912006869646771e-06,
"loss": 0.198,
"step": 1854
},
{
"epoch": 6.0,
"eval_accuracy": 0.8058114812189936,
"eval_f1": 0.5594855305466238,
"eval_loss": 0.5417402982711792,
"eval_precision": 0.6468401486988847,
"eval_recall": 0.49291784702549574,
"eval_runtime": 17.7267,
"eval_samples_per_second": 79.597,
"eval_steps_per_second": 1.297,
"step": 1854
},
{
"epoch": 7.0,
"learning_rate": 9.880248258261703e-06,
"loss": 0.1655,
"step": 2163
},
{
"epoch": 7.0,
"eval_accuracy": 0.8072289156626506,
"eval_f1": 0.575,
"eval_loss": 0.6252104043960571,
"eval_precision": 0.6411149825783972,
"eval_recall": 0.5212464589235127,
"eval_runtime": 17.7396,
"eval_samples_per_second": 79.54,
"eval_steps_per_second": 1.297,
"step": 2163
},
{
"epoch": 8.0,
"learning_rate": 9.843673431045243e-06,
"loss": 0.1242,
"step": 2472
},
{
"epoch": 8.0,
"eval_accuracy": 0.81218993621545,
"eval_f1": 0.6050670640834576,
"eval_loss": 0.84306401014328,
"eval_precision": 0.6383647798742138,
"eval_recall": 0.5750708215297451,
"eval_runtime": 17.7031,
"eval_samples_per_second": 79.704,
"eval_steps_per_second": 1.299,
"step": 2472
},
{
"epoch": 8.0,
"step": 2472,
"total_flos": 1.8416827602087936e+16,
"train_loss": 0.28959052385249956,
"train_runtime": 3461.0498,
"train_samples_per_second": 285.491,
"train_steps_per_second": 8.928
}
],
"logging_steps": 500,
"max_steps": 30900,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 1.8416827602087936e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}