{ "best_metric": 0.5326957106590271, "best_model_checkpoint": "bert_uncased_L-4_H-128_A-2_mrpc/checkpoint-165", "epoch": 16.0, "eval_steps": 500, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.4740382730960846, "learning_rate": 4.9e-05, "loss": 0.6437, "step": 15 }, { "epoch": 1.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6180539727210999, "eval_runtime": 0.1512, "eval_samples_per_second": 2698.339, "eval_steps_per_second": 13.227, "step": 15 }, { "epoch": 2.0, "grad_norm": 0.8819882273674011, "learning_rate": 4.8e-05, "loss": 0.6197, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.6911764705882353, "eval_combined_score": 0.7534829721362228, "eval_f1": 0.8157894736842105, "eval_loss": 0.6046980023384094, "eval_runtime": 0.1487, "eval_samples_per_second": 2742.909, "eval_steps_per_second": 13.446, "step": 30 }, { "epoch": 3.0, "grad_norm": 2.03017258644104, "learning_rate": 4.7e-05, "loss": 0.595, "step": 45 }, { "epoch": 3.0, "eval_accuracy": 0.6985294117647058, "eval_combined_score": 0.7573364547612766, "eval_f1": 0.8161434977578476, "eval_loss": 0.5876799821853638, "eval_runtime": 0.1465, "eval_samples_per_second": 2785.842, "eval_steps_per_second": 13.656, "step": 45 }, { "epoch": 4.0, "grad_norm": 0.8701429963111877, "learning_rate": 4.600000000000001e-05, "loss": 0.582, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.7279411764705882, "eval_combined_score": 0.7781900627329756, "eval_f1": 0.8284389489953632, "eval_loss": 0.5686801075935364, "eval_runtime": 0.1522, "eval_samples_per_second": 2680.994, "eval_steps_per_second": 13.142, "step": 60 }, { "epoch": 5.0, "grad_norm": 1.5039589405059814, "learning_rate": 4.5e-05, "loss": 0.5617, "step": 75 }, { "epoch": 5.0, "eval_accuracy": 0.7279411764705882, "eval_combined_score": 0.778717132014096, "eval_f1": 0.8294930875576038, "eval_loss": 0.5593908429145813, "eval_runtime": 0.1469, "eval_samples_per_second": 2777.387, "eval_steps_per_second": 13.615, "step": 75 }, { "epoch": 6.0, "grad_norm": 1.3441897630691528, "learning_rate": 4.4000000000000006e-05, "loss": 0.5409, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.7132352941176471, "eval_combined_score": 0.7670311233222232, "eval_f1": 0.8208269525267994, "eval_loss": 0.5550417304039001, "eval_runtime": 0.1443, "eval_samples_per_second": 2828.2, "eval_steps_per_second": 13.864, "step": 90 }, { "epoch": 7.0, "grad_norm": 2.370797872543335, "learning_rate": 4.3e-05, "loss": 0.5213, "step": 105 }, { "epoch": 7.0, "eval_accuracy": 0.7254901960784313, "eval_combined_score": 0.7749708033683693, "eval_f1": 0.8244514106583073, "eval_loss": 0.541745662689209, "eval_runtime": 0.1475, "eval_samples_per_second": 2766.445, "eval_steps_per_second": 13.561, "step": 105 }, { "epoch": 8.0, "grad_norm": 3.4701988697052, "learning_rate": 4.2e-05, "loss": 0.4968, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.7328431372549019, "eval_combined_score": 0.7819254445964432, "eval_f1": 0.8310077519379845, "eval_loss": 0.5529593825340271, "eval_runtime": 0.1516, "eval_samples_per_second": 2690.55, "eval_steps_per_second": 13.189, "step": 120 }, { "epoch": 9.0, "grad_norm": 2.4742236137390137, "learning_rate": 4.1e-05, "loss": 0.4741, "step": 135 }, { "epoch": 9.0, "eval_accuracy": 0.7352941176470589, "eval_combined_score": 0.7843137254901962, "eval_f1": 0.8333333333333334, "eval_loss": 0.5579618811607361, "eval_runtime": 0.1396, "eval_samples_per_second": 2922.56, "eval_steps_per_second": 14.326, "step": 135 }, { "epoch": 10.0, "grad_norm": 3.9524009227752686, "learning_rate": 4e-05, "loss": 0.4545, "step": 150 }, { "epoch": 10.0, "eval_accuracy": 0.7549019607843137, "eval_combined_score": 0.7973227752639517, "eval_f1": 0.8397435897435896, "eval_loss": 0.539033830165863, "eval_runtime": 0.1482, "eval_samples_per_second": 2753.474, "eval_steps_per_second": 13.497, "step": 150 }, { "epoch": 11.0, "grad_norm": 2.9584977626800537, "learning_rate": 3.9000000000000006e-05, "loss": 0.4366, "step": 165 }, { "epoch": 11.0, "eval_accuracy": 0.7573529411764706, "eval_combined_score": 0.7987087807659412, "eval_f1": 0.840064620355412, "eval_loss": 0.5326957106590271, "eval_runtime": 0.1409, "eval_samples_per_second": 2895.937, "eval_steps_per_second": 14.196, "step": 165 }, { "epoch": 12.0, "grad_norm": 5.0168890953063965, "learning_rate": 3.8e-05, "loss": 0.4206, "step": 180 }, { "epoch": 12.0, "eval_accuracy": 0.7598039215686274, "eval_combined_score": 0.8011238257360822, "eval_f1": 0.842443729903537, "eval_loss": 0.535027027130127, "eval_runtime": 0.1403, "eval_samples_per_second": 2907.08, "eval_steps_per_second": 14.25, "step": 180 }, { "epoch": 13.0, "grad_norm": 2.828728437423706, "learning_rate": 3.7e-05, "loss": 0.397, "step": 195 }, { "epoch": 13.0, "eval_accuracy": 0.7549019607843137, "eval_combined_score": 0.7998112288393618, "eval_f1": 0.84472049689441, "eval_loss": 0.5649162530899048, "eval_runtime": 0.1409, "eval_samples_per_second": 2895.59, "eval_steps_per_second": 14.194, "step": 195 }, { "epoch": 14.0, "grad_norm": 6.568640232086182, "learning_rate": 3.6e-05, "loss": 0.3873, "step": 210 }, { "epoch": 14.0, "eval_accuracy": 0.7622549019607843, "eval_combined_score": 0.8052276074749147, "eval_f1": 0.8482003129890453, "eval_loss": 0.5601567029953003, "eval_runtime": 0.1451, "eval_samples_per_second": 2812.318, "eval_steps_per_second": 13.786, "step": 210 }, { "epoch": 15.0, "grad_norm": 3.909018039703369, "learning_rate": 3.5e-05, "loss": 0.3725, "step": 225 }, { "epoch": 15.0, "eval_accuracy": 0.7524509803921569, "eval_combined_score": 0.7961937944750008, "eval_f1": 0.8399366085578448, "eval_loss": 0.5621684193611145, "eval_runtime": 0.1459, "eval_samples_per_second": 2795.865, "eval_steps_per_second": 13.705, "step": 225 }, { "epoch": 16.0, "grad_norm": 4.69514799118042, "learning_rate": 3.4000000000000007e-05, "loss": 0.3506, "step": 240 }, { "epoch": 16.0, "eval_accuracy": 0.7524509803921569, "eval_combined_score": 0.7949050393104101, "eval_f1": 0.8373590982286634, "eval_loss": 0.5587612390518188, "eval_runtime": 0.1412, "eval_samples_per_second": 2889.737, "eval_steps_per_second": 14.165, "step": 240 }, { "epoch": 16.0, "step": 240, "total_flos": 73027538583552.0, "train_loss": 0.490890504916509, "train_runtime": 29.3983, "train_samples_per_second": 6238.45, "train_steps_per_second": 25.512 } ], "logging_steps": 1, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 73027538583552.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }