{ "best_metric": 0.5112138390541077, "best_model_checkpoint": "/tmp/model/checkpoint-144", "epoch": 3.0, "eval_steps": 500, "global_step": 144, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 6.666666666666667e-06, "loss": 0.6931, "step": 2 }, { "epoch": 0.08, "learning_rate": 1.3333333333333333e-05, "loss": 0.6931, "step": 4 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 0.6931, "step": 6 }, { "epoch": 0.17, "learning_rate": 2.6666666666666667e-05, "loss": 0.6916, "step": 8 }, { "epoch": 0.21, "learning_rate": 3.3333333333333335e-05, "loss": 0.6917, "step": 10 }, { "epoch": 0.25, "learning_rate": 4e-05, "loss": 0.6899, "step": 12 }, { "epoch": 0.29, "learning_rate": 4.666666666666667e-05, "loss": 0.6889, "step": 14 }, { "epoch": 0.33, "learning_rate": 4.96124031007752e-05, "loss": 0.6856, "step": 16 }, { "epoch": 0.38, "learning_rate": 4.883720930232558e-05, "loss": 0.6874, "step": 18 }, { "epoch": 0.42, "learning_rate": 4.8062015503875976e-05, "loss": 0.6862, "step": 20 }, { "epoch": 0.46, "learning_rate": 4.728682170542636e-05, "loss": 0.6849, "step": 22 }, { "epoch": 0.5, "learning_rate": 4.651162790697675e-05, "loss": 0.677, "step": 24 }, { "epoch": 0.54, "learning_rate": 4.573643410852713e-05, "loss": 0.6701, "step": 26 }, { "epoch": 0.58, "learning_rate": 4.496124031007753e-05, "loss": 0.6784, "step": 28 }, { "epoch": 0.62, "learning_rate": 4.418604651162791e-05, "loss": 0.6784, "step": 30 }, { "epoch": 0.67, "learning_rate": 4.34108527131783e-05, "loss": 0.6687, "step": 32 }, { "epoch": 0.71, "learning_rate": 4.263565891472868e-05, "loss": 0.6648, "step": 34 }, { "epoch": 0.75, "learning_rate": 4.186046511627907e-05, "loss": 0.6661, "step": 36 }, { "epoch": 0.79, "learning_rate": 4.108527131782946e-05, "loss": 0.6421, "step": 38 }, { "epoch": 0.83, "learning_rate": 4.0310077519379843e-05, "loss": 0.6458, "step": 40 }, { "epoch": 0.88, "learning_rate": 3.953488372093023e-05, "loss": 0.6615, "step": 42 }, { "epoch": 0.92, "learning_rate": 3.875968992248062e-05, "loss": 0.6294, "step": 44 }, { "epoch": 0.96, "learning_rate": 3.798449612403101e-05, "loss": 0.6471, "step": 46 }, { "epoch": 1.0, "learning_rate": 3.7209302325581394e-05, "loss": 0.6923, "step": 48 }, { "epoch": 1.0, "eval_accuracy": 0.6, "eval_auc": 1.0, "eval_f1": 0.7499999999999999, "eval_loss": 0.6188636422157288, "eval_precision": 0.6, "eval_recall": 1.0, "eval_runtime": 13.3176, "eval_samples_per_second": 7.133, "eval_steps_per_second": 0.451, "step": 48 }, { "epoch": 1.04, "learning_rate": 3.6434108527131784e-05, "loss": 0.6528, "step": 50 }, { "epoch": 1.08, "learning_rate": 3.565891472868217e-05, "loss": 0.6468, "step": 52 }, { "epoch": 1.12, "learning_rate": 3.488372093023256e-05, "loss": 0.6816, "step": 54 }, { "epoch": 1.17, "learning_rate": 3.4108527131782945e-05, "loss": 0.6155, "step": 56 }, { "epoch": 1.21, "learning_rate": 3.3333333333333335e-05, "loss": 0.6151, "step": 58 }, { "epoch": 1.25, "learning_rate": 3.2558139534883724e-05, "loss": 0.6055, "step": 60 }, { "epoch": 1.29, "learning_rate": 3.1782945736434114e-05, "loss": 0.63, "step": 62 }, { "epoch": 1.33, "learning_rate": 3.1007751937984497e-05, "loss": 0.6136, "step": 64 }, { "epoch": 1.38, "learning_rate": 3.0232558139534883e-05, "loss": 0.6024, "step": 66 }, { "epoch": 1.42, "learning_rate": 2.9457364341085275e-05, "loss": 0.6039, "step": 68 }, { "epoch": 1.46, "learning_rate": 2.868217054263566e-05, "loss": 0.6381, "step": 70 }, { "epoch": 1.5, "learning_rate": 2.7906976744186048e-05, "loss": 0.6171, "step": 72 }, { "epoch": 1.54, "learning_rate": 2.7131782945736434e-05, "loss": 0.5989, "step": 74 }, { "epoch": 1.58, "learning_rate": 2.6356589147286826e-05, "loss": 0.6496, "step": 76 }, { "epoch": 1.62, "learning_rate": 2.5581395348837212e-05, "loss": 0.5904, "step": 78 }, { "epoch": 1.67, "learning_rate": 2.48062015503876e-05, "loss": 0.5641, "step": 80 }, { "epoch": 1.71, "learning_rate": 2.4031007751937988e-05, "loss": 0.6415, "step": 82 }, { "epoch": 1.75, "learning_rate": 2.3255813953488374e-05, "loss": 0.5581, "step": 84 }, { "epoch": 1.79, "learning_rate": 2.2480620155038764e-05, "loss": 0.5596, "step": 86 }, { "epoch": 1.83, "learning_rate": 2.170542635658915e-05, "loss": 0.6017, "step": 88 }, { "epoch": 1.88, "learning_rate": 2.0930232558139536e-05, "loss": 0.5437, "step": 90 }, { "epoch": 1.92, "learning_rate": 2.0155038759689922e-05, "loss": 0.5613, "step": 92 }, { "epoch": 1.96, "learning_rate": 1.937984496124031e-05, "loss": 0.617, "step": 94 }, { "epoch": 2.0, "learning_rate": 1.8604651162790697e-05, "loss": 0.7115, "step": 96 }, { "epoch": 2.0, "eval_accuracy": 0.6, "eval_auc": 1.0, "eval_f1": 0.7499999999999999, "eval_loss": 0.5825784206390381, "eval_precision": 0.6, "eval_recall": 1.0, "eval_runtime": 12.6935, "eval_samples_per_second": 7.484, "eval_steps_per_second": 0.473, "step": 96 }, { "epoch": 2.04, "learning_rate": 1.7829457364341087e-05, "loss": 0.6016, "step": 98 }, { "epoch": 2.08, "learning_rate": 1.7054263565891473e-05, "loss": 0.5401, "step": 100 }, { "epoch": 2.12, "learning_rate": 1.6279069767441862e-05, "loss": 0.5289, "step": 102 }, { "epoch": 2.17, "learning_rate": 1.5503875968992248e-05, "loss": 0.521, "step": 104 }, { "epoch": 2.21, "learning_rate": 1.4728682170542638e-05, "loss": 0.5441, "step": 106 }, { "epoch": 2.25, "learning_rate": 1.3953488372093024e-05, "loss": 0.5942, "step": 108 }, { "epoch": 2.29, "learning_rate": 1.3178294573643413e-05, "loss": 0.5576, "step": 110 }, { "epoch": 2.33, "learning_rate": 1.24031007751938e-05, "loss": 0.5702, "step": 112 }, { "epoch": 2.38, "learning_rate": 1.1627906976744187e-05, "loss": 0.5567, "step": 114 }, { "epoch": 2.42, "learning_rate": 1.0852713178294575e-05, "loss": 0.5222, "step": 116 }, { "epoch": 2.46, "learning_rate": 1.0077519379844961e-05, "loss": 0.565, "step": 118 }, { "epoch": 2.5, "learning_rate": 9.302325581395349e-06, "loss": 0.5274, "step": 120 }, { "epoch": 2.54, "learning_rate": 8.527131782945736e-06, "loss": 0.5156, "step": 122 }, { "epoch": 2.58, "learning_rate": 7.751937984496124e-06, "loss": 0.562, "step": 124 }, { "epoch": 2.62, "learning_rate": 6.976744186046512e-06, "loss": 0.5159, "step": 126 }, { "epoch": 2.67, "learning_rate": 6.2015503875969e-06, "loss": 0.5327, "step": 128 }, { "epoch": 2.71, "learning_rate": 5.426356589147287e-06, "loss": 0.5364, "step": 130 }, { "epoch": 2.75, "learning_rate": 4.651162790697674e-06, "loss": 0.5982, "step": 132 }, { "epoch": 2.79, "learning_rate": 3.875968992248062e-06, "loss": 0.6285, "step": 134 }, { "epoch": 2.83, "learning_rate": 3.10077519379845e-06, "loss": 0.5116, "step": 136 }, { "epoch": 2.88, "learning_rate": 2.325581395348837e-06, "loss": 0.5596, "step": 138 }, { "epoch": 2.92, "learning_rate": 1.550387596899225e-06, "loss": 0.5538, "step": 140 }, { "epoch": 2.96, "learning_rate": 7.751937984496125e-07, "loss": 0.568, "step": 142 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.5701, "step": 144 }, { "epoch": 3.0, "eval_accuracy": 0.6, "eval_auc": 1.0, "eval_f1": 0.7499999999999999, "eval_loss": 0.5112138390541077, "eval_precision": 0.6, "eval_recall": 1.0, "eval_runtime": 11.597, "eval_samples_per_second": 8.192, "eval_steps_per_second": 0.517, "step": 144 } ], "logging_steps": 2, "max_steps": 144, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.401724127688704e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }