{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 2000, "global_step": 12575, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 4.801192842942346e-05, "loss": 0.2793, "step": 500 }, { "epoch": 0.4, "learning_rate": 4.602385685884692e-05, "loss": 0.167, "step": 1000 }, { "epoch": 0.6, "learning_rate": 4.403578528827038e-05, "loss": 0.1142, "step": 1500 }, { "epoch": 0.8, "learning_rate": 4.204771371769384e-05, "loss": 0.1172, "step": 2000 }, { "epoch": 0.8, "eval_f1": 0.8002317832826308, "eval_loss": 0.6091572739421863, "eval_runtime": 11.3587, "eval_samples_per_second": 7.571, "eval_steps_per_second": 7.571, "step": 2000 }, { "epoch": 0.99, "learning_rate": 4.00596421471173e-05, "loss": 0.1046, "step": 2500 }, { "epoch": 1.19, "learning_rate": 3.8071570576540756e-05, "loss": 0.1103, "step": 3000 }, { "epoch": 1.39, "learning_rate": 3.6083499005964215e-05, "loss": 0.1216, "step": 3500 }, { "epoch": 1.59, "learning_rate": 3.409542743538768e-05, "loss": 0.1201, "step": 4000 }, { "epoch": 1.59, "eval_f1": 0.797415004214667, "eval_loss": 0.570012144717545, "eval_runtime": 11.2237, "eval_samples_per_second": 7.662, "eval_steps_per_second": 7.662, "step": 4000 }, { "epoch": 1.79, "learning_rate": 3.210735586481113e-05, "loss": 0.1239, "step": 4500 }, { "epoch": 1.99, "learning_rate": 3.0119284294234595e-05, "loss": 0.1259, "step": 5000 }, { "epoch": 2.19, "learning_rate": 2.813121272365805e-05, "loss": 0.1254, "step": 5500 }, { "epoch": 2.39, "learning_rate": 2.6143141153081513e-05, "loss": 0.1738, "step": 6000 }, { "epoch": 2.39, "eval_f1": 0.785323775968898, "eval_loss": 1.3082878860688905, "eval_runtime": 11.1911, "eval_samples_per_second": 7.685, "eval_steps_per_second": 7.685, "step": 6000 }, { "epoch": 2.58, "learning_rate": 2.415506958250497e-05, "loss": 0.1656, "step": 6500 }, { "epoch": 2.78, "learning_rate": 2.216699801192843e-05, "loss": 0.1316, "step": 7000 }, { "epoch": 2.98, "learning_rate": 2.017892644135189e-05, "loss": 0.1284, "step": 7500 }, { "epoch": 3.18, "learning_rate": 1.8190854870775348e-05, "loss": 0.1431, "step": 8000 }, { "epoch": 3.18, "eval_f1": 0.7811158798283263, "eval_loss": 0.6317664683518408, "eval_runtime": 11.2067, "eval_samples_per_second": 7.674, "eval_steps_per_second": 7.674, "step": 8000 }, { "epoch": 3.38, "learning_rate": 1.6202783300198807e-05, "loss": 0.1353, "step": 8500 }, { "epoch": 3.58, "learning_rate": 1.4214711729622268e-05, "loss": 0.1013, "step": 9000 }, { "epoch": 3.78, "learning_rate": 1.2226640159045727e-05, "loss": 0.1174, "step": 9500 }, { "epoch": 3.98, "learning_rate": 1.0238568588469186e-05, "loss": 0.1213, "step": 10000 }, { "epoch": 3.98, "eval_f1": 0.8029407524866656, "eval_loss": 0.4704339896264236, "eval_runtime": 11.4371, "eval_samples_per_second": 7.519, "eval_steps_per_second": 7.519, "step": 10000 }, { "epoch": 4.17, "learning_rate": 8.250497017892645e-06, "loss": 0.0851, "step": 10500 }, { "epoch": 4.37, "learning_rate": 6.2624254473161034e-06, "loss": 0.096, "step": 11000 }, { "epoch": 4.57, "learning_rate": 4.274353876739562e-06, "loss": 0.0727, "step": 11500 }, { "epoch": 4.77, "learning_rate": 2.286282306163022e-06, "loss": 0.0764, "step": 12000 }, { "epoch": 4.77, "eval_f1": 0.8055235903337169, "eval_loss": 0.4711939039551899, "eval_runtime": 11.2703, "eval_samples_per_second": 7.631, "eval_steps_per_second": 7.631, "step": 12000 }, { "epoch": 4.97, "learning_rate": 2.982107355864811e-07, "loss": 0.09, "step": 12500 }, { "epoch": 5.0, "step": 12575, "total_flos": 1.933020837593007e+16, "train_loss": 0.12557823169776508, "train_runtime": 2775.7723, "train_samples_per_second": 4.53, "train_steps_per_second": 4.53 } ], "logging_steps": 500, "max_steps": 12575, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1.933020837593007e+16, "trial_name": null, "trial_params": null }