{ "best_metric": 0.9555453658103943, "best_model_checkpoint": "./ec-model/checkpoint-500", "epoch": 0.10704345964461572, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.5e-05, "loss": 3.3479, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.997500535599515e-05, "loss": 2.1638, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.9939298721702496e-05, "loss": 1.7877, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.990716275083911e-05, "loss": 1.5925, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.987145611654645e-05, "loss": 1.6327, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.9835749482253806e-05, "loss": 1.4538, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.980004284796115e-05, "loss": 1.523, "step": 70 }, { "epoch": 0.02, "learning_rate": 4.97643362136685e-05, "loss": 1.4908, "step": 80 }, { "epoch": 0.02, "learning_rate": 4.972862957937585e-05, "loss": 1.4713, "step": 90 }, { "epoch": 0.02, "learning_rate": 4.96929229450832e-05, "loss": 1.4672, "step": 100 }, { "epoch": 0.02, "learning_rate": 4.965721631079055e-05, "loss": 1.2186, "step": 110 }, { "epoch": 0.03, "learning_rate": 4.96215096764979e-05, "loss": 1.2957, "step": 120 }, { "epoch": 0.03, "learning_rate": 4.958580304220524e-05, "loss": 1.2997, "step": 130 }, { "epoch": 0.03, "learning_rate": 4.9550096407912596e-05, "loss": 1.4143, "step": 140 }, { "epoch": 0.03, "learning_rate": 4.951438977361994e-05, "loss": 1.3641, "step": 150 }, { "epoch": 0.03, "learning_rate": 4.947868313932729e-05, "loss": 1.2587, "step": 160 }, { "epoch": 0.04, "learning_rate": 4.944297650503464e-05, "loss": 1.2986, "step": 170 }, { "epoch": 0.04, "learning_rate": 4.940726987074199e-05, "loss": 1.2285, "step": 180 }, { "epoch": 0.04, "learning_rate": 4.9371563236449334e-05, "loss": 1.2279, "step": 190 }, { "epoch": 0.04, "learning_rate": 4.933585660215668e-05, "loss": 1.2148, "step": 200 }, { "epoch": 0.04, "learning_rate": 4.930014996786403e-05, "loss": 1.149, "step": 210 }, { "epoch": 0.05, "learning_rate": 4.926444333357138e-05, "loss": 1.1807, "step": 220 }, { "epoch": 0.05, "learning_rate": 4.9228736699278726e-05, "loss": 1.2904, "step": 230 }, { "epoch": 0.05, "learning_rate": 4.919303006498608e-05, "loss": 1.2082, "step": 240 }, { "epoch": 0.05, "learning_rate": 4.9157323430693425e-05, "loss": 1.1615, "step": 250 }, { "epoch": 0.06, "learning_rate": 4.912161679640077e-05, "loss": 1.2131, "step": 260 }, { "epoch": 0.06, "learning_rate": 4.9085910162108124e-05, "loss": 1.1906, "step": 270 }, { "epoch": 0.06, "learning_rate": 4.905020352781547e-05, "loss": 1.2294, "step": 280 }, { "epoch": 0.06, "learning_rate": 4.9014496893522824e-05, "loss": 1.1824, "step": 290 }, { "epoch": 0.06, "learning_rate": 4.897879025923017e-05, "loss": 1.1318, "step": 300 }, { "epoch": 0.07, "learning_rate": 4.8943083624937516e-05, "loss": 1.1093, "step": 310 }, { "epoch": 0.07, "learning_rate": 4.890737699064486e-05, "loss": 1.204, "step": 320 }, { "epoch": 0.07, "learning_rate": 4.887167035635221e-05, "loss": 1.1846, "step": 330 }, { "epoch": 0.07, "learning_rate": 4.883596372205956e-05, "loss": 1.2242, "step": 340 }, { "epoch": 0.07, "learning_rate": 4.880025708776691e-05, "loss": 1.1337, "step": 350 }, { "epoch": 0.08, "learning_rate": 4.8764550453474254e-05, "loss": 1.1435, "step": 360 }, { "epoch": 0.08, "learning_rate": 4.872884381918161e-05, "loss": 1.242, "step": 370 }, { "epoch": 0.08, "learning_rate": 4.869313718488895e-05, "loss": 1.1702, "step": 380 }, { "epoch": 0.08, "learning_rate": 4.8657430550596306e-05, "loss": 1.1043, "step": 390 }, { "epoch": 0.09, "learning_rate": 4.862172391630365e-05, "loss": 1.1109, "step": 400 }, { "epoch": 0.09, "learning_rate": 4.8586017282011e-05, "loss": 1.1437, "step": 410 }, { "epoch": 0.09, "learning_rate": 4.855031064771835e-05, "loss": 0.9969, "step": 420 }, { "epoch": 0.09, "learning_rate": 4.851817467685496e-05, "loss": 1.2689, "step": 430 }, { "epoch": 0.09, "learning_rate": 4.848246804256231e-05, "loss": 1.1224, "step": 440 }, { "epoch": 0.1, "learning_rate": 4.8446761408269655e-05, "loss": 1.1185, "step": 450 }, { "epoch": 0.1, "learning_rate": 4.8411054773977e-05, "loss": 1.1684, "step": 460 }, { "epoch": 0.1, "learning_rate": 4.8375348139684355e-05, "loss": 1.163, "step": 470 }, { "epoch": 0.1, "learning_rate": 4.8343212168820965e-05, "loss": 1.1046, "step": 480 }, { "epoch": 0.1, "learning_rate": 4.830750553452832e-05, "loss": 1.0975, "step": 490 }, { "epoch": 0.11, "learning_rate": 4.8271798900235665e-05, "loss": 0.9991, "step": 500 }, { "epoch": 0.11, "eval_loss": 0.9555453658103943, "eval_runtime": 12.0489, "eval_samples_per_second": 775.259, "eval_steps_per_second": 48.469, "step": 500 } ], "logging_steps": 10, "max_steps": 14013, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 275019724062720.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }