{ "best_metric": 2.5673868656158447, "best_model_checkpoint": "./robot22/checkpoint-400", "epoch": 1.0, "global_step": 430, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00019534883720930232, "loss": 4.9495, "step": 10 }, { "epoch": 0.05, "learning_rate": 0.00019069767441860466, "loss": 4.8535, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.000186046511627907, "loss": 4.6491, "step": 30 }, { "epoch": 0.09, "learning_rate": 0.0001813953488372093, "loss": 4.5284, "step": 40 }, { "epoch": 0.12, "learning_rate": 0.00017674418604651164, "loss": 4.4311, "step": 50 }, { "epoch": 0.14, "learning_rate": 0.00017209302325581395, "loss": 4.2551, "step": 60 }, { "epoch": 0.16, "learning_rate": 0.00016744186046511629, "loss": 4.2248, "step": 70 }, { "epoch": 0.19, "learning_rate": 0.00016279069767441862, "loss": 4.0917, "step": 80 }, { "epoch": 0.21, "learning_rate": 0.00015813953488372093, "loss": 3.964, "step": 90 }, { "epoch": 0.23, "learning_rate": 0.00015348837209302327, "loss": 3.9154, "step": 100 }, { "epoch": 0.23, "eval_accuracy": 0.22130177514792898, "eval_loss": 3.8417413234710693, "eval_runtime": 33.9476, "eval_samples_per_second": 49.783, "eval_steps_per_second": 6.245, "step": 100 }, { "epoch": 0.26, "learning_rate": 0.00014883720930232558, "loss": 3.8266, "step": 110 }, { "epoch": 0.28, "learning_rate": 0.00014418604651162791, "loss": 3.721, "step": 120 }, { "epoch": 0.3, "learning_rate": 0.00013953488372093025, "loss": 3.5688, "step": 130 }, { "epoch": 0.33, "learning_rate": 0.00013488372093023256, "loss": 3.5956, "step": 140 }, { "epoch": 0.35, "learning_rate": 0.0001302325581395349, "loss": 3.5324, "step": 150 }, { "epoch": 0.37, "learning_rate": 0.0001255813953488372, "loss": 3.4582, "step": 160 }, { "epoch": 0.4, "learning_rate": 0.00012093023255813953, "loss": 3.4678, "step": 170 }, { "epoch": 0.42, "learning_rate": 0.00011627906976744187, "loss": 3.3492, "step": 180 }, { "epoch": 0.44, "learning_rate": 0.00011162790697674419, "loss": 3.4026, "step": 190 }, { "epoch": 0.47, "learning_rate": 0.00010697674418604651, "loss": 3.1764, "step": 200 }, { "epoch": 0.47, "eval_accuracy": 0.3201183431952663, "eval_loss": 3.2242767810821533, "eval_runtime": 31.328, "eval_samples_per_second": 53.945, "eval_steps_per_second": 6.767, "step": 200 }, { "epoch": 0.49, "learning_rate": 0.00010232558139534885, "loss": 3.1029, "step": 210 }, { "epoch": 0.51, "learning_rate": 9.767441860465116e-05, "loss": 3.144, "step": 220 }, { "epoch": 0.53, "learning_rate": 9.30232558139535e-05, "loss": 3.092, "step": 230 }, { "epoch": 0.56, "learning_rate": 8.837209302325582e-05, "loss": 3.1839, "step": 240 }, { "epoch": 0.58, "learning_rate": 8.372093023255814e-05, "loss": 3.0839, "step": 250 }, { "epoch": 0.6, "learning_rate": 7.906976744186047e-05, "loss": 2.9506, "step": 260 }, { "epoch": 0.63, "learning_rate": 7.441860465116279e-05, "loss": 2.8607, "step": 270 }, { "epoch": 0.65, "learning_rate": 6.976744186046513e-05, "loss": 3.0074, "step": 280 }, { "epoch": 0.67, "learning_rate": 6.511627906976745e-05, "loss": 2.8763, "step": 290 }, { "epoch": 0.7, "learning_rate": 6.0465116279069765e-05, "loss": 2.8186, "step": 300 }, { "epoch": 0.7, "eval_accuracy": 0.4284023668639053, "eval_loss": 2.7973387241363525, "eval_runtime": 30.8947, "eval_samples_per_second": 54.702, "eval_steps_per_second": 6.862, "step": 300 }, { "epoch": 0.72, "learning_rate": 5.5813953488372095e-05, "loss": 2.8303, "step": 310 }, { "epoch": 0.74, "learning_rate": 5.1162790697674425e-05, "loss": 2.8085, "step": 320 }, { "epoch": 0.77, "learning_rate": 4.651162790697675e-05, "loss": 2.8264, "step": 330 }, { "epoch": 0.79, "learning_rate": 4.186046511627907e-05, "loss": 2.7512, "step": 340 }, { "epoch": 0.81, "learning_rate": 3.7209302325581394e-05, "loss": 2.6599, "step": 350 }, { "epoch": 0.84, "learning_rate": 3.2558139534883724e-05, "loss": 2.5858, "step": 360 }, { "epoch": 0.86, "learning_rate": 2.7906976744186048e-05, "loss": 2.6807, "step": 370 }, { "epoch": 0.88, "learning_rate": 2.3255813953488374e-05, "loss": 2.6653, "step": 380 }, { "epoch": 0.91, "learning_rate": 1.8604651162790697e-05, "loss": 2.4515, "step": 390 }, { "epoch": 0.93, "learning_rate": 1.3953488372093024e-05, "loss": 2.632, "step": 400 }, { "epoch": 0.93, "eval_accuracy": 0.5076923076923077, "eval_loss": 2.5673868656158447, "eval_runtime": 33.1328, "eval_samples_per_second": 51.007, "eval_steps_per_second": 6.398, "step": 400 }, { "epoch": 0.95, "learning_rate": 9.302325581395349e-06, "loss": 2.6872, "step": 410 }, { "epoch": 0.98, "learning_rate": 4.651162790697674e-06, "loss": 2.556, "step": 420 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 2.5929, "step": 430 }, { "epoch": 1.0, "step": 430, "total_flos": 5.338473146715341e+17, "train_loss": 3.3443971855695858, "train_runtime": 396.8195, "train_samples_per_second": 17.338, "train_steps_per_second": 1.084 } ], "max_steps": 430, "num_train_epochs": 1, "total_flos": 5.338473146715341e+17, "trial_name": null, "trial_params": null }