{ "best_metric": 0.5938899517059326, "best_model_checkpoint": "ckpt/origin_vehicle_view/vehicle_rewrite/checkpoint-96", "epoch": 14.328358208955224, "eval_steps": 6, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 5e-05, "loss": 1.4214, "step": 1 }, { "epoch": 0.24, "learning_rate": 0.0001, "loss": 1.3775, "step": 2 }, { "epoch": 0.36, "learning_rate": 9.998228055617263e-05, "loss": 1.4409, "step": 3 }, { "epoch": 0.48, "learning_rate": 9.99291347838381e-05, "loss": 1.3779, "step": 4 }, { "epoch": 0.6, "learning_rate": 9.98406003515375e-05, "loss": 1.3939, "step": 5 }, { "epoch": 0.72, "learning_rate": 9.971674001050686e-05, "loss": 1.3003, "step": 6 }, { "epoch": 0.72, "eval_loss": 1.2484569549560547, "eval_runtime": 126.7576, "eval_samples_per_second": 1.459, "eval_steps_per_second": 1.459, "step": 6 }, { "epoch": 0.84, "learning_rate": 9.955764155020037e-05, "loss": 1.2047, "step": 7 }, { "epoch": 0.96, "learning_rate": 9.936341773606723e-05, "loss": 1.2169, "step": 8 }, { "epoch": 1.07, "learning_rate": 9.913420622962606e-05, "loss": 1.1941, "step": 9 }, { "epoch": 1.19, "learning_rate": 9.887016949089333e-05, "loss": 1.1275, "step": 10 }, { "epoch": 1.31, "learning_rate": 9.857149466323549e-05, "loss": 1.1525, "step": 11 }, { "epoch": 1.43, "learning_rate": 9.82383934407258e-05, "loss": 1.0776, "step": 12 }, { "epoch": 1.43, "eval_loss": 1.0902503728866577, "eval_runtime": 126.1532, "eval_samples_per_second": 1.466, "eval_steps_per_second": 1.466, "step": 12 }, { "epoch": 1.55, "learning_rate": 9.787110191810027e-05, "loss": 1.0459, "step": 13 }, { "epoch": 1.67, "learning_rate": 9.746988042341906e-05, "loss": 1.0572, "step": 14 }, { "epoch": 1.79, "learning_rate": 9.703501333355168e-05, "loss": 0.9835, "step": 15 }, { "epoch": 1.91, "learning_rate": 9.656680887261693e-05, "loss": 1.0297, "step": 16 }, { "epoch": 2.03, "learning_rate": 9.606559889352064e-05, "loss": 0.9979, "step": 17 }, { "epoch": 2.15, "learning_rate": 9.553173864274567e-05, "loss": 0.9683, "step": 18 }, { "epoch": 2.15, "eval_loss": 0.9658757448196411, "eval_runtime": 125.5373, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 18 }, { "epoch": 2.27, "learning_rate": 9.496560650856097e-05, "loss": 0.8934, "step": 19 }, { "epoch": 2.39, "learning_rate": 9.436760375282859e-05, "loss": 0.9647, "step": 20 }, { "epoch": 2.51, "learning_rate": 9.373815422659806e-05, "loss": 0.9392, "step": 21 }, { "epoch": 2.63, "learning_rate": 9.30777040696903e-05, "loss": 0.8951, "step": 22 }, { "epoch": 2.75, "learning_rate": 9.238672139448354e-05, "loss": 0.8752, "step": 23 }, { "epoch": 2.87, "learning_rate": 9.166569595412575e-05, "loss": 0.8834, "step": 24 }, { "epoch": 2.87, "eval_loss": 0.855851948261261, "eval_runtime": 125.6401, "eval_samples_per_second": 1.472, "eval_steps_per_second": 1.472, "step": 24 }, { "epoch": 2.99, "learning_rate": 9.091513879540845e-05, "loss": 0.7849, "step": 25 }, { "epoch": 3.1, "learning_rate": 9.013558189654819e-05, "loss": 0.8131, "step": 26 }, { "epoch": 3.22, "learning_rate": 8.932757779013214e-05, "loss": 0.8302, "step": 27 }, { "epoch": 3.34, "learning_rate": 8.849169917149531e-05, "loss": 0.8371, "step": 28 }, { "epoch": 3.46, "learning_rate": 8.762853849280693e-05, "loss": 0.8085, "step": 29 }, { "epoch": 3.58, "learning_rate": 8.673870754315336e-05, "loss": 0.7759, "step": 30 }, { "epoch": 3.58, "eval_loss": 0.7806031703948975, "eval_runtime": 125.7778, "eval_samples_per_second": 1.471, "eval_steps_per_second": 1.471, "step": 30 }, { "epoch": 3.7, "learning_rate": 8.582283701491576e-05, "loss": 0.7816, "step": 31 }, { "epoch": 3.82, "learning_rate": 8.488157605674925e-05, "loss": 0.7651, "step": 32 }, { "epoch": 3.94, "learning_rate": 8.391559181348082e-05, "loss": 0.6714, "step": 33 }, { "epoch": 4.06, "learning_rate": 8.292556895325194e-05, "loss": 0.7076, "step": 34 }, { "epoch": 4.18, "learning_rate": 8.191220918224101e-05, "loss": 0.6913, "step": 35 }, { "epoch": 4.3, "learning_rate": 8.08762307473096e-05, "loss": 0.7119, "step": 36 }, { "epoch": 4.3, "eval_loss": 0.72127765417099, "eval_runtime": 125.4909, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 36 }, { "epoch": 4.42, "learning_rate": 7.981836792692508e-05, "loss": 0.6698, "step": 37 }, { "epoch": 4.54, "learning_rate": 7.873937051072035e-05, "loss": 0.6715, "step": 38 }, { "epoch": 4.66, "learning_rate": 7.764000326805967e-05, "loss": 0.6827, "step": 39 }, { "epoch": 4.78, "learning_rate": 7.652104540598712e-05, "loss": 0.6639, "step": 40 }, { "epoch": 4.9, "learning_rate": 7.5383290016942e-05, "loss": 0.6691, "step": 41 }, { "epoch": 5.01, "learning_rate": 7.422754351663252e-05, "loss": 0.6763, "step": 42 }, { "epoch": 5.01, "eval_loss": 0.6810438632965088, "eval_runtime": 125.784, "eval_samples_per_second": 1.471, "eval_steps_per_second": 1.471, "step": 42 }, { "epoch": 5.13, "learning_rate": 7.30546250724663e-05, "loss": 0.5823, "step": 43 }, { "epoch": 5.25, "learning_rate": 7.186536602294278e-05, "loss": 0.6099, "step": 44 }, { "epoch": 5.37, "learning_rate": 7.066060928841892e-05, "loss": 0.619, "step": 45 }, { "epoch": 5.49, "learning_rate": 6.944120877366604e-05, "loss": 0.649, "step": 46 }, { "epoch": 5.61, "learning_rate": 6.820802876264112e-05, "loss": 0.589, "step": 47 }, { "epoch": 5.73, "learning_rate": 6.696194330590151e-05, "loss": 0.6466, "step": 48 }, { "epoch": 5.73, "eval_loss": 0.6540355682373047, "eval_runtime": 125.4258, "eval_samples_per_second": 1.475, "eval_steps_per_second": 1.475, "step": 48 }, { "epoch": 5.85, "learning_rate": 6.570383560109745e-05, "loss": 0.6435, "step": 49 }, { "epoch": 5.97, "learning_rate": 6.443459736698105e-05, "loss": 0.6231, "step": 50 }, { "epoch": 6.09, "learning_rate": 6.315512821137606e-05, "loss": 0.6046, "step": 51 }, { "epoch": 6.21, "learning_rate": 6.186633499355576e-05, "loss": 0.5498, "step": 52 }, { "epoch": 6.33, "learning_rate": 6.056913118148122e-05, "loss": 0.5987, "step": 53 }, { "epoch": 6.45, "learning_rate": 5.9264436204355724e-05, "loss": 0.565, "step": 54 }, { "epoch": 6.45, "eval_loss": 0.6351470351219177, "eval_runtime": 125.5235, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 54 }, { "epoch": 6.57, "learning_rate": 5.7953174800953604e-05, "loss": 0.5997, "step": 55 }, { "epoch": 6.69, "learning_rate": 5.6636276364186105e-05, "loss": 0.5635, "step": 56 }, { "epoch": 6.81, "learning_rate": 5.5314674282368275e-05, "loss": 0.5874, "step": 57 }, { "epoch": 6.93, "learning_rate": 5.3989305277654156e-05, "loss": 0.5615, "step": 58 }, { "epoch": 7.04, "learning_rate": 5.2661108742108935e-05, "loss": 0.5729, "step": 59 }, { "epoch": 7.16, "learning_rate": 5.133102607188874e-05, "loss": 0.5736, "step": 60 }, { "epoch": 7.16, "eval_loss": 0.6185852885246277, "eval_runtime": 125.5344, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 60 }, { "epoch": 7.28, "learning_rate": 5e-05, "loss": 0.5458, "step": 61 }, { "epoch": 7.4, "learning_rate": 4.866897392811126e-05, "loss": 0.5271, "step": 62 }, { "epoch": 7.52, "learning_rate": 4.7338891257891084e-05, "loss": 0.5498, "step": 63 }, { "epoch": 7.64, "learning_rate": 4.601069472234584e-05, "loss": 0.5443, "step": 64 }, { "epoch": 7.76, "learning_rate": 4.4685325717631736e-05, "loss": 0.5452, "step": 65 }, { "epoch": 7.88, "learning_rate": 4.336372363581391e-05, "loss": 0.5062, "step": 66 }, { "epoch": 7.88, "eval_loss": 0.6089844703674316, "eval_runtime": 125.5193, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 66 }, { "epoch": 8.0, "learning_rate": 4.204682519904641e-05, "loss": 0.5397, "step": 67 }, { "epoch": 8.12, "learning_rate": 4.0735563795644294e-05, "loss": 0.568, "step": 68 }, { "epoch": 8.24, "learning_rate": 3.9430868818518784e-05, "loss": 0.5134, "step": 69 }, { "epoch": 8.36, "learning_rate": 3.8133665006444255e-05, "loss": 0.4863, "step": 70 }, { "epoch": 8.48, "learning_rate": 3.6844871788623945e-05, "loss": 0.5397, "step": 71 }, { "epoch": 8.6, "learning_rate": 3.556540263301896e-05, "loss": 0.5159, "step": 72 }, { "epoch": 8.6, "eval_loss": 0.6018280982971191, "eval_runtime": 125.5416, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 72 }, { "epoch": 8.72, "learning_rate": 3.429616439890258e-05, "loss": 0.5079, "step": 73 }, { "epoch": 8.84, "learning_rate": 3.303805669409848e-05, "loss": 0.4993, "step": 74 }, { "epoch": 8.96, "learning_rate": 3.179197123735889e-05, "loss": 0.5091, "step": 75 }, { "epoch": 9.07, "learning_rate": 3.055879122633397e-05, "loss": 0.4833, "step": 76 }, { "epoch": 9.19, "learning_rate": 2.9339390711581105e-05, "loss": 0.4786, "step": 77 }, { "epoch": 9.31, "learning_rate": 2.8134633977057235e-05, "loss": 0.5351, "step": 78 }, { "epoch": 9.31, "eval_loss": 0.600007176399231, "eval_runtime": 125.6028, "eval_samples_per_second": 1.473, "eval_steps_per_second": 1.473, "step": 78 }, { "epoch": 9.43, "learning_rate": 2.69453749275337e-05, "loss": 0.5191, "step": 79 }, { "epoch": 9.55, "learning_rate": 2.5772456483367497e-05, "loss": 0.5325, "step": 80 }, { "epoch": 9.67, "learning_rate": 2.4616709983058018e-05, "loss": 0.4952, "step": 81 }, { "epoch": 9.79, "learning_rate": 2.347895459401288e-05, "loss": 0.467, "step": 82 }, { "epoch": 9.91, "learning_rate": 2.235999673194035e-05, "loss": 0.4942, "step": 83 }, { "epoch": 10.03, "learning_rate": 2.126062948927966e-05, "loss": 0.4777, "step": 84 }, { "epoch": 10.03, "eval_loss": 0.5978866219520569, "eval_runtime": 125.5491, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 84 }, { "epoch": 10.15, "learning_rate": 2.0181632073074926e-05, "loss": 0.4731, "step": 85 }, { "epoch": 10.27, "learning_rate": 1.912376925269041e-05, "loss": 0.5137, "step": 86 }, { "epoch": 10.39, "learning_rate": 1.808779081775901e-05, "loss": 0.5017, "step": 87 }, { "epoch": 10.51, "learning_rate": 1.7074431046748075e-05, "loss": 0.468, "step": 88 }, { "epoch": 10.63, "learning_rate": 1.6084408186519196e-05, "loss": 0.4862, "step": 89 }, { "epoch": 10.75, "learning_rate": 1.5118423943250771e-05, "loss": 0.5243, "step": 90 }, { "epoch": 10.75, "eval_loss": 0.5947938561439514, "eval_runtime": 125.4894, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 90 }, { "epoch": 10.87, "learning_rate": 1.4177162985084242e-05, "loss": 0.4662, "step": 91 }, { "epoch": 10.99, "learning_rate": 1.3261292456846647e-05, "loss": 0.4586, "step": 92 }, { "epoch": 11.1, "learning_rate": 1.2371461507193078e-05, "loss": 0.4615, "step": 93 }, { "epoch": 11.22, "learning_rate": 1.150830082850468e-05, "loss": 0.4715, "step": 94 }, { "epoch": 11.34, "learning_rate": 1.0672422209867878e-05, "loss": 0.5028, "step": 95 }, { "epoch": 11.46, "learning_rate": 9.864418103451828e-06, "loss": 0.4697, "step": 96 }, { "epoch": 11.46, "eval_loss": 0.5938899517059326, "eval_runtime": 125.7051, "eval_samples_per_second": 1.472, "eval_steps_per_second": 1.472, "step": 96 }, { "epoch": 11.58, "learning_rate": 9.084861204591549e-06, "loss": 0.4815, "step": 97 }, { "epoch": 11.7, "learning_rate": 8.334304045874247e-06, "loss": 0.482, "step": 98 }, { "epoch": 11.82, "learning_rate": 7.613278605516455e-06, "loss": 0.5112, "step": 99 }, { "epoch": 11.94, "learning_rate": 6.922295930309691e-06, "loss": 0.4344, "step": 100 }, { "epoch": 12.06, "learning_rate": 6.2618457734019364e-06, "loss": 0.4614, "step": 101 }, { "epoch": 12.18, "learning_rate": 5.6323962471714286e-06, "loss": 0.4704, "step": 102 }, { "epoch": 12.18, "eval_loss": 0.5940476655960083, "eval_runtime": 125.6268, "eval_samples_per_second": 1.473, "eval_steps_per_second": 1.473, "step": 102 }, { "epoch": 12.3, "learning_rate": 5.034393491439043e-06, "loss": 0.4692, "step": 103 }, { "epoch": 12.42, "learning_rate": 4.468261357254339e-06, "loss": 0.4727, "step": 104 }, { "epoch": 12.54, "learning_rate": 3.9344011064793516e-06, "loss": 0.4875, "step": 105 }, { "epoch": 12.66, "learning_rate": 3.4331911273830784e-06, "loss": 0.4759, "step": 106 }, { "epoch": 12.78, "learning_rate": 2.9649866664483385e-06, "loss": 0.4917, "step": 107 }, { "epoch": 12.9, "learning_rate": 2.530119576580936e-06, "loss": 0.4917, "step": 108 }, { "epoch": 12.9, "eval_loss": 0.5947091579437256, "eval_runtime": 125.4985, "eval_samples_per_second": 1.474, "eval_steps_per_second": 1.474, "step": 108 }, { "epoch": 13.01, "learning_rate": 2.1288980818997275e-06, "loss": 0.4611, "step": 109 }, { "epoch": 13.13, "learning_rate": 1.7616065592742038e-06, "loss": 0.4269, "step": 110 }, { "epoch": 13.25, "learning_rate": 1.4285053367645074e-06, "loss": 0.4576, "step": 111 }, { "epoch": 13.37, "learning_rate": 1.1298305091066664e-06, "loss": 0.515, "step": 112 }, { "epoch": 13.49, "learning_rate": 8.657937703739516e-07, "loss": 0.4944, "step": 113 }, { "epoch": 13.61, "learning_rate": 6.365822639327723e-07, "loss": 0.4665, "step": 114 }, { "epoch": 13.61, "eval_loss": 0.594657838344574, "eval_runtime": 125.7169, "eval_samples_per_second": 1.472, "eval_steps_per_second": 1.472, "step": 114 }, { "epoch": 13.73, "learning_rate": 4.423584497996458e-07, "loss": 0.4911, "step": 115 }, { "epoch": 13.85, "learning_rate": 2.8325998949314536e-07, "loss": 0.4548, "step": 116 }, { "epoch": 13.97, "learning_rate": 1.5939964846249378e-07, "loss": 0.4959, "step": 117 }, { "epoch": 14.09, "learning_rate": 7.086521616190279e-08, "loss": 0.4568, "step": 118 }, { "epoch": 14.21, "learning_rate": 1.7719443827368677e-08, "loss": 0.4821, "step": 119 }, { "epoch": 14.33, "learning_rate": 0.0, "loss": 0.4433, "step": 120 }, { "epoch": 14.33, "eval_loss": 0.5946800708770752, "eval_runtime": 126.38, "eval_samples_per_second": 1.464, "eval_steps_per_second": 1.464, "step": 120 }, { "epoch": 14.33, "step": 120, "total_flos": 4.71865270275072e+17, "train_loss": 0.6693517704804738, "train_runtime": 12311.8994, "train_samples_per_second": 0.487, "train_steps_per_second": 0.01 } ], "logging_steps": 1.0, "max_steps": 120, "num_train_epochs": 15, "save_steps": 12, "total_flos": 4.71865270275072e+17, "trial_name": null, "trial_params": null }