{ "best_metric": 1.227269172668457, "best_model_checkpoint": "./outputs/checkpoint-4000", "epoch": 2.9143897996357016, "eval_steps": 100, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.1908, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.0238757133483887, "eval_runtime": 146.4096, "eval_samples_per_second": 42.852, "eval_steps_per_second": 5.362, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.9736, "step": 200 }, { "epoch": 0.15, "eval_loss": 1.9310673475265503, "eval_runtime": 159.7901, "eval_samples_per_second": 39.264, "eval_steps_per_second": 4.913, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 1.9004, "step": 300 }, { "epoch": 0.22, "eval_loss": 1.8707531690597534, "eval_runtime": 143.355, "eval_samples_per_second": 43.765, "eval_steps_per_second": 5.476, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.8488, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.825339436531067, "eval_runtime": 143.4115, "eval_samples_per_second": 43.748, "eval_steps_per_second": 5.474, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.7978, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.7843894958496094, "eval_runtime": 143.3888, "eval_samples_per_second": 43.755, "eval_steps_per_second": 5.475, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.7696, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.7558746337890625, "eval_runtime": 143.3583, "eval_samples_per_second": 43.764, "eval_steps_per_second": 5.476, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.7307, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.7239936590194702, "eval_runtime": 146.9189, "eval_samples_per_second": 42.704, "eval_steps_per_second": 5.343, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.7092, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.695021390914917, "eval_runtime": 143.2343, "eval_samples_per_second": 43.802, "eval_steps_per_second": 5.481, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.6739, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.6701653003692627, "eval_runtime": 143.2538, "eval_samples_per_second": 43.796, "eval_steps_per_second": 5.48, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.6592, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.6471772193908691, "eval_runtime": 143.2508, "eval_samples_per_second": 43.797, "eval_steps_per_second": 5.48, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.6386, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.624908447265625, "eval_runtime": 143.2378, "eval_samples_per_second": 43.801, "eval_steps_per_second": 5.48, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.604, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.607008934020996, "eval_runtime": 143.2934, "eval_samples_per_second": 43.784, "eval_steps_per_second": 5.478, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.6109, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.584315299987793, "eval_runtime": 143.3844, "eval_samples_per_second": 43.757, "eval_steps_per_second": 5.475, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.5645, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.5640443563461304, "eval_runtime": 143.4327, "eval_samples_per_second": 43.742, "eval_steps_per_second": 5.473, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.5361, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.5473461151123047, "eval_runtime": 143.4327, "eval_samples_per_second": 43.742, "eval_steps_per_second": 5.473, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.5177, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.529100775718689, "eval_runtime": 143.2933, "eval_samples_per_second": 43.784, "eval_steps_per_second": 5.478, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.5157, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.512880802154541, "eval_runtime": 143.3422, "eval_samples_per_second": 43.769, "eval_steps_per_second": 5.476, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.4888, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.4976742267608643, "eval_runtime": 143.3396, "eval_samples_per_second": 43.77, "eval_steps_per_second": 5.477, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.4783, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.4826266765594482, "eval_runtime": 143.3758, "eval_samples_per_second": 43.759, "eval_steps_per_second": 5.475, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.4618, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.4686999320983887, "eval_runtime": 143.318, "eval_samples_per_second": 43.777, "eval_steps_per_second": 5.477, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.4472, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.4544299840927124, "eval_runtime": 143.2701, "eval_samples_per_second": 43.791, "eval_steps_per_second": 5.479, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.422, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.4396847486495972, "eval_runtime": 143.3, "eval_samples_per_second": 43.782, "eval_steps_per_second": 5.478, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.4211, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.4261609315872192, "eval_runtime": 143.4641, "eval_samples_per_second": 43.732, "eval_steps_per_second": 5.472, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.4204, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.4128549098968506, "eval_runtime": 143.3377, "eval_samples_per_second": 43.771, "eval_steps_per_second": 5.477, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 1.3915, "step": 2500 }, { "epoch": 1.82, "eval_loss": 1.4013662338256836, "eval_runtime": 143.3012, "eval_samples_per_second": 43.782, "eval_steps_per_second": 5.478, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 1.3818, "step": 2600 }, { "epoch": 1.89, "eval_loss": 1.3869951963424683, "eval_runtime": 161.3719, "eval_samples_per_second": 38.879, "eval_steps_per_second": 4.865, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 1.3698, "step": 2700 }, { "epoch": 1.97, "eval_loss": 1.3734662532806396, "eval_runtime": 143.5647, "eval_samples_per_second": 43.702, "eval_steps_per_second": 5.468, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 1.341, "step": 2800 }, { "epoch": 2.04, "eval_loss": 1.3625913858413696, "eval_runtime": 150.8911, "eval_samples_per_second": 41.58, "eval_steps_per_second": 5.202, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 1.3155, "step": 2900 }, { "epoch": 2.11, "eval_loss": 1.3497363328933716, "eval_runtime": 150.1999, "eval_samples_per_second": 41.771, "eval_steps_per_second": 5.226, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 1.3161, "step": 3000 }, { "epoch": 2.19, "eval_loss": 1.3376851081848145, "eval_runtime": 143.5158, "eval_samples_per_second": 43.716, "eval_steps_per_second": 5.47, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 1.2955, "step": 3100 }, { "epoch": 2.26, "eval_loss": 1.3271404504776, "eval_runtime": 143.547, "eval_samples_per_second": 43.707, "eval_steps_per_second": 5.469, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 1.2998, "step": 3200 }, { "epoch": 2.33, "eval_loss": 1.3150701522827148, "eval_runtime": 143.5206, "eval_samples_per_second": 43.715, "eval_steps_per_second": 5.47, "step": 3200 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 1.2812, "step": 3300 }, { "epoch": 2.4, "eval_loss": 1.3021332025527954, "eval_runtime": 143.6618, "eval_samples_per_second": 43.672, "eval_steps_per_second": 5.464, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.2634, "step": 3400 }, { "epoch": 2.48, "eval_loss": 1.2930917739868164, "eval_runtime": 143.6376, "eval_samples_per_second": 43.679, "eval_steps_per_second": 5.465, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.2634, "step": 3500 }, { "epoch": 2.55, "eval_loss": 1.2815780639648438, "eval_runtime": 143.69, "eval_samples_per_second": 43.663, "eval_steps_per_second": 5.463, "step": 3500 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 1.2516, "step": 3600 }, { "epoch": 2.62, "eval_loss": 1.270579218864441, "eval_runtime": 143.5786, "eval_samples_per_second": 43.697, "eval_steps_per_second": 5.467, "step": 3600 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.2358, "step": 3700 }, { "epoch": 2.7, "eval_loss": 1.2581326961517334, "eval_runtime": 143.5689, "eval_samples_per_second": 43.7, "eval_steps_per_second": 5.468, "step": 3700 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 1.2124, "step": 3800 }, { "epoch": 2.77, "eval_loss": 1.2480058670043945, "eval_runtime": 143.4136, "eval_samples_per_second": 43.748, "eval_steps_per_second": 5.474, "step": 3800 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.2226, "step": 3900 }, { "epoch": 2.84, "eval_loss": 1.235005497932434, "eval_runtime": 143.4509, "eval_samples_per_second": 43.736, "eval_steps_per_second": 5.472, "step": 3900 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 1.2043, "step": 4000 }, { "epoch": 2.91, "eval_loss": 1.227269172668457, "eval_runtime": 173.3331, "eval_samples_per_second": 36.196, "eval_steps_per_second": 4.529, "step": 4000 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 2.3840611478920397e+17, "trial_name": null, "trial_params": null }