{"train/loss": 2.0369, "train/learning_rate": 1.2271520073786623e-08, "train/epoch": 5.0, "train/global_step": 1365, "_timestamp": 1706815561.5711305, "_runtime": 5231.487163543701, "_step": 143, "eval/loss": 2.118281126022339, "eval/runtime": 30.3009, "eval/samples_per_second": 509.26, "eval/steps_per_second": 2.013, "train/train_runtime": 5141.5129, "train/train_samples_per_second": 135.588, "train/train_steps_per_second": 0.265, "train/total_flos": 457285168005120.0, "train/train_loss": 3.477488596011431} |