{"train/loss": 3.0915, "train/grad_norm": 10.732579231262207, "train/learning_rate": 0.0, "train/epoch": 0.0, "train/global_step": 40, "_timestamp": 1712602635.084315, "_runtime": 598.2633030414581, "_step": 8, "train/train_runtime": 62.7568, "train/train_samples_per_second": 2.55, "train/train_steps_per_second": 0.637, "train/total_flos": 114007284056064.0, "train/train_loss": 3.2310258865356447}