{"train/loss": 2.8146, "train/grad_norm": 0.0, "train/learning_rate": 0.0, "train/epoch": 0.0, "train/global_step": 200, "_timestamp": 1712701028.9067254, "_runtime": 129.62409138679504, "_step": 2, "train_runtime": 131.3409, "train_samples_per_second": 3.046, "train_steps_per_second": 1.523, "total_flos": 205645259857920.0, "train_loss": 2.809670715332031} |