{ "best_metric": null, "best_model_checkpoint": null, "epoch": 13.0, "eval_steps": 500, "global_step": 4134, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9968553459119497, "grad_norm": 0.3114970326423645, "learning_rate": 0.00018516884358804785, "loss": 0.285, "step": 317 }, { "epoch": 1.0, "eval_accuracy": 0.8787096774193548, "eval_loss": 0.06010065972805023, "eval_runtime": 2.5938, "eval_samples_per_second": 1195.147, "eval_steps_per_second": 25.06, "step": 318 }, { "epoch": 1.9937106918238994, "grad_norm": 0.2971077263355255, "learning_rate": 0.00016979066087455266, "loss": 0.0516, "step": 634 }, { "epoch": 2.0, "eval_accuracy": 0.9296774193548387, "eval_loss": 0.03248392790555954, "eval_runtime": 2.6049, "eval_samples_per_second": 1190.073, "eval_steps_per_second": 24.953, "step": 636 }, { "epoch": 2.990566037735849, "grad_norm": 0.24367552995681763, "learning_rate": 0.00015441247816105745, "loss": 0.0306, "step": 951 }, { "epoch": 3.0, "eval_accuracy": 0.9354838709677419, "eval_loss": 0.02789517492055893, "eval_runtime": 2.6186, "eval_samples_per_second": 1183.836, "eval_steps_per_second": 24.822, "step": 954 }, { "epoch": 3.9874213836477987, "grad_norm": 0.1115935891866684, "learning_rate": 0.00013903429544756226, "loss": 0.0247, "step": 1268 }, { "epoch": 4.0, "eval_accuracy": 0.9432258064516129, "eval_loss": 0.022768640890717506, "eval_runtime": 2.6313, "eval_samples_per_second": 1178.135, "eval_steps_per_second": 24.703, "step": 1272 }, { "epoch": 4.984276729559748, "grad_norm": 0.09492151439189911, "learning_rate": 0.00012365611273406705, "loss": 0.0211, "step": 1585 }, { "epoch": 5.0, "eval_accuracy": 0.9416129032258065, "eval_loss": 0.021043915301561356, "eval_runtime": 2.6325, "eval_samples_per_second": 1177.607, "eval_steps_per_second": 24.692, "step": 1590 }, { "epoch": 5.981132075471698, "grad_norm": 0.1175580695271492, "learning_rate": 0.00010827793002057186, "loss": 0.0195, "step": 1902 }, { "epoch": 6.0, "eval_accuracy": 0.9429032258064516, "eval_loss": 0.020259153097867966, "eval_runtime": 2.6508, "eval_samples_per_second": 1169.473, "eval_steps_per_second": 24.521, "step": 1908 }, { "epoch": 6.977987421383648, "grad_norm": 0.11239363253116608, "learning_rate": 9.289974730707666e-05, "loss": 0.0179, "step": 2219 }, { "epoch": 7.0, "eval_accuracy": 0.9454838709677419, "eval_loss": 0.01921679638326168, "eval_runtime": 2.6313, "eval_samples_per_second": 1178.137, "eval_steps_per_second": 24.703, "step": 2226 }, { "epoch": 7.9748427672955975, "grad_norm": 0.06852811574935913, "learning_rate": 7.752156459358147e-05, "loss": 0.0166, "step": 2536 }, { "epoch": 8.0, "eval_accuracy": 0.944516129032258, "eval_loss": 0.01865958236157894, "eval_runtime": 2.6643, "eval_samples_per_second": 1163.539, "eval_steps_per_second": 24.397, "step": 2544 }, { "epoch": 8.971698113207546, "grad_norm": 0.06738817691802979, "learning_rate": 6.214338188008627e-05, "loss": 0.0158, "step": 2853 }, { "epoch": 9.0, "eval_accuracy": 0.944516129032258, "eval_loss": 0.017864950001239777, "eval_runtime": 2.6486, "eval_samples_per_second": 1170.45, "eval_steps_per_second": 24.542, "step": 2862 }, { "epoch": 9.968553459119496, "grad_norm": 0.062413159757852554, "learning_rate": 4.6765199166591074e-05, "loss": 0.015, "step": 3170 }, { "epoch": 10.0, "eval_accuracy": 0.9467741935483871, "eval_loss": 0.017261695116758347, "eval_runtime": 2.652, "eval_samples_per_second": 1168.93, "eval_steps_per_second": 24.51, "step": 3180 }, { "epoch": 10.965408805031446, "grad_norm": 0.05933304503560066, "learning_rate": 3.138701645309588e-05, "loss": 0.0143, "step": 3487 }, { "epoch": 11.0, "eval_accuracy": 0.9464516129032258, "eval_loss": 0.016805030405521393, "eval_runtime": 2.653, "eval_samples_per_second": 1168.48, "eval_steps_per_second": 24.5, "step": 3498 }, { "epoch": 11.962264150943396, "grad_norm": 0.05458727478981018, "learning_rate": 1.6008833739600677e-05, "loss": 0.0138, "step": 3804 }, { "epoch": 12.0, "eval_accuracy": 0.9458064516129032, "eval_loss": 0.016528310254216194, "eval_runtime": 2.6453, "eval_samples_per_second": 1171.873, "eval_steps_per_second": 24.572, "step": 3816 }, { "epoch": 12.959119496855346, "grad_norm": 0.05435788258910179, "learning_rate": 6.306510261054813e-07, "loss": 0.0133, "step": 4121 } ], "logging_steps": 317, "max_steps": 4134, "num_input_tokens_seen": 0, "num_train_epochs": 13, "save_steps": 1000000000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1072409708161512.0, "train_batch_size": 48, "trial_name": null, "trial_params": { "alpha": 0.3513465070451599, "fp16": false, "learning_rate": 0.00018909828459685892, "lr_scheduler": "cosine", "num_train_epochs": 13, "temperature": 9, "warmup_steps": 236, "weight_decay": 0.09716219849882443 } }