{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.08279516476237787, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008279516476237788, "eval_accuracy": 0.5510729122994245, "eval_loss": 2.3376457691192627, "eval_runtime": 6.8445, "eval_samples_per_second": 59.61, "eval_steps_per_second": 1.899, "step": 100 }, { "epoch": 0.016559032952475575, "eval_accuracy": 0.5764662925666846, "eval_loss": 2.173574447631836, "eval_runtime": 6.6808, "eval_samples_per_second": 61.071, "eval_steps_per_second": 1.946, "step": 200 }, { "epoch": 0.024838549428713365, "eval_accuracy": 0.5929699147520929, "eval_loss": 2.0678670406341553, "eval_runtime": 6.4137, "eval_samples_per_second": 63.614, "eval_steps_per_second": 2.027, "step": 300 }, { "epoch": 0.03311806590495115, "eval_accuracy": 0.6055573666749765, "eval_loss": 1.9839483499526978, "eval_runtime": 6.4017, "eval_samples_per_second": 63.734, "eval_steps_per_second": 2.031, "step": 400 }, { "epoch": 0.04139758238118894, "grad_norm": 8.5625, "learning_rate": 4.931004029364685e-05, "loss": 2.2761, "step": 500 }, { "epoch": 0.04139758238118894, "eval_accuracy": 0.6084943562272814, "eval_loss": 1.9611371755599976, "eval_runtime": 6.4249, "eval_samples_per_second": 63.503, "eval_steps_per_second": 2.023, "step": 500 }, { "epoch": 0.04967709885742673, "eval_accuracy": 0.6203082851637765, "eval_loss": 1.905377984046936, "eval_runtime": 6.4365, "eval_samples_per_second": 63.388, "eval_steps_per_second": 2.02, "step": 600 }, { "epoch": 0.057956615333664516, "eval_accuracy": 0.6241699612328715, "eval_loss": 1.8838109970092773, "eval_runtime": 6.4118, "eval_samples_per_second": 63.632, "eval_steps_per_second": 2.028, "step": 700 }, { "epoch": 0.0662361318099023, "eval_accuracy": 0.6295839990759813, "eval_loss": 1.8403326272964478, "eval_runtime": 6.397, "eval_samples_per_second": 63.78, "eval_steps_per_second": 2.032, "step": 800 }, { "epoch": 0.07451564828614009, "eval_accuracy": 0.6300428691724719, "eval_loss": 1.8234734535217285, "eval_runtime": 6.4304, "eval_samples_per_second": 63.449, "eval_steps_per_second": 2.022, "step": 900 }, { "epoch": 0.08279516476237787, "grad_norm": 7.65625, "learning_rate": 4.862008058729371e-05, "loss": 1.8887, "step": 1000 }, { "epoch": 0.08279516476237787, "eval_accuracy": 0.6351211866350639, "eval_loss": 1.7919981479644775, "eval_runtime": 6.4, "eval_samples_per_second": 63.75, "eval_steps_per_second": 2.031, "step": 1000 }, { "epoch": 0.08279516476237787, "step": 1000, "total_flos": 2.198926000128e+16, "train_loss": 2.0824306030273436, "train_runtime": 944.4845, "train_samples_per_second": 613.773, "train_steps_per_second": 38.364 } ], "logging_steps": 500, "max_steps": 36234, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "total_flos": 2.198926000128e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }