{ "best_metric": 0.967032967032967, "best_model_checkpoint": "document-crop/checkpoint-72", "epoch": 19.310344827586206, "eval_steps": 500, "global_step": 140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9655172413793104, "eval_accuracy": 0.9120879120879121, "eval_loss": 0.3032292127609253, "eval_runtime": 2.5391, "eval_samples_per_second": 35.839, "eval_steps_per_second": 2.363, "step": 7 }, { "epoch": 1.9310344827586206, "eval_accuracy": 0.8241758241758241, "eval_loss": 0.3624477684497833, "eval_runtime": 2.5753, "eval_samples_per_second": 35.335, "eval_steps_per_second": 2.33, "step": 14 }, { "epoch": 2.0689655172413794, "grad_norm": 17.86758804321289, "learning_rate": 4.960317460317461e-05, "loss": 0.48, "step": 15 }, { "epoch": 2.896551724137931, "eval_accuracy": 0.8351648351648352, "eval_loss": 0.3741466701030731, "eval_runtime": 2.5668, "eval_samples_per_second": 35.453, "eval_steps_per_second": 2.338, "step": 21 }, { "epoch": 4.0, "eval_accuracy": 0.8901098901098901, "eval_loss": 0.23425163328647614, "eval_runtime": 2.5564, "eval_samples_per_second": 35.596, "eval_steps_per_second": 2.347, "step": 29 }, { "epoch": 4.137931034482759, "grad_norm": 19.562450408935547, "learning_rate": 4.3650793650793655e-05, "loss": 0.3749, "step": 30 }, { "epoch": 4.9655172413793105, "eval_accuracy": 0.9120879120879121, "eval_loss": 0.20266202092170715, "eval_runtime": 2.6328, "eval_samples_per_second": 34.564, "eval_steps_per_second": 2.279, "step": 36 }, { "epoch": 5.931034482758621, "eval_accuracy": 0.945054945054945, "eval_loss": 0.19039569795131683, "eval_runtime": 2.6292, "eval_samples_per_second": 34.611, "eval_steps_per_second": 2.282, "step": 43 }, { "epoch": 6.206896551724138, "grad_norm": 13.474059104919434, "learning_rate": 3.76984126984127e-05, "loss": 0.2323, "step": 45 }, { "epoch": 6.896551724137931, "eval_accuracy": 0.9120879120879121, "eval_loss": 0.19586102664470673, "eval_runtime": 2.6461, "eval_samples_per_second": 34.39, "eval_steps_per_second": 2.267, "step": 50 }, { "epoch": 8.0, "eval_accuracy": 0.8901098901098901, "eval_loss": 0.2905385494232178, "eval_runtime": 2.6111, "eval_samples_per_second": 34.852, "eval_steps_per_second": 2.298, "step": 58 }, { "epoch": 8.275862068965518, "grad_norm": 24.003192901611328, "learning_rate": 3.1746031746031745e-05, "loss": 0.1447, "step": 60 }, { "epoch": 8.96551724137931, "eval_accuracy": 0.9010989010989011, "eval_loss": 0.3998720347881317, "eval_runtime": 2.6866, "eval_samples_per_second": 33.872, "eval_steps_per_second": 2.233, "step": 65 }, { "epoch": 9.931034482758621, "eval_accuracy": 0.967032967032967, "eval_loss": 0.10719592869281769, "eval_runtime": 2.6592, "eval_samples_per_second": 34.22, "eval_steps_per_second": 2.256, "step": 72 }, { "epoch": 10.344827586206897, "grad_norm": 12.631780624389648, "learning_rate": 2.5793650793650796e-05, "loss": 0.125, "step": 75 }, { "epoch": 10.89655172413793, "eval_accuracy": 0.9010989010989011, "eval_loss": 0.2653524875640869, "eval_runtime": 2.6798, "eval_samples_per_second": 33.958, "eval_steps_per_second": 2.239, "step": 79 }, { "epoch": 12.0, "eval_accuracy": 0.945054945054945, "eval_loss": 0.17991583049297333, "eval_runtime": 2.7033, "eval_samples_per_second": 33.663, "eval_steps_per_second": 2.22, "step": 87 }, { "epoch": 12.413793103448276, "grad_norm": 22.67920684814453, "learning_rate": 1.984126984126984e-05, "loss": 0.1979, "step": 90 }, { "epoch": 12.96551724137931, "eval_accuracy": 0.9120879120879121, "eval_loss": 0.25460970401763916, "eval_runtime": 2.605, "eval_samples_per_second": 34.933, "eval_steps_per_second": 2.303, "step": 94 }, { "epoch": 13.931034482758621, "eval_accuracy": 0.9010989010989011, "eval_loss": 0.25756293535232544, "eval_runtime": 2.6309, "eval_samples_per_second": 34.589, "eval_steps_per_second": 2.281, "step": 101 }, { "epoch": 14.482758620689655, "grad_norm": 3.1801395416259766, "learning_rate": 1.388888888888889e-05, "loss": 0.0633, "step": 105 }, { "epoch": 14.89655172413793, "eval_accuracy": 0.9340659340659341, "eval_loss": 0.1996360421180725, "eval_runtime": 2.7759, "eval_samples_per_second": 32.782, "eval_steps_per_second": 2.161, "step": 108 }, { "epoch": 16.0, "eval_accuracy": 0.9560439560439561, "eval_loss": 0.1824313998222351, "eval_runtime": 2.6954, "eval_samples_per_second": 33.762, "eval_steps_per_second": 2.226, "step": 116 }, { "epoch": 16.551724137931036, "grad_norm": 0.8367649912834167, "learning_rate": 7.936507936507936e-06, "loss": 0.0311, "step": 120 }, { "epoch": 16.96551724137931, "eval_accuracy": 0.9560439560439561, "eval_loss": 0.18344764411449432, "eval_runtime": 2.7125, "eval_samples_per_second": 33.549, "eval_steps_per_second": 2.212, "step": 123 }, { "epoch": 17.93103448275862, "eval_accuracy": 0.9230769230769231, "eval_loss": 0.2769739627838135, "eval_runtime": 2.693, "eval_samples_per_second": 33.791, "eval_steps_per_second": 2.228, "step": 130 }, { "epoch": 18.620689655172413, "grad_norm": 0.14054416120052338, "learning_rate": 1.984126984126984e-06, "loss": 0.0154, "step": 135 }, { "epoch": 18.896551724137932, "eval_accuracy": 0.945054945054945, "eval_loss": 0.2113342434167862, "eval_runtime": 2.672, "eval_samples_per_second": 34.056, "eval_steps_per_second": 2.245, "step": 137 }, { "epoch": 19.310344827586206, "eval_accuracy": 0.9340659340659341, "eval_loss": 0.20573703944683075, "eval_runtime": 2.6636, "eval_samples_per_second": 34.164, "eval_steps_per_second": 2.253, "step": 140 }, { "epoch": 19.310344827586206, "step": 140, "total_flos": 6.939281828084982e+17, "train_loss": 0.17856683683182512, "train_runtime": 404.7192, "train_samples_per_second": 22.485, "train_steps_per_second": 0.346 } ], "logging_steps": 15, "max_steps": 140, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.939281828084982e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }