{ "best_metric": 0.311672180891037, "best_model_checkpoint": "/kaggle/working/wev2vec-base960-agu-amharic/checkpoint-1500", "epoch": 3.9893617021276597, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13297872340425532, "grad_norm": 2.105905532836914, "learning_rate": 9.73404255319149e-06, "loss": 1.6043, "step": 100 }, { "epoch": 0.26595744680851063, "grad_norm": 4.957370281219482, "learning_rate": 9.46808510638298e-06, "loss": 1.4442, "step": 200 }, { "epoch": 0.39893617021276595, "grad_norm": 4.0505690574646, "learning_rate": 9.204787234042554e-06, "loss": 1.2199, "step": 300 }, { "epoch": 0.5319148936170213, "grad_norm": 3.2562971115112305, "learning_rate": 8.941489361702127e-06, "loss": 1.0649, "step": 400 }, { "epoch": 0.6648936170212766, "grad_norm": 3.3451857566833496, "learning_rate": 8.675531914893619e-06, "loss": 0.8682, "step": 500 }, { "epoch": 0.6648936170212766, "eval_accuracy": 0.7563527822494507, "eval_loss": 0.7632076144218445, "eval_runtime": 59.9292, "eval_samples_per_second": 22.326, "eval_steps_per_second": 2.803, "step": 500 }, { "epoch": 0.7978723404255319, "grad_norm": 7.531016826629639, "learning_rate": 8.409574468085107e-06, "loss": 0.7386, "step": 600 }, { "epoch": 0.9308510638297872, "grad_norm": 14.800432205200195, "learning_rate": 8.143617021276596e-06, "loss": 0.6506, "step": 700 }, { "epoch": 1.0638297872340425, "grad_norm": 18.182409286499023, "learning_rate": 7.877659574468086e-06, "loss": 0.5668, "step": 800 }, { "epoch": 1.196808510638298, "grad_norm": 11.308631896972656, "learning_rate": 7.6117021276595745e-06, "loss": 0.4825, "step": 900 }, { "epoch": 1.3297872340425532, "grad_norm": 3.6028597354888916, "learning_rate": 7.348404255319149e-06, "loss": 0.4482, "step": 1000 }, { "epoch": 1.3297872340425532, "eval_accuracy": 0.9103139042854309, "eval_loss": 0.35010650753974915, "eval_runtime": 60.1508, "eval_samples_per_second": 22.244, "eval_steps_per_second": 2.793, "step": 1000 }, { "epoch": 1.4627659574468086, "grad_norm": 13.081503868103027, "learning_rate": 7.0824468085106394e-06, "loss": 0.3877, "step": 1100 }, { "epoch": 1.5957446808510638, "grad_norm": 0.6290038228034973, "learning_rate": 6.816489361702127e-06, "loss": 0.3846, "step": 1200 }, { "epoch": 1.728723404255319, "grad_norm": 5.16023063659668, "learning_rate": 6.550531914893618e-06, "loss": 0.3553, "step": 1300 }, { "epoch": 1.8617021276595744, "grad_norm": 1.3795862197875977, "learning_rate": 6.284574468085107e-06, "loss": 0.3397, "step": 1400 }, { "epoch": 1.9946808510638299, "grad_norm": 17.843442916870117, "learning_rate": 6.018617021276596e-06, "loss": 0.2724, "step": 1500 }, { "epoch": 1.9946808510638299, "eval_accuracy": 0.9230194091796875, "eval_loss": 0.311672180891037, "eval_runtime": 60.1853, "eval_samples_per_second": 22.231, "eval_steps_per_second": 2.791, "step": 1500 }, { "epoch": 2.127659574468085, "grad_norm": 0.7912893295288086, "learning_rate": 5.755319148936171e-06, "loss": 0.228, "step": 1600 }, { "epoch": 2.2606382978723403, "grad_norm": 14.161015510559082, "learning_rate": 5.48936170212766e-06, "loss": 0.2633, "step": 1700 }, { "epoch": 2.393617021276596, "grad_norm": 12.629347801208496, "learning_rate": 5.223404255319149e-06, "loss": 0.2218, "step": 1800 }, { "epoch": 2.526595744680851, "grad_norm": 2.9135618209838867, "learning_rate": 4.957446808510639e-06, "loss": 0.2379, "step": 1900 }, { "epoch": 2.6595744680851063, "grad_norm": 14.890237808227539, "learning_rate": 4.691489361702128e-06, "loss": 0.2269, "step": 2000 }, { "epoch": 2.6595744680851063, "eval_accuracy": 0.9267563819885254, "eval_loss": 0.34563255310058594, "eval_runtime": 60.1272, "eval_samples_per_second": 22.253, "eval_steps_per_second": 2.794, "step": 2000 }, { "epoch": 2.7925531914893615, "grad_norm": 11.785533905029297, "learning_rate": 4.425531914893617e-06, "loss": 0.2319, "step": 2100 }, { "epoch": 2.925531914893617, "grad_norm": 12.325530052185059, "learning_rate": 4.1595744680851066e-06, "loss": 0.1945, "step": 2200 }, { "epoch": 3.0585106382978724, "grad_norm": 27.31170082092285, "learning_rate": 3.893617021276596e-06, "loss": 0.1832, "step": 2300 }, { "epoch": 3.1914893617021276, "grad_norm": 21.935894012451172, "learning_rate": 3.6276595744680853e-06, "loss": 0.1982, "step": 2400 }, { "epoch": 3.324468085106383, "grad_norm": 3.217214822769165, "learning_rate": 3.3617021276595745e-06, "loss": 0.1663, "step": 2500 }, { "epoch": 3.324468085106383, "eval_accuracy": 0.927503764629364, "eval_loss": 0.37431710958480835, "eval_runtime": 60.1697, "eval_samples_per_second": 22.237, "eval_steps_per_second": 2.792, "step": 2500 }, { "epoch": 3.4574468085106385, "grad_norm": 0.2889520823955536, "learning_rate": 3.0957446808510637e-06, "loss": 0.1712, "step": 2600 }, { "epoch": 3.5904255319148937, "grad_norm": 19.394306182861328, "learning_rate": 2.8297872340425537e-06, "loss": 0.1615, "step": 2700 }, { "epoch": 3.723404255319149, "grad_norm": 0.08302600681781769, "learning_rate": 2.563829787234043e-06, "loss": 0.1709, "step": 2800 }, { "epoch": 3.8563829787234045, "grad_norm": 59.50064468383789, "learning_rate": 2.297872340425532e-06, "loss": 0.1614, "step": 2900 }, { "epoch": 3.9893617021276597, "grad_norm": 1.0584908723831177, "learning_rate": 2.0319148936170213e-06, "loss": 0.1737, "step": 3000 }, { "epoch": 3.9893617021276597, "eval_accuracy": 0.9327354431152344, "eval_loss": 0.37135931849479675, "eval_runtime": 60.1293, "eval_samples_per_second": 22.252, "eval_steps_per_second": 2.794, "step": 3000 }, { "epoch": 3.9893617021276597, "step": 3000, "total_flos": 1.8714792090048768e+18, "train_loss": 0.4606133778889974, "train_runtime": 3208.6756, "train_samples_per_second": 18.727, "train_steps_per_second": 1.172 } ], "logging_steps": 100, "max_steps": 3760, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8714792090048768e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }