{ "best_metric": 77.37530328335532, "best_model_checkpoint": "/root/turkic_qa/en_kaz_models/en_kaz_xlm_roberta_large_squad_model/checkpoint-3260", "epoch": 5.0, "eval_steps": 500, "global_step": 3260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 652, "train_exact_match": 68.63136863136863, "train_f1": 83.20724970751985, "train_runtime": 28.0635, "train_samples_per_second": 43.722, "train_steps_per_second": 1.568 }, { "epoch": 1.0, "grad_norm": 45.333194732666016, "learning_rate": 1e-05, "loss": 1.2106, "step": 652 }, { "epoch": 1.0, "eval_exact_match": 60.65625, "eval_f1": 75.59481049132827, "eval_runtime": 87.9399, "eval_samples_per_second": 43.871, "eval_steps_per_second": 1.569, "step": 652 }, { "epoch": 2.0, "step": 1304, "train_exact_match": 75.52447552447552, "train_f1": 89.04467674156216, "train_runtime": 28.5171, "train_samples_per_second": 43.518, "train_steps_per_second": 1.578 }, { "epoch": 2.0, "grad_norm": 26.666345596313477, "learning_rate": 7.500000000000001e-06, "loss": 0.8497, "step": 1304 }, { "epoch": 2.0, "eval_exact_match": 62.71875, "eval_f1": 77.20303172730397, "eval_runtime": 88.007, "eval_samples_per_second": 43.837, "eval_steps_per_second": 1.568, "step": 1304 }, { "epoch": 3.0, "step": 1956, "train_exact_match": 83.91608391608392, "train_f1": 93.33096969615053, "train_runtime": 27.7404, "train_samples_per_second": 43.474, "train_steps_per_second": 1.586 }, { "epoch": 3.0, "grad_norm": 30.600830078125, "learning_rate": 5e-06, "loss": 0.5689, "step": 1956 }, { "epoch": 3.0, "eval_exact_match": 63.40625, "eval_f1": 77.30509811671915, "eval_runtime": 88.124, "eval_samples_per_second": 43.779, "eval_steps_per_second": 1.566, "step": 1956 }, { "epoch": 4.0, "step": 2608, "train_exact_match": 86.61338661338661, "train_f1": 95.19866566649596, "train_runtime": 28.7793, "train_samples_per_second": 43.782, "train_steps_per_second": 1.564 }, { "epoch": 4.0, "grad_norm": 76.71251678466797, "learning_rate": 2.5e-06, "loss": 0.4001, "step": 2608 }, { "epoch": 4.0, "eval_exact_match": 62.71875, "eval_f1": 77.01339638544334, "eval_runtime": 88.1308, "eval_samples_per_second": 43.776, "eval_steps_per_second": 1.566, "step": 2608 }, { "epoch": 5.0, "step": 3260, "train_exact_match": 90.20979020979021, "train_f1": 96.69185942456909, "train_runtime": 27.889, "train_samples_per_second": 43.386, "train_steps_per_second": 1.578 }, { "epoch": 5.0, "grad_norm": 29.443225860595703, "learning_rate": 0.0, "loss": 0.3113, "step": 3260 }, { "epoch": 5.0, "eval_exact_match": 63.40625, "eval_f1": 77.37530328335532, "eval_runtime": 88.2621, "eval_samples_per_second": 43.711, "eval_steps_per_second": 1.564, "step": 3260 }, { "epoch": 5.0, "step": 3260, "total_flos": 6.354793682009856e+16, "train_loss": 0.6681148458843582, "train_runtime": 5891.9748, "train_samples_per_second": 15.485, "train_steps_per_second": 0.553 } ], "logging_steps": 500, "max_steps": 3260, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 6.354793682009856e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }