{ "best_metric": 0.5005918962125724, "best_model_checkpoint": "./output/modernBERT-base-sentiment-v2/checkpoint-40", "epoch": 5.0, "eval_steps": 500, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.625, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.8373, "step": 5 }, { "epoch": 1.0, "eval_f1": 0.12910686958067819, "eval_loss": 1.8330078125, "eval_precision": 0.16504066117321736, "eval_recall": 0.1890018282051825, "eval_runtime": 8.9078, "eval_samples_per_second": 224.523, "eval_steps_per_second": 0.112, "step": 8 }, { "epoch": 1.25, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.8336, "step": 10 }, { "epoch": 1.875, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.8364, "step": 15 }, { "epoch": 2.0, "eval_f1": 0.12910686958067819, "eval_loss": 1.8330078125, "eval_precision": 0.16504066117321736, "eval_recall": 0.1890018282051825, "eval_runtime": 0.2841, "eval_samples_per_second": 7040.66, "eval_steps_per_second": 3.52, "step": 16 }, { "epoch": 2.5, "grad_norm": 5.113753318786621, "learning_rate": 5.961150787913738e-05, "loss": 1.7937, "step": 20 }, { "epoch": 3.0, "eval_f1": 0.17970771787293252, "eval_loss": 1.53515625, "eval_precision": 0.22861304458868634, "eval_recall": 0.23211174242424246, "eval_runtime": 0.2207, "eval_samples_per_second": 9062.407, "eval_steps_per_second": 4.531, "step": 24 }, { "epoch": 3.125, "grad_norm": 1.7188981771469116, "learning_rate": 5.535570256631384e-05, "loss": 1.5896, "step": 25 }, { "epoch": 3.75, "grad_norm": 1.9793833494186401, "learning_rate": 4.704194240193467e-05, "loss": 1.4145, "step": 30 }, { "epoch": 4.0, "eval_f1": 0.31283295562034524, "eval_loss": 1.15625, "eval_precision": 0.5734842356008, "eval_recall": 0.33733943473317013, "eval_runtime": 0.1992, "eval_samples_per_second": 10041.198, "eval_steps_per_second": 5.021, "step": 32 }, { "epoch": 4.375, "grad_norm": 11.112994194030762, "learning_rate": 3.6000770813281334e-05, "loss": 1.2037, "step": 35 }, { "epoch": 5.0, "grad_norm": 2.136584758758545, "learning_rate": 2.399922918671867e-05, "loss": 1.0167, "step": 40 }, { "epoch": 5.0, "eval_f1": 0.5005918962125724, "eval_loss": 0.916015625, "eval_precision": 0.5713280704919877, "eval_recall": 0.49814126531372666, "eval_runtime": 0.2727, "eval_samples_per_second": 7333.058, "eval_steps_per_second": 3.667, "step": 40 }, { "epoch": 5.0, "step": 40, "total_flos": 3738898811322368.0, "train_loss": 1.565679931640625, "train_runtime": 80.5078, "train_samples_per_second": 993.693, "train_steps_per_second": 0.497 } ], "logging_steps": 5, "max_steps": 40, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 5.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3738898811322368.0, "train_batch_size": 1024, "trial_name": null, "trial_params": null }