{ "best_metric": 0.7926666666666666, "best_model_checkpoint": "Bert_tinybert-distilled/run-2/checkpoint-1720", "epoch": 10.0, "eval_steps": 500, "global_step": 1720, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.39246517419815063, "learning_rate": 4.639522353304605e-05, "loss": 0.2489, "step": 172 }, { "epoch": 1.0, "eval_AUC": 0.786481777777778, "eval_BAC": 0.6966666666666667, "eval_MCC": 0.4004741770824115, "eval_PR_AUC": 0.7486102682312443, "eval_accuracy": 0.6966666666666667, "eval_loss": 0.23812349140644073, "eval_runtime": 6.2157, "eval_samples_per_second": 241.324, "eval_steps_per_second": 7.561, "step": 172 }, { "epoch": 2.0, "grad_norm": 0.45007961988449097, "learning_rate": 4.1240198696040935e-05, "loss": 0.2402, "step": 344 }, { "epoch": 2.0, "eval_AUC": 0.8126222222222222, "eval_BAC": 0.7553333333333334, "eval_MCC": 0.5408380051918336, "eval_PR_AUC": 0.7628286445519327, "eval_accuracy": 0.7553333333333333, "eval_loss": 0.23252661526203156, "eval_runtime": 6.2733, "eval_samples_per_second": 239.107, "eval_steps_per_second": 7.492, "step": 344 }, { "epoch": 3.0, "grad_norm": 0.5242908596992493, "learning_rate": 3.608517385903581e-05, "loss": 0.2384, "step": 516 }, { "epoch": 3.0, "eval_AUC": 0.8210719999999999, "eval_BAC": 0.754, "eval_MCC": 0.5442085309980096, "eval_PR_AUC": 0.7771659186047205, "eval_accuracy": 0.754, "eval_loss": 0.23356583714485168, "eval_runtime": 6.2379, "eval_samples_per_second": 240.465, "eval_steps_per_second": 7.535, "step": 516 }, { "epoch": 4.0, "grad_norm": 0.7818685173988342, "learning_rate": 3.0930149022030695e-05, "loss": 0.237, "step": 688 }, { "epoch": 4.0, "eval_AUC": 0.8297351111111111, "eval_BAC": 0.7686666666666666, "eval_MCC": 0.5525864831739952, "eval_PR_AUC": 0.7854176937233266, "eval_accuracy": 0.7686666666666667, "eval_loss": 0.2301863133907318, "eval_runtime": 6.2464, "eval_samples_per_second": 240.136, "eval_steps_per_second": 7.524, "step": 688 }, { "epoch": 5.0, "grad_norm": 0.7255853414535522, "learning_rate": 2.577512418502558e-05, "loss": 0.2362, "step": 860 }, { "epoch": 5.0, "eval_AUC": 0.836256, "eval_BAC": 0.7666666666666666, "eval_MCC": 0.5542293013644021, "eval_PR_AUC": 0.7972297123276614, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.22993715107440948, "eval_runtime": 6.2748, "eval_samples_per_second": 239.051, "eval_steps_per_second": 7.49, "step": 860 }, { "epoch": 6.0, "grad_norm": 0.7448613047599792, "learning_rate": 2.0620099348020468e-05, "loss": 0.235, "step": 1032 }, { "epoch": 6.0, "eval_AUC": 0.8438133333333333, "eval_BAC": 0.778, "eval_MCC": 0.5714092913131209, "eval_PR_AUC": 0.8061543385382132, "eval_accuracy": 0.778, "eval_loss": 0.22893689572811127, "eval_runtime": 6.2414, "eval_samples_per_second": 240.331, "eval_steps_per_second": 7.53, "step": 1032 }, { "epoch": 7.0, "grad_norm": 0.7925253510475159, "learning_rate": 1.5465074511015347e-05, "loss": 0.2345, "step": 1204 }, { "epoch": 7.0, "eval_AUC": 0.8473991111111112, "eval_BAC": 0.782, "eval_MCC": 0.5792569729386494, "eval_PR_AUC": 0.8108360182006333, "eval_accuracy": 0.782, "eval_loss": 0.2287604957818985, "eval_runtime": 6.2713, "eval_samples_per_second": 239.186, "eval_steps_per_second": 7.494, "step": 1204 }, { "epoch": 8.0, "grad_norm": 0.35333141684532166, "learning_rate": 1.0310049674010234e-05, "loss": 0.234, "step": 1376 }, { "epoch": 8.0, "eval_AUC": 0.8490613333333333, "eval_BAC": 0.788, "eval_MCC": 0.5830155950947427, "eval_PR_AUC": 0.8120541754208705, "eval_accuracy": 0.788, "eval_loss": 0.2279704362154007, "eval_runtime": 6.2416, "eval_samples_per_second": 240.322, "eval_steps_per_second": 7.53, "step": 1376 }, { "epoch": 9.0, "grad_norm": 0.44008931517601013, "learning_rate": 5.155024837005117e-06, "loss": 0.2331, "step": 1548 }, { "epoch": 9.0, "eval_AUC": 0.8531626666666667, "eval_BAC": 0.79, "eval_MCC": 0.5917962039710227, "eval_PR_AUC": 0.81588940138238, "eval_accuracy": 0.79, "eval_loss": 0.22790811955928802, "eval_runtime": 6.2673, "eval_samples_per_second": 239.338, "eval_steps_per_second": 7.499, "step": 1548 }, { "epoch": 10.0, "grad_norm": 0.5285153388977051, "learning_rate": 0.0, "loss": 0.2334, "step": 1720 }, { "epoch": 10.0, "eval_AUC": 0.8541422222222221, "eval_BAC": 0.7926666666666666, "eval_MCC": 0.5969110830375018, "eval_PR_AUC": 0.8163240976491174, "eval_accuracy": 0.7926666666666666, "eval_loss": 0.22778959572315216, "eval_runtime": 6.2536, "eval_samples_per_second": 239.861, "eval_steps_per_second": 7.516, "step": 1720 } ], "logging_steps": 500, "max_steps": 1720, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4503777300000.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.3558486326039224, "learning_rate": 5.155024837005116e-05, "num_train_epochs": 10, "temperature": 4 } }