|
{ |
|
"best_metric": 0.79, |
|
"best_model_checkpoint": "Bert_tinybert-distilled/run-2/checkpoint-1548", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 1548, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.39246517419815063, |
|
"learning_rate": 4.639522353304605e-05, |
|
"loss": 0.2489, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_AUC": 0.786481777777778, |
|
"eval_BAC": 0.6966666666666667, |
|
"eval_MCC": 0.4004741770824115, |
|
"eval_PR_AUC": 0.7486102682312443, |
|
"eval_accuracy": 0.6966666666666667, |
|
"eval_loss": 0.23812349140644073, |
|
"eval_runtime": 6.2157, |
|
"eval_samples_per_second": 241.324, |
|
"eval_steps_per_second": 7.561, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.45007961988449097, |
|
"learning_rate": 4.1240198696040935e-05, |
|
"loss": 0.2402, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_AUC": 0.8126222222222222, |
|
"eval_BAC": 0.7553333333333334, |
|
"eval_MCC": 0.5408380051918336, |
|
"eval_PR_AUC": 0.7628286445519327, |
|
"eval_accuracy": 0.7553333333333333, |
|
"eval_loss": 0.23252661526203156, |
|
"eval_runtime": 6.2733, |
|
"eval_samples_per_second": 239.107, |
|
"eval_steps_per_second": 7.492, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.5242908596992493, |
|
"learning_rate": 3.608517385903581e-05, |
|
"loss": 0.2384, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_AUC": 0.8210719999999999, |
|
"eval_BAC": 0.754, |
|
"eval_MCC": 0.5442085309980096, |
|
"eval_PR_AUC": 0.7771659186047205, |
|
"eval_accuracy": 0.754, |
|
"eval_loss": 0.23356583714485168, |
|
"eval_runtime": 6.2379, |
|
"eval_samples_per_second": 240.465, |
|
"eval_steps_per_second": 7.535, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.7818685173988342, |
|
"learning_rate": 3.0930149022030695e-05, |
|
"loss": 0.237, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_AUC": 0.8297351111111111, |
|
"eval_BAC": 0.7686666666666666, |
|
"eval_MCC": 0.5525864831739952, |
|
"eval_PR_AUC": 0.7854176937233266, |
|
"eval_accuracy": 0.7686666666666667, |
|
"eval_loss": 0.2301863133907318, |
|
"eval_runtime": 6.2464, |
|
"eval_samples_per_second": 240.136, |
|
"eval_steps_per_second": 7.524, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.7255853414535522, |
|
"learning_rate": 2.577512418502558e-05, |
|
"loss": 0.2362, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_AUC": 0.836256, |
|
"eval_BAC": 0.7666666666666666, |
|
"eval_MCC": 0.5542293013644021, |
|
"eval_PR_AUC": 0.7972297123276614, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.22993715107440948, |
|
"eval_runtime": 6.2748, |
|
"eval_samples_per_second": 239.051, |
|
"eval_steps_per_second": 7.49, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.7448613047599792, |
|
"learning_rate": 2.0620099348020468e-05, |
|
"loss": 0.235, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_AUC": 0.8438133333333333, |
|
"eval_BAC": 0.778, |
|
"eval_MCC": 0.5714092913131209, |
|
"eval_PR_AUC": 0.8061543385382132, |
|
"eval_accuracy": 0.778, |
|
"eval_loss": 0.22893689572811127, |
|
"eval_runtime": 6.2414, |
|
"eval_samples_per_second": 240.331, |
|
"eval_steps_per_second": 7.53, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.7925253510475159, |
|
"learning_rate": 1.5465074511015347e-05, |
|
"loss": 0.2345, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_AUC": 0.8473991111111112, |
|
"eval_BAC": 0.782, |
|
"eval_MCC": 0.5792569729386494, |
|
"eval_PR_AUC": 0.8108360182006333, |
|
"eval_accuracy": 0.782, |
|
"eval_loss": 0.2287604957818985, |
|
"eval_runtime": 6.2713, |
|
"eval_samples_per_second": 239.186, |
|
"eval_steps_per_second": 7.494, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.35333141684532166, |
|
"learning_rate": 1.0310049674010234e-05, |
|
"loss": 0.234, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_AUC": 0.8490613333333333, |
|
"eval_BAC": 0.788, |
|
"eval_MCC": 0.5830155950947427, |
|
"eval_PR_AUC": 0.8120541754208705, |
|
"eval_accuracy": 0.788, |
|
"eval_loss": 0.2279704362154007, |
|
"eval_runtime": 6.2416, |
|
"eval_samples_per_second": 240.322, |
|
"eval_steps_per_second": 7.53, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.44008931517601013, |
|
"learning_rate": 5.155024837005117e-06, |
|
"loss": 0.2331, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_AUC": 0.8531626666666667, |
|
"eval_BAC": 0.79, |
|
"eval_MCC": 0.5917962039710227, |
|
"eval_PR_AUC": 0.81588940138238, |
|
"eval_accuracy": 0.79, |
|
"eval_loss": 0.22790811955928802, |
|
"eval_runtime": 6.2673, |
|
"eval_samples_per_second": 239.338, |
|
"eval_steps_per_second": 7.499, |
|
"step": 1548 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4053399570000.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.3558486326039224, |
|
"learning_rate": 5.155024837005116e-05, |
|
"num_train_epochs": 10, |
|
"temperature": 4 |
|
} |
|
} |
|
|