{ "best_metric": 0.9651834647200833, "best_model_checkpoint": "./results/checkpoint-3000", "epoch": 0.9363295880149812, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_accuracy": 0.6045787545787545, "eval_ap": 0.6438655017635215, "eval_auc": 0.6252253659085975, "eval_eer": 0.41503637833468066, "eval_f1": 0.673324254804055, "eval_loss": 0.6694703102111816, "eval_precision": 0.6141319348606128, "eval_recall": 0.7451440053583389, "eval_runtime": 1161.2986, "eval_samples_per_second": 70.524, "eval_steps_per_second": 0.276, "step": 0 }, { "epoch": 0.031210986267166042, "grad_norm": 1570057.0, "learning_rate": 1e-05, "loss": 0.3999, "step": 100 }, { "epoch": 0.062421972534332085, "grad_norm": 1873799.375, "learning_rate": 2e-05, "loss": 0.183, "step": 200 }, { "epoch": 0.09363295880149813, "grad_norm": 937146.625, "learning_rate": 3e-05, "loss": 0.1499, "step": 300 }, { "epoch": 0.12484394506866417, "grad_norm": 1296423.5, "learning_rate": 4e-05, "loss": 0.1939, "step": 400 }, { "epoch": 0.1560549313358302, "grad_norm": 698965.0, "learning_rate": 5e-05, "loss": 0.1542, "step": 500 }, { "epoch": 0.1560549313358302, "eval_accuracy": 0.7873870573870574, "eval_ap": 0.9824733762472688, "eval_auc": 0.9823328646050201, "eval_eer": 0.06329830234438157, "eval_f1": 0.7611024983193624, "eval_loss": 0.586156964302063, "eval_precision": 0.9871525677070359, "eval_recall": 0.6192900200937709, "eval_runtime": 896.8541, "eval_samples_per_second": 91.319, "eval_steps_per_second": 0.357, "step": 500 }, { "epoch": 0.18726591760299627, "grad_norm": 854220.375, "learning_rate": 4.81508875739645e-05, "loss": 0.1201, "step": 600 }, { "epoch": 0.2184769038701623, "grad_norm": 1128798.0, "learning_rate": 4.6301775147928994e-05, "loss": 0.1162, "step": 700 }, { "epoch": 0.24968789013732834, "grad_norm": 622432.0, "learning_rate": 4.44526627218935e-05, "loss": 0.1071, "step": 800 }, { "epoch": 0.2808988764044944, "grad_norm": 790906.0625, "learning_rate": 4.260355029585799e-05, "loss": 0.0874, "step": 900 }, { "epoch": 0.3121098626716604, "grad_norm": 1022445.375, "learning_rate": 4.075443786982249e-05, "loss": 0.0748, "step": 1000 }, { "epoch": 0.3121098626716604, "eval_accuracy": 0.9135164835164835, "eval_ap": 0.9891846925529121, "eval_auc": 0.9885782446290119, "eval_eer": 0.05095661546752897, "eval_f1": 0.9160672599509415, "eval_loss": 0.23737847805023193, "eval_precision": 0.9761105078411071, "eval_recall": 0.8629828086626479, "eval_runtime": 899.3174, "eval_samples_per_second": 91.069, "eval_steps_per_second": 0.356, "step": 1000 }, { "epoch": 0.3433208489388265, "grad_norm": 857955.5625, "learning_rate": 3.8905325443786985e-05, "loss": 0.0764, "step": 1100 }, { "epoch": 0.37453183520599254, "grad_norm": 874027.0, "learning_rate": 3.705621301775148e-05, "loss": 0.0709, "step": 1200 }, { "epoch": 0.40574282147315854, "grad_norm": 582799.875, "learning_rate": 3.520710059171598e-05, "loss": 0.0636, "step": 1300 }, { "epoch": 0.4369538077403246, "grad_norm": 521187.625, "learning_rate": 3.335798816568047e-05, "loss": 0.0533, "step": 1400 }, { "epoch": 0.4681647940074906, "grad_norm": 522330.4375, "learning_rate": 3.150887573964497e-05, "loss": 0.0465, "step": 1500 }, { "epoch": 0.4681647940074906, "eval_accuracy": 0.9357875457875457, "eval_ap": 0.9924125253776173, "eval_auc": 0.9918219498411971, "eval_eer": 0.042576125033683646, "eval_f1": 0.9388722931898225, "eval_loss": 0.18004347383975983, "eval_precision": 0.9792449627815629, "eval_recall": 0.9016968073230632, "eval_runtime": 904.8241, "eval_samples_per_second": 90.515, "eval_steps_per_second": 0.354, "step": 1500 }, { "epoch": 0.4993757802746567, "grad_norm": 1439895.375, "learning_rate": 2.965976331360947e-05, "loss": 0.0479, "step": 1600 }, { "epoch": 0.5305867665418227, "grad_norm": 522678.71875, "learning_rate": 2.7810650887573965e-05, "loss": 0.0429, "step": 1700 }, { "epoch": 0.5617977528089888, "grad_norm": 1075267.0, "learning_rate": 2.5961538461538464e-05, "loss": 0.0419, "step": 1800 }, { "epoch": 0.5930087390761548, "grad_norm": 621287.5625, "learning_rate": 2.411242603550296e-05, "loss": 0.0449, "step": 1900 }, { "epoch": 0.6242197253433208, "grad_norm": 431768.9375, "learning_rate": 2.2263313609467456e-05, "loss": 0.0373, "step": 2000 }, { "epoch": 0.6242197253433208, "eval_accuracy": 0.9467277167277167, "eval_ap": 0.9953183185775565, "eval_auc": 0.9948050755617595, "eval_eer": 0.03290218270008084, "eval_f1": 0.9495787636800684, "eval_loss": 0.16710303723812103, "eval_precision": 0.9842600800172492, "eval_recall": 0.9172583165885242, "eval_runtime": 883.8932, "eval_samples_per_second": 92.658, "eval_steps_per_second": 0.362, "step": 2000 }, { "epoch": 0.6554307116104869, "grad_norm": 300561.40625, "learning_rate": 2.0414201183431952e-05, "loss": 0.0367, "step": 2100 }, { "epoch": 0.686641697877653, "grad_norm": 759234.625, "learning_rate": 1.856508875739645e-05, "loss": 0.0385, "step": 2200 }, { "epoch": 0.717852684144819, "grad_norm": 110611.8984375, "learning_rate": 1.6715976331360947e-05, "loss": 0.029, "step": 2300 }, { "epoch": 0.7490636704119851, "grad_norm": 890414.25, "learning_rate": 1.4866863905325445e-05, "loss": 0.0243, "step": 2400 }, { "epoch": 0.7802746566791511, "grad_norm": 110954.5390625, "learning_rate": 1.3017751479289941e-05, "loss": 0.028, "step": 2500 }, { "epoch": 0.7802746566791511, "eval_accuracy": 0.9541514041514042, "eval_ap": 0.9963480520212278, "eval_auc": 0.9958883442351322, "eval_eer": 0.02915656157369981, "eval_f1": 0.9567799633982114, "eval_loss": 0.1855895221233368, "eval_precision": 0.9874557506355278, "eval_recall": 0.9279526680062514, "eval_runtime": 916.7443, "eval_samples_per_second": 89.338, "eval_steps_per_second": 0.349, "step": 2500 }, { "epoch": 0.8114856429463171, "grad_norm": 969664.1875, "learning_rate": 1.1168639053254439e-05, "loss": 0.0268, "step": 2600 }, { "epoch": 0.8426966292134831, "grad_norm": 21713.740234375, "learning_rate": 9.319526627218936e-06, "loss": 0.0259, "step": 2700 }, { "epoch": 0.8739076154806492, "grad_norm": 625.3981323242188, "learning_rate": 7.470414201183433e-06, "loss": 0.0272, "step": 2800 }, { "epoch": 0.9051186017478152, "grad_norm": 51626.33203125, "learning_rate": 5.621301775147929e-06, "loss": 0.0255, "step": 2900 }, { "epoch": 0.9363295880149812, "grad_norm": 330527.75, "learning_rate": 3.772189349112426e-06, "loss": 0.0196, "step": 3000 }, { "epoch": 0.9363295880149812, "eval_accuracy": 0.9628449328449329, "eval_ap": 0.9971936987222482, "eval_auc": 0.996572647864952, "eval_eer": 0.02357855025599569, "eval_f1": 0.9651834647200833, "eval_loss": 0.2491898238658905, "eval_precision": 0.9898617727816761, "eval_recall": 0.9417057378879214, "eval_runtime": 887.8512, "eval_samples_per_second": 92.245, "eval_steps_per_second": 0.36, "step": 3000 } ], "logging_steps": 100, "max_steps": 3204, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.6506508689408e+18, "train_batch_size": 128, "trial_name": null, "trial_params": null }