|
{ |
|
"best_metric": 0.9651834647200833, |
|
"best_model_checkpoint": "./results/checkpoint-3000", |
|
"epoch": 0.9363295880149812, |
|
"eval_steps": 500, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0, |
|
"eval_accuracy": 0.6045787545787545, |
|
"eval_ap": 0.6438655017635215, |
|
"eval_auc": 0.6252253659085975, |
|
"eval_eer": 0.41503637833468066, |
|
"eval_f1": 0.673324254804055, |
|
"eval_loss": 0.6694703102111816, |
|
"eval_precision": 0.6141319348606128, |
|
"eval_recall": 0.7451440053583389, |
|
"eval_runtime": 1161.2986, |
|
"eval_samples_per_second": 70.524, |
|
"eval_steps_per_second": 0.276, |
|
"step": 0 |
|
}, |
|
{ |
|
"epoch": 0.031210986267166042, |
|
"grad_norm": 1570057.0, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3999, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.062421972534332085, |
|
"grad_norm": 1873799.375, |
|
"learning_rate": 2e-05, |
|
"loss": 0.183, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09363295880149813, |
|
"grad_norm": 937146.625, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1499, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12484394506866417, |
|
"grad_norm": 1296423.5, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1939, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1560549313358302, |
|
"grad_norm": 698965.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1542, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1560549313358302, |
|
"eval_accuracy": 0.7873870573870574, |
|
"eval_ap": 0.9824733762472688, |
|
"eval_auc": 0.9823328646050201, |
|
"eval_eer": 0.06329830234438157, |
|
"eval_f1": 0.7611024983193624, |
|
"eval_loss": 0.586156964302063, |
|
"eval_precision": 0.9871525677070359, |
|
"eval_recall": 0.6192900200937709, |
|
"eval_runtime": 896.8541, |
|
"eval_samples_per_second": 91.319, |
|
"eval_steps_per_second": 0.357, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18726591760299627, |
|
"grad_norm": 854220.375, |
|
"learning_rate": 4.81508875739645e-05, |
|
"loss": 0.1201, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2184769038701623, |
|
"grad_norm": 1128798.0, |
|
"learning_rate": 4.6301775147928994e-05, |
|
"loss": 0.1162, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24968789013732834, |
|
"grad_norm": 622432.0, |
|
"learning_rate": 4.44526627218935e-05, |
|
"loss": 0.1071, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2808988764044944, |
|
"grad_norm": 790906.0625, |
|
"learning_rate": 4.260355029585799e-05, |
|
"loss": 0.0874, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3121098626716604, |
|
"grad_norm": 1022445.375, |
|
"learning_rate": 4.075443786982249e-05, |
|
"loss": 0.0748, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3121098626716604, |
|
"eval_accuracy": 0.9135164835164835, |
|
"eval_ap": 0.9891846925529121, |
|
"eval_auc": 0.9885782446290119, |
|
"eval_eer": 0.05095661546752897, |
|
"eval_f1": 0.9160672599509415, |
|
"eval_loss": 0.23737847805023193, |
|
"eval_precision": 0.9761105078411071, |
|
"eval_recall": 0.8629828086626479, |
|
"eval_runtime": 899.3174, |
|
"eval_samples_per_second": 91.069, |
|
"eval_steps_per_second": 0.356, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3433208489388265, |
|
"grad_norm": 857955.5625, |
|
"learning_rate": 3.8905325443786985e-05, |
|
"loss": 0.0764, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.37453183520599254, |
|
"grad_norm": 874027.0, |
|
"learning_rate": 3.705621301775148e-05, |
|
"loss": 0.0709, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.40574282147315854, |
|
"grad_norm": 582799.875, |
|
"learning_rate": 3.520710059171598e-05, |
|
"loss": 0.0636, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4369538077403246, |
|
"grad_norm": 521187.625, |
|
"learning_rate": 3.335798816568047e-05, |
|
"loss": 0.0533, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4681647940074906, |
|
"grad_norm": 522330.4375, |
|
"learning_rate": 3.150887573964497e-05, |
|
"loss": 0.0465, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4681647940074906, |
|
"eval_accuracy": 0.9357875457875457, |
|
"eval_ap": 0.9924125253776173, |
|
"eval_auc": 0.9918219498411971, |
|
"eval_eer": 0.042576125033683646, |
|
"eval_f1": 0.9388722931898225, |
|
"eval_loss": 0.18004347383975983, |
|
"eval_precision": 0.9792449627815629, |
|
"eval_recall": 0.9016968073230632, |
|
"eval_runtime": 904.8241, |
|
"eval_samples_per_second": 90.515, |
|
"eval_steps_per_second": 0.354, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4993757802746567, |
|
"grad_norm": 1439895.375, |
|
"learning_rate": 2.965976331360947e-05, |
|
"loss": 0.0479, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5305867665418227, |
|
"grad_norm": 522678.71875, |
|
"learning_rate": 2.7810650887573965e-05, |
|
"loss": 0.0429, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5617977528089888, |
|
"grad_norm": 1075267.0, |
|
"learning_rate": 2.5961538461538464e-05, |
|
"loss": 0.0419, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5930087390761548, |
|
"grad_norm": 621287.5625, |
|
"learning_rate": 2.411242603550296e-05, |
|
"loss": 0.0449, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6242197253433208, |
|
"grad_norm": 431768.9375, |
|
"learning_rate": 2.2263313609467456e-05, |
|
"loss": 0.0373, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6242197253433208, |
|
"eval_accuracy": 0.9467277167277167, |
|
"eval_ap": 0.9953183185775565, |
|
"eval_auc": 0.9948050755617595, |
|
"eval_eer": 0.03290218270008084, |
|
"eval_f1": 0.9495787636800684, |
|
"eval_loss": 0.16710303723812103, |
|
"eval_precision": 0.9842600800172492, |
|
"eval_recall": 0.9172583165885242, |
|
"eval_runtime": 883.8932, |
|
"eval_samples_per_second": 92.658, |
|
"eval_steps_per_second": 0.362, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6554307116104869, |
|
"grad_norm": 300561.40625, |
|
"learning_rate": 2.0414201183431952e-05, |
|
"loss": 0.0367, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.686641697877653, |
|
"grad_norm": 759234.625, |
|
"learning_rate": 1.856508875739645e-05, |
|
"loss": 0.0385, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.717852684144819, |
|
"grad_norm": 110611.8984375, |
|
"learning_rate": 1.6715976331360947e-05, |
|
"loss": 0.029, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7490636704119851, |
|
"grad_norm": 890414.25, |
|
"learning_rate": 1.4866863905325445e-05, |
|
"loss": 0.0243, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7802746566791511, |
|
"grad_norm": 110954.5390625, |
|
"learning_rate": 1.3017751479289941e-05, |
|
"loss": 0.028, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7802746566791511, |
|
"eval_accuracy": 0.9541514041514042, |
|
"eval_ap": 0.9963480520212278, |
|
"eval_auc": 0.9958883442351322, |
|
"eval_eer": 0.02915656157369981, |
|
"eval_f1": 0.9567799633982114, |
|
"eval_loss": 0.1855895221233368, |
|
"eval_precision": 0.9874557506355278, |
|
"eval_recall": 0.9279526680062514, |
|
"eval_runtime": 916.7443, |
|
"eval_samples_per_second": 89.338, |
|
"eval_steps_per_second": 0.349, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8114856429463171, |
|
"grad_norm": 969664.1875, |
|
"learning_rate": 1.1168639053254439e-05, |
|
"loss": 0.0268, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8426966292134831, |
|
"grad_norm": 21713.740234375, |
|
"learning_rate": 9.319526627218936e-06, |
|
"loss": 0.0259, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8739076154806492, |
|
"grad_norm": 625.3981323242188, |
|
"learning_rate": 7.470414201183433e-06, |
|
"loss": 0.0272, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.9051186017478152, |
|
"grad_norm": 51626.33203125, |
|
"learning_rate": 5.621301775147929e-06, |
|
"loss": 0.0255, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9363295880149812, |
|
"grad_norm": 330527.75, |
|
"learning_rate": 3.772189349112426e-06, |
|
"loss": 0.0196, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9363295880149812, |
|
"eval_accuracy": 0.9628449328449329, |
|
"eval_ap": 0.9971936987222482, |
|
"eval_auc": 0.996572647864952, |
|
"eval_eer": 0.02357855025599569, |
|
"eval_f1": 0.9651834647200833, |
|
"eval_loss": 0.2491898238658905, |
|
"eval_precision": 0.9898617727816761, |
|
"eval_recall": 0.9417057378879214, |
|
"eval_runtime": 887.8512, |
|
"eval_samples_per_second": 92.245, |
|
"eval_steps_per_second": 0.36, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3204, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.6506508689408e+18, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|