{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 100,
  "global_step": 621,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04830917874396135,
      "grad_norm": 37.65997314453125,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 0.8115,
      "step": 10
    },
    {
      "epoch": 0.0966183574879227,
      "grad_norm": 49.085182189941406,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.7151,
      "step": 20
    },
    {
      "epoch": 0.14492753623188406,
      "grad_norm": 57.3139533996582,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.5399,
      "step": 30
    },
    {
      "epoch": 0.1932367149758454,
      "grad_norm": 14.93040943145752,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.4486,
      "step": 40
    },
    {
      "epoch": 0.24154589371980675,
      "grad_norm": 40.2352409362793,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.2895,
      "step": 50
    },
    {
      "epoch": 0.2898550724637681,
      "grad_norm": 22.021743774414062,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.208,
      "step": 60
    },
    {
      "epoch": 0.33816425120772947,
      "grad_norm": 3.9624946117401123,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.158,
      "step": 70
    },
    {
      "epoch": 0.3864734299516908,
      "grad_norm": 3.648684501647949,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.1099,
      "step": 80
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 1.9831331968307495,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.0898,
      "step": 90
    },
    {
      "epoch": 0.4830917874396135,
      "grad_norm": 1.1971267461776733,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.0561,
      "step": 100
    },
    {
      "epoch": 0.4830917874396135,
      "eval_accuracy": 0.9926650366748166,
      "eval_accuracy_label_Clickbait": 0.9933110367892977,
      "eval_accuracy_label_Factual": 0.9922928709055877,
      "eval_f1": 0.9926701815332624,
      "eval_loss": 0.04882814362645149,
      "eval_precision": 0.9926880698400764,
      "eval_recall": 0.9926650366748166,
      "eval_runtime": 0.8226,
      "eval_samples_per_second": 994.464,
      "eval_steps_per_second": 63.218,
      "step": 100
    },
    {
      "epoch": 0.5314009661835749,
      "grad_norm": 0.8438642621040344,
      "learning_rate": 4.4e-06,
      "loss": 0.061,
      "step": 110
    },
    {
      "epoch": 0.5797101449275363,
      "grad_norm": 0.5306077003479004,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.0523,
      "step": 120
    },
    {
      "epoch": 0.6280193236714976,
      "grad_norm": 42.116844177246094,
      "learning_rate": 5.2e-06,
      "loss": 0.0555,
      "step": 130
    },
    {
      "epoch": 0.6763285024154589,
      "grad_norm": 0.45348191261291504,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.0511,
      "step": 140
    },
    {
      "epoch": 0.7246376811594203,
      "grad_norm": 0.273034930229187,
      "learning_rate": 6e-06,
      "loss": 0.0461,
      "step": 150
    },
    {
      "epoch": 0.7729468599033816,
      "grad_norm": 0.32376888394355774,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.0355,
      "step": 160
    },
    {
      "epoch": 0.821256038647343,
      "grad_norm": 0.46599268913269043,
      "learning_rate": 6.800000000000001e-06,
      "loss": 0.0057,
      "step": 170
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 76.78250122070312,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 0.0284,
      "step": 180
    },
    {
      "epoch": 0.9178743961352657,
      "grad_norm": 0.27929720282554626,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.0154,
      "step": 190
    },
    {
      "epoch": 0.966183574879227,
      "grad_norm": 0.09468149393796921,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.0037,
      "step": 200
    },
    {
      "epoch": 0.966183574879227,
      "eval_accuracy": 0.9987775061124694,
      "eval_accuracy_label_Clickbait": 0.9966555183946488,
      "eval_accuracy_label_Factual": 1.0,
      "eval_f1": 0.998777070551364,
      "eval_loss": 0.00973883830010891,
      "eval_precision": 0.9987798570622531,
      "eval_recall": 0.9987775061124694,
      "eval_runtime": 0.8161,
      "eval_samples_per_second": 1002.369,
      "eval_steps_per_second": 63.72,
      "step": 200
    },
    {
      "epoch": 1.0144927536231885,
      "grad_norm": 11.390016555786133,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.033,
      "step": 210
    },
    {
      "epoch": 1.0628019323671498,
      "grad_norm": 0.6215488910675049,
      "learning_rate": 8.8e-06,
      "loss": 0.0279,
      "step": 220
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 0.1523633599281311,
      "learning_rate": 9.200000000000002e-06,
      "loss": 0.0093,
      "step": 230
    },
    {
      "epoch": 1.1594202898550725,
      "grad_norm": 0.10952762514352798,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.022,
      "step": 240
    },
    {
      "epoch": 1.2077294685990339,
      "grad_norm": 0.07856310158967972,
      "learning_rate": 1e-05,
      "loss": 0.0309,
      "step": 250
    },
    {
      "epoch": 1.2560386473429952,
      "grad_norm": 0.05758531391620636,
      "learning_rate": 1.04e-05,
      "loss": 0.0015,
      "step": 260
    },
    {
      "epoch": 1.3043478260869565,
      "grad_norm": 0.049695733934640884,
      "learning_rate": 1.0800000000000002e-05,
      "loss": 0.0071,
      "step": 270
    },
    {
      "epoch": 1.3526570048309179,
      "grad_norm": 0.19512628018856049,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 0.0054,
      "step": 280
    },
    {
      "epoch": 1.4009661835748792,
      "grad_norm": 0.049039632081985474,
      "learning_rate": 1.16e-05,
      "loss": 0.023,
      "step": 290
    },
    {
      "epoch": 1.4492753623188406,
      "grad_norm": 0.06413820385932922,
      "learning_rate": 1.2e-05,
      "loss": 0.0012,
      "step": 300
    },
    {
      "epoch": 1.4492753623188406,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_Clickbait": 1.0,
      "eval_accuracy_label_Factual": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.0015956248389557004,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.8233,
      "eval_samples_per_second": 993.556,
      "eval_steps_per_second": 63.16,
      "step": 300
    },
    {
      "epoch": 1.497584541062802,
      "grad_norm": 0.04210774227976799,
      "learning_rate": 1.2400000000000002e-05,
      "loss": 0.0012,
      "step": 310
    },
    {
      "epoch": 1.5458937198067633,
      "grad_norm": 0.02976871468126774,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 0.0079,
      "step": 320
    },
    {
      "epoch": 1.5942028985507246,
      "grad_norm": 0.029957927763462067,
      "learning_rate": 1.3200000000000002e-05,
      "loss": 0.0008,
      "step": 330
    },
    {
      "epoch": 1.642512077294686,
      "grad_norm": 12.84114933013916,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 0.0168,
      "step": 340
    },
    {
      "epoch": 1.6908212560386473,
      "grad_norm": 0.6662724018096924,
      "learning_rate": 1.4e-05,
      "loss": 0.0209,
      "step": 350
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 0.036532897502183914,
      "learning_rate": 1.4400000000000001e-05,
      "loss": 0.0008,
      "step": 360
    },
    {
      "epoch": 1.78743961352657,
      "grad_norm": 0.05894944816827774,
      "learning_rate": 1.48e-05,
      "loss": 0.0351,
      "step": 370
    },
    {
      "epoch": 1.8357487922705316,
      "grad_norm": 0.03172897920012474,
      "learning_rate": 1.5200000000000002e-05,
      "loss": 0.0156,
      "step": 380
    },
    {
      "epoch": 1.8840579710144927,
      "grad_norm": 60.220420837402344,
      "learning_rate": 1.5600000000000003e-05,
      "loss": 0.106,
      "step": 390
    },
    {
      "epoch": 1.9323671497584543,
      "grad_norm": 0.045578889548778534,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.0012,
      "step": 400
    },
    {
      "epoch": 1.9323671497584543,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_Clickbait": 1.0,
      "eval_accuracy_label_Factual": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.001607110258191824,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.822,
      "eval_samples_per_second": 995.143,
      "eval_steps_per_second": 63.261,
      "step": 400
    },
    {
      "epoch": 1.9806763285024154,
      "grad_norm": 0.038461122661828995,
      "learning_rate": 1.64e-05,
      "loss": 0.0011,
      "step": 410
    },
    {
      "epoch": 2.028985507246377,
      "grad_norm": 0.024971311911940575,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 0.0008,
      "step": 420
    },
    {
      "epoch": 2.077294685990338,
      "grad_norm": 0.021732186898589134,
      "learning_rate": 1.72e-05,
      "loss": 0.0005,
      "step": 430
    },
    {
      "epoch": 2.1256038647342996,
      "grad_norm": 22.902217864990234,
      "learning_rate": 1.76e-05,
      "loss": 0.0134,
      "step": 440
    },
    {
      "epoch": 2.1739130434782608,
      "grad_norm": 0.05803954228758812,
      "learning_rate": 1.8e-05,
      "loss": 0.0005,
      "step": 450
    },
    {
      "epoch": 2.2222222222222223,
      "grad_norm": 0.016587387770414352,
      "learning_rate": 1.8400000000000003e-05,
      "loss": 0.0004,
      "step": 460
    },
    {
      "epoch": 2.2705314009661834,
      "grad_norm": 0.014241261407732964,
      "learning_rate": 1.88e-05,
      "loss": 0.0004,
      "step": 470
    },
    {
      "epoch": 2.318840579710145,
      "grad_norm": 0.013285420835018158,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.0003,
      "step": 480
    },
    {
      "epoch": 2.367149758454106,
      "grad_norm": 0.008689775131642818,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 0.0003,
      "step": 490
    },
    {
      "epoch": 2.4154589371980677,
      "grad_norm": 0.05173454433679581,
      "learning_rate": 2e-05,
      "loss": 0.0433,
      "step": 500
    },
    {
      "epoch": 2.4154589371980677,
      "eval_accuracy": 0.9987775061124694,
      "eval_accuracy_label_Clickbait": 0.9966555183946488,
      "eval_accuracy_label_Factual": 1.0,
      "eval_f1": 0.998777070551364,
      "eval_loss": 0.0020217353012412786,
      "eval_precision": 0.9987798570622531,
      "eval_recall": 0.9987775061124694,
      "eval_runtime": 0.8279,
      "eval_samples_per_second": 988.004,
      "eval_steps_per_second": 62.807,
      "step": 500
    },
    {
      "epoch": 2.463768115942029,
      "grad_norm": 0.07507430762052536,
      "learning_rate": 1.834710743801653e-05,
      "loss": 0.0055,
      "step": 510
    },
    {
      "epoch": 2.5120772946859904,
      "grad_norm": 42.797401428222656,
      "learning_rate": 1.669421487603306e-05,
      "loss": 0.0161,
      "step": 520
    },
    {
      "epoch": 2.5603864734299515,
      "grad_norm": 0.01774718426167965,
      "learning_rate": 1.504132231404959e-05,
      "loss": 0.0006,
      "step": 530
    },
    {
      "epoch": 2.608695652173913,
      "grad_norm": 0.022867949679493904,
      "learning_rate": 1.3388429752066117e-05,
      "loss": 0.0191,
      "step": 540
    },
    {
      "epoch": 2.6570048309178746,
      "grad_norm": 0.013622589409351349,
      "learning_rate": 1.1735537190082646e-05,
      "loss": 0.0043,
      "step": 550
    },
    {
      "epoch": 2.7053140096618358,
      "grad_norm": 0.012125013396143913,
      "learning_rate": 1.0082644628099174e-05,
      "loss": 0.0003,
      "step": 560
    },
    {
      "epoch": 2.753623188405797,
      "grad_norm": 0.009969482198357582,
      "learning_rate": 8.429752066115703e-06,
      "loss": 0.0193,
      "step": 570
    },
    {
      "epoch": 2.8019323671497585,
      "grad_norm": 0.010625869035720825,
      "learning_rate": 6.776859504132232e-06,
      "loss": 0.0008,
      "step": 580
    },
    {
      "epoch": 2.85024154589372,
      "grad_norm": 0.010633111000061035,
      "learning_rate": 5.12396694214876e-06,
      "loss": 0.0143,
      "step": 590
    },
    {
      "epoch": 2.898550724637681,
      "grad_norm": 0.011428612284362316,
      "learning_rate": 3.4710743801652895e-06,
      "loss": 0.0003,
      "step": 600
    },
    {
      "epoch": 2.898550724637681,
      "eval_accuracy": 0.9951100244498777,
      "eval_accuracy_label_Clickbait": 0.9866220735785953,
      "eval_accuracy_label_Factual": 1.0,
      "eval_f1": 0.9951029456353522,
      "eval_loss": 0.016679394990205765,
      "eval_precision": 0.9951474238804714,
      "eval_recall": 0.9951100244498777,
      "eval_runtime": 0.8314,
      "eval_samples_per_second": 983.91,
      "eval_steps_per_second": 62.547,
      "step": 600
    },
    {
      "epoch": 2.9468599033816423,
      "grad_norm": 0.010514287278056145,
      "learning_rate": 1.8181818181818183e-06,
      "loss": 0.0003,
      "step": 610
    },
    {
      "epoch": 2.995169082125604,
      "grad_norm": 104.85121154785156,
      "learning_rate": 1.6528925619834713e-07,
      "loss": 0.0108,
      "step": 620
    },
    {
      "epoch": 3.0,
      "step": 621,
      "total_flos": 29633646182400.0,
      "train_loss": 0.06926822083632517,
      "train_runtime": 68.411,
      "train_samples_per_second": 290.041,
      "train_steps_per_second": 9.077
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9951100244498777,
      "eval_accuracy_label_Clickbait": 0.9866220735785953,
      "eval_accuracy_label_Factual": 1.0,
      "eval_f1": 0.9951029456353522,
      "eval_loss": 0.017279641702771187,
      "eval_precision": 0.9951474238804714,
      "eval_recall": 0.9951100244498777,
      "eval_runtime": 0.8191,
      "eval_samples_per_second": 998.621,
      "eval_steps_per_second": 63.482,
      "step": 621
    }
  ],
  "logging_steps": 10,
  "max_steps": 621,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 29633646182400.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}