deepseek_prover_sft_proving / trainer_state.json
tcwong's picture
Upload 11 files
4d815be verified
raw
history blame
18.3 kB
{
"best_metric": 0.08962889015674591,
"best_model_checkpoint": "model_training/deepseek_prover_sft_no_err/checkpoints-random-09-07-09-00/checkpoint-450",
"epoch": 3.6568527918781726,
"eval_steps": 50,
"global_step": 450,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04060913705583756,
"grad_norm": 95.18575286865234,
"learning_rate": 1e-05,
"loss": 12.8904,
"step": 5
},
{
"epoch": 0.08121827411167512,
"grad_norm": 60.47364044189453,
"learning_rate": 2e-05,
"loss": 9.6717,
"step": 10
},
{
"epoch": 0.1218274111675127,
"grad_norm": 69.353759765625,
"learning_rate": 3e-05,
"loss": 4.9122,
"step": 15
},
{
"epoch": 0.16243654822335024,
"grad_norm": 0.35820522904396057,
"learning_rate": 4e-05,
"loss": 0.6982,
"step": 20
},
{
"epoch": 0.20304568527918782,
"grad_norm": 0.3532850742340088,
"learning_rate": 5e-05,
"loss": 0.1738,
"step": 25
},
{
"epoch": 0.2436548223350254,
"grad_norm": 8.06154727935791,
"learning_rate": 6e-05,
"loss": 0.1606,
"step": 30
},
{
"epoch": 0.28426395939086296,
"grad_norm": 0.33542144298553467,
"learning_rate": 7e-05,
"loss": 0.1632,
"step": 35
},
{
"epoch": 0.3248730964467005,
"grad_norm": 0.2429077923297882,
"learning_rate": 8e-05,
"loss": 0.141,
"step": 40
},
{
"epoch": 0.36548223350253806,
"grad_norm": 0.17707887291908264,
"learning_rate": 9e-05,
"loss": 0.132,
"step": 45
},
{
"epoch": 0.40609137055837563,
"grad_norm": 0.5356423258781433,
"learning_rate": 0.0001,
"loss": 0.1388,
"step": 50
},
{
"epoch": 0.40609137055837563,
"eval_loss": 0.13083037734031677,
"eval_runtime": 577.2624,
"eval_samples_per_second": 1.732,
"eval_steps_per_second": 0.217,
"step": 50
},
{
"epoch": 0.4467005076142132,
"grad_norm": 0.0570620633661747,
"learning_rate": 9.996842891446092e-05,
"loss": 0.1288,
"step": 55
},
{
"epoch": 0.4873096446700508,
"grad_norm": 0.07483270764350891,
"learning_rate": 9.987375552718133e-05,
"loss": 0.1179,
"step": 60
},
{
"epoch": 0.5279187817258884,
"grad_norm": 0.04857001453638077,
"learning_rate": 9.971609939582557e-05,
"loss": 0.1245,
"step": 65
},
{
"epoch": 0.5685279187817259,
"grad_norm": 0.06141730025410652,
"learning_rate": 9.9495659615402e-05,
"loss": 0.121,
"step": 70
},
{
"epoch": 0.6091370558375635,
"grad_norm": 0.07013677060604095,
"learning_rate": 9.921271456683715e-05,
"loss": 0.1082,
"step": 75
},
{
"epoch": 0.649746192893401,
"grad_norm": 0.027853988111019135,
"learning_rate": 9.886762156542428e-05,
"loss": 0.1338,
"step": 80
},
{
"epoch": 0.6903553299492385,
"grad_norm": 0.039150092750787735,
"learning_rate": 9.846081640959007e-05,
"loss": 0.1161,
"step": 85
},
{
"epoch": 0.7309644670050761,
"grad_norm": 0.029366208240389824,
"learning_rate": 9.79928128305494e-05,
"loss": 0.1151,
"step": 90
},
{
"epoch": 0.7715736040609137,
"grad_norm": 0.02762170508503914,
"learning_rate": 9.746420184354334e-05,
"loss": 0.1192,
"step": 95
},
{
"epoch": 0.8121827411167513,
"grad_norm": 0.025405680760741234,
"learning_rate": 9.687565100147939e-05,
"loss": 0.1137,
"step": 100
},
{
"epoch": 0.8121827411167513,
"eval_loss": 0.11551322042942047,
"eval_runtime": 577.0742,
"eval_samples_per_second": 1.733,
"eval_steps_per_second": 0.217,
"step": 100
},
{
"epoch": 0.8527918781725888,
"grad_norm": 0.02768566645681858,
"learning_rate": 9.622790355191672e-05,
"loss": 0.1067,
"step": 105
},
{
"epoch": 0.8934010152284264,
"grad_norm": 0.028063081204891205,
"learning_rate": 9.552177749846083e-05,
"loss": 0.1103,
"step": 110
},
{
"epoch": 0.934010152284264,
"grad_norm": 0.028980253264307976,
"learning_rate": 9.475816456775313e-05,
"loss": 0.1173,
"step": 115
},
{
"epoch": 0.9746192893401016,
"grad_norm": 0.025315387174487114,
"learning_rate": 9.393802908335977e-05,
"loss": 0.1089,
"step": 120
},
{
"epoch": 1.015228426395939,
"grad_norm": 0.026901153847575188,
"learning_rate": 9.306240674798203e-05,
"loss": 0.1137,
"step": 125
},
{
"epoch": 1.0558375634517767,
"grad_norm": 0.025451259687542915,
"learning_rate": 9.213240333552589e-05,
"loss": 0.1054,
"step": 130
},
{
"epoch": 1.0964467005076142,
"grad_norm": 0.0280075091868639,
"learning_rate": 9.114919329468282e-05,
"loss": 0.1299,
"step": 135
},
{
"epoch": 1.1370558375634519,
"grad_norm": 0.029911836609244347,
"learning_rate": 9.011401826578492e-05,
"loss": 0.1009,
"step": 140
},
{
"epoch": 1.1776649746192893,
"grad_norm": 0.029297418892383575,
"learning_rate": 8.902818551280758e-05,
"loss": 0.1016,
"step": 145
},
{
"epoch": 1.218274111675127,
"grad_norm": 0.0310602355748415,
"learning_rate": 8.789306627249985e-05,
"loss": 0.0949,
"step": 150
},
{
"epoch": 1.218274111675127,
"eval_loss": 0.10870195180177689,
"eval_runtime": 577.2383,
"eval_samples_per_second": 1.732,
"eval_steps_per_second": 0.217,
"step": 150
},
{
"epoch": 1.2588832487309645,
"grad_norm": 0.030549369752407074,
"learning_rate": 8.6710094022727e-05,
"loss": 0.0996,
"step": 155
},
{
"epoch": 1.299492385786802,
"grad_norm": 0.033894069492816925,
"learning_rate": 8.548076267221256e-05,
"loss": 0.106,
"step": 160
},
{
"epoch": 1.3401015228426396,
"grad_norm": 0.034311868250370026,
"learning_rate": 8.420662467396547e-05,
"loss": 0.1162,
"step": 165
},
{
"epoch": 1.380710659898477,
"grad_norm": 0.040043774992227554,
"learning_rate": 8.288928906477496e-05,
"loss": 0.1058,
"step": 170
},
{
"epoch": 1.4213197969543148,
"grad_norm": 0.0440133698284626,
"learning_rate": 8.15304194332491e-05,
"loss": 0.101,
"step": 175
},
{
"epoch": 1.4619289340101522,
"grad_norm": 0.03563845902681351,
"learning_rate": 8.013173181896283e-05,
"loss": 0.0981,
"step": 180
},
{
"epoch": 1.50253807106599,
"grad_norm": 0.039490845054388046,
"learning_rate": 7.869499254536865e-05,
"loss": 0.1002,
"step": 185
},
{
"epoch": 1.5431472081218274,
"grad_norm": 0.044993579387664795,
"learning_rate": 7.722201598920673e-05,
"loss": 0.0897,
"step": 190
},
{
"epoch": 1.5837563451776648,
"grad_norm": 0.04990144819021225,
"learning_rate": 7.571466228923115e-05,
"loss": 0.0996,
"step": 195
},
{
"epoch": 1.6243654822335025,
"grad_norm": 0.04168887436389923,
"learning_rate": 7.417483499714589e-05,
"loss": 0.1103,
"step": 200
},
{
"epoch": 1.6243654822335025,
"eval_loss": 0.10267892479896545,
"eval_runtime": 577.34,
"eval_samples_per_second": 1.732,
"eval_steps_per_second": 0.217,
"step": 200
},
{
"epoch": 1.6649746192893402,
"grad_norm": 0.04457447677850723,
"learning_rate": 7.260447867371709e-05,
"loss": 0.0978,
"step": 205
},
{
"epoch": 1.7055837563451777,
"grad_norm": 0.0444829948246479,
"learning_rate": 7.100557643309732e-05,
"loss": 0.0963,
"step": 210
},
{
"epoch": 1.7461928934010151,
"grad_norm": 0.04889954626560211,
"learning_rate": 6.938014743846285e-05,
"loss": 0.096,
"step": 215
},
{
"epoch": 1.7868020304568528,
"grad_norm": 0.05787263438105583,
"learning_rate": 6.773024435212678e-05,
"loss": 0.098,
"step": 220
},
{
"epoch": 1.8274111675126905,
"grad_norm": 0.047007594257593155,
"learning_rate": 6.605795074334794e-05,
"loss": 0.0944,
"step": 225
},
{
"epoch": 1.868020304568528,
"grad_norm": 0.06226496770977974,
"learning_rate": 6.436537845710903e-05,
"loss": 0.0985,
"step": 230
},
{
"epoch": 1.9086294416243654,
"grad_norm": 0.04688010737299919,
"learning_rate": 6.265466494718732e-05,
"loss": 0.1104,
"step": 235
},
{
"epoch": 1.9492385786802031,
"grad_norm": 0.04706396162509918,
"learning_rate": 6.092797057688495e-05,
"loss": 0.0879,
"step": 240
},
{
"epoch": 1.9898477157360406,
"grad_norm": 0.05411742627620697,
"learning_rate": 5.918747589082853e-05,
"loss": 0.0918,
"step": 245
},
{
"epoch": 2.030456852791878,
"grad_norm": 0.046843238174915314,
"learning_rate": 5.7435378861282585e-05,
"loss": 0.0896,
"step": 250
},
{
"epoch": 2.030456852791878,
"eval_loss": 0.09781802445650101,
"eval_runtime": 577.4371,
"eval_samples_per_second": 1.732,
"eval_steps_per_second": 0.216,
"step": 250
},
{
"epoch": 2.0710659898477157,
"grad_norm": 0.04870203882455826,
"learning_rate": 5.567389211245485e-05,
"loss": 0.0915,
"step": 255
},
{
"epoch": 2.1116751269035534,
"grad_norm": 0.05594008415937424,
"learning_rate": 5.390524012629824e-05,
"loss": 0.0863,
"step": 260
},
{
"epoch": 2.152284263959391,
"grad_norm": 0.06335064023733139,
"learning_rate": 5.2131656433338506e-05,
"loss": 0.0931,
"step": 265
},
{
"epoch": 2.1928934010152283,
"grad_norm": 0.06813333183526993,
"learning_rate": 5.035538079207488e-05,
"loss": 0.1013,
"step": 270
},
{
"epoch": 2.233502538071066,
"grad_norm": 0.054481592029333115,
"learning_rate": 4.857865636051585e-05,
"loss": 0.0889,
"step": 275
},
{
"epoch": 2.2741116751269037,
"grad_norm": 0.06014859676361084,
"learning_rate": 4.6803726863421725e-05,
"loss": 0.0873,
"step": 280
},
{
"epoch": 2.314720812182741,
"grad_norm": 0.09669474512338638,
"learning_rate": 4.503283375883165e-05,
"loss": 0.0825,
"step": 285
},
{
"epoch": 2.3553299492385786,
"grad_norm": 0.0704524964094162,
"learning_rate": 4.326821340745304e-05,
"loss": 0.0862,
"step": 290
},
{
"epoch": 2.3959390862944163,
"grad_norm": 0.05738632380962372,
"learning_rate": 4.151209424848819e-05,
"loss": 0.0835,
"step": 295
},
{
"epoch": 2.436548223350254,
"grad_norm": 0.06273756176233292,
"learning_rate": 3.976669398546451e-05,
"loss": 0.0887,
"step": 300
},
{
"epoch": 2.436548223350254,
"eval_loss": 0.09393562376499176,
"eval_runtime": 576.8653,
"eval_samples_per_second": 1.734,
"eval_steps_per_second": 0.217,
"step": 300
},
{
"epoch": 2.4791878172588833,
"grad_norm": 0.05062380060553551,
"learning_rate": 3.803421678562213e-05,
"loss": 0.0855,
"step": 305
},
{
"epoch": 2.519796954314721,
"grad_norm": 0.06058161333203316,
"learning_rate": 3.631685049639586e-05,
"loss": 0.0866,
"step": 310
},
{
"epoch": 2.5604060913705586,
"grad_norm": 0.05282078683376312,
"learning_rate": 3.461676388250651e-05,
"loss": 0.0836,
"step": 315
},
{
"epoch": 2.601015228426396,
"grad_norm": 0.06284045428037643,
"learning_rate": 3.293610388715048e-05,
"loss": 0.0777,
"step": 320
},
{
"epoch": 2.6416243654822336,
"grad_norm": 0.06628148257732391,
"learning_rate": 3.127699292074683e-05,
"loss": 0.095,
"step": 325
},
{
"epoch": 2.682233502538071,
"grad_norm": 0.0586889423429966,
"learning_rate": 2.964152618066508e-05,
"loss": 0.0851,
"step": 330
},
{
"epoch": 2.7228426395939085,
"grad_norm": 0.06476856768131256,
"learning_rate": 2.8031769005319147e-05,
"loss": 0.0876,
"step": 335
},
{
"epoch": 2.763451776649746,
"grad_norm": 0.062451042234897614,
"learning_rate": 2.6449754265968264e-05,
"loss": 0.0836,
"step": 340
},
{
"epoch": 2.804060913705584,
"grad_norm": 0.09133639186620712,
"learning_rate": 2.4897479799518796e-05,
"loss": 0.0795,
"step": 345
},
{
"epoch": 2.8446700507614215,
"grad_norm": 0.06529180705547333,
"learning_rate": 2.3376905885569182e-05,
"loss": 0.0843,
"step": 350
},
{
"epoch": 2.8446700507614215,
"eval_loss": 0.0914108008146286,
"eval_runtime": 578.8025,
"eval_samples_per_second": 1.728,
"eval_steps_per_second": 0.216,
"step": 350
},
{
"epoch": 2.885279187817259,
"grad_norm": 0.05800933018326759,
"learning_rate": 2.1889952770883643e-05,
"loss": 0.0806,
"step": 355
},
{
"epoch": 2.9258883248730965,
"grad_norm": 0.06730043888092041,
"learning_rate": 2.043849824442124e-05,
"loss": 0.086,
"step": 360
},
{
"epoch": 2.966497461928934,
"grad_norm": 0.05438319221138954,
"learning_rate": 1.9024375265982384e-05,
"loss": 0.0846,
"step": 365
},
{
"epoch": 3.0071065989847714,
"grad_norm": 0.06161397323012352,
"learning_rate": 1.764936965146773e-05,
"loss": 0.0811,
"step": 370
},
{
"epoch": 3.047715736040609,
"grad_norm": 0.0842302069067955,
"learning_rate": 1.631521781767214e-05,
"loss": 0.0736,
"step": 375
},
{
"epoch": 3.0883248730964468,
"grad_norm": 0.06074338033795357,
"learning_rate": 1.502360458946232e-05,
"loss": 0.0801,
"step": 380
},
{
"epoch": 3.1289340101522845,
"grad_norm": 0.06264619529247284,
"learning_rate": 1.3776161072106702e-05,
"loss": 0.0805,
"step": 385
},
{
"epoch": 3.1695431472081217,
"grad_norm": 0.07572674006223679,
"learning_rate": 1.257446259144494e-05,
"loss": 0.0841,
"step": 390
},
{
"epoch": 3.2101522842639594,
"grad_norm": 0.08485180884599686,
"learning_rate": 1.1420026704498077e-05,
"loss": 0.0835,
"step": 395
},
{
"epoch": 3.250761421319797,
"grad_norm": 0.05600133538246155,
"learning_rate": 1.031431128303153e-05,
"loss": 0.0775,
"step": 400
},
{
"epoch": 3.250761421319797,
"eval_loss": 0.09013215452432632,
"eval_runtime": 578.5208,
"eval_samples_per_second": 1.729,
"eval_steps_per_second": 0.216,
"step": 400
},
{
"epoch": 3.2913705583756343,
"grad_norm": 0.05972089618444443,
"learning_rate": 9.258712672491415e-06,
"loss": 0.0855,
"step": 405
},
{
"epoch": 3.331979695431472,
"grad_norm": 0.07054904848337173,
"learning_rate": 8.254563928638893e-06,
"loss": 0.083,
"step": 410
},
{
"epoch": 3.3725888324873097,
"grad_norm": 0.06772007048130035,
"learning_rate": 7.3031331341093915e-06,
"loss": 0.0852,
"step": 415
},
{
"epoch": 3.4131979695431474,
"grad_norm": 0.06778612732887268,
"learning_rate": 6.405621797022848e-06,
"loss": 0.083,
"step": 420
},
{
"epoch": 3.4538071065989846,
"grad_norm": 0.06333562731742859,
"learning_rate": 5.563163333667099e-06,
"loss": 0.0727,
"step": 425
},
{
"epoch": 3.4944162436548223,
"grad_norm": 0.05736032873392105,
"learning_rate": 4.776821637170526e-06,
"loss": 0.0756,
"step": 430
},
{
"epoch": 3.53502538071066,
"grad_norm": 0.0700037032365799,
"learning_rate": 4.047589733971646e-06,
"loss": 0.0805,
"step": 435
},
{
"epoch": 3.575634517766497,
"grad_norm": 0.0645386204123497,
"learning_rate": 3.376388529782215e-06,
"loss": 0.0813,
"step": 440
},
{
"epoch": 3.616243654822335,
"grad_norm": 0.05811276286840439,
"learning_rate": 2.7640656466274782e-06,
"loss": 0.0808,
"step": 445
},
{
"epoch": 3.6568527918781726,
"grad_norm": 0.06826285272836685,
"learning_rate": 2.2113943524323167e-06,
"loss": 0.0807,
"step": 450
},
{
"epoch": 3.6568527918781726,
"eval_loss": 0.08962889015674591,
"eval_runtime": 577.8097,
"eval_samples_per_second": 1.731,
"eval_steps_per_second": 0.216,
"step": 450
}
],
"logging_steps": 5,
"max_steps": 492,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.8705510622704435e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}