|
{ |
|
"best_metric": 0.10999426990747452, |
|
"best_model_checkpoint": "model_training/deepseek_prover_sft_no_err/checkpoints-by_file-09-07-09-00/checkpoint-450", |
|
"epoch": 3.6548223350253806, |
|
"eval_steps": 50, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04060913705583756, |
|
"grad_norm": 90.72683715820312, |
|
"learning_rate": 1e-05, |
|
"loss": 12.7608, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.08121827411167512, |
|
"grad_norm": 64.71466827392578, |
|
"learning_rate": 2e-05, |
|
"loss": 9.7379, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1218274111675127, |
|
"grad_norm": 72.4654312133789, |
|
"learning_rate": 3e-05, |
|
"loss": 4.8295, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.16243654822335024, |
|
"grad_norm": 0.33800482749938965, |
|
"learning_rate": 4e-05, |
|
"loss": 0.7192, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.20304568527918782, |
|
"grad_norm": 0.41024672985076904, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1739, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2436548223350254, |
|
"grad_norm": 7.3582916259765625, |
|
"learning_rate": 6e-05, |
|
"loss": 0.1757, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.28426395939086296, |
|
"grad_norm": 0.308156818151474, |
|
"learning_rate": 7e-05, |
|
"loss": 0.15, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3248730964467005, |
|
"grad_norm": 0.22839659452438354, |
|
"learning_rate": 8e-05, |
|
"loss": 0.1439, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.36548223350253806, |
|
"grad_norm": 0.9331194162368774, |
|
"learning_rate": 9e-05, |
|
"loss": 0.1336, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"grad_norm": 0.13771598041057587, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1352, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"eval_loss": 0.14033491909503937, |
|
"eval_runtime": 576.5709, |
|
"eval_samples_per_second": 1.734, |
|
"eval_steps_per_second": 0.217, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4467005076142132, |
|
"grad_norm": 0.04095697030425072, |
|
"learning_rate": 9.996842891446092e-05, |
|
"loss": 0.1245, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.4873096446700508, |
|
"grad_norm": 0.11559335142374039, |
|
"learning_rate": 9.987375552718133e-05, |
|
"loss": 0.1208, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5279187817258884, |
|
"grad_norm": 0.10437008738517761, |
|
"learning_rate": 9.971609939582557e-05, |
|
"loss": 0.1201, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5685279187817259, |
|
"grad_norm": 0.05311759188771248, |
|
"learning_rate": 9.9495659615402e-05, |
|
"loss": 0.1317, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6091370558375635, |
|
"grad_norm": 0.07723415642976761, |
|
"learning_rate": 9.921271456683715e-05, |
|
"loss": 0.1221, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.649746192893401, |
|
"grad_norm": 0.040255699306726456, |
|
"learning_rate": 9.886762156542428e-05, |
|
"loss": 0.112, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6903553299492385, |
|
"grad_norm": 0.028574412688612938, |
|
"learning_rate": 9.846081640959007e-05, |
|
"loss": 0.1086, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.7309644670050761, |
|
"grad_norm": 0.041674137115478516, |
|
"learning_rate": 9.79928128305494e-05, |
|
"loss": 0.1129, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7715736040609137, |
|
"grad_norm": 0.04879468306899071, |
|
"learning_rate": 9.746420184354334e-05, |
|
"loss": 0.1096, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 0.03487968072295189, |
|
"learning_rate": 9.687565100147939e-05, |
|
"loss": 0.1114, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"eval_loss": 0.12282345443964005, |
|
"eval_runtime": 576.5189, |
|
"eval_samples_per_second": 1.735, |
|
"eval_steps_per_second": 0.217, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8527918781725888, |
|
"grad_norm": 0.022248243913054466, |
|
"learning_rate": 9.622790355191672e-05, |
|
"loss": 0.1084, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.8934010152284264, |
|
"grad_norm": 0.028482923284173012, |
|
"learning_rate": 9.552177749846083e-05, |
|
"loss": 0.1137, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.934010152284264, |
|
"grad_norm": 0.030047094449400902, |
|
"learning_rate": 9.475816456775313e-05, |
|
"loss": 0.1128, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.9746192893401016, |
|
"grad_norm": 0.024480503052473068, |
|
"learning_rate": 9.393802908335977e-05, |
|
"loss": 0.1065, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.015228426395939, |
|
"grad_norm": 0.023714397102594376, |
|
"learning_rate": 9.306240674798203e-05, |
|
"loss": 0.1136, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.0558375634517767, |
|
"grad_norm": 0.028829872608184814, |
|
"learning_rate": 9.213240333552589e-05, |
|
"loss": 0.1074, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0964467005076142, |
|
"grad_norm": 0.02569424733519554, |
|
"learning_rate": 9.114919329468282e-05, |
|
"loss": 0.106, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.1370558375634519, |
|
"grad_norm": 0.030071981251239777, |
|
"learning_rate": 9.011401826578492e-05, |
|
"loss": 0.1105, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.1776649746192893, |
|
"grad_norm": 0.02678651362657547, |
|
"learning_rate": 8.902818551280758e-05, |
|
"loss": 0.0967, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.218274111675127, |
|
"grad_norm": 0.034579597413539886, |
|
"learning_rate": 8.789306627249985e-05, |
|
"loss": 0.095, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.218274111675127, |
|
"eval_loss": 0.11852660775184631, |
|
"eval_runtime": 576.5584, |
|
"eval_samples_per_second": 1.734, |
|
"eval_steps_per_second": 0.217, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.2588832487309645, |
|
"grad_norm": 0.03252468258142471, |
|
"learning_rate": 8.6710094022727e-05, |
|
"loss": 0.1047, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.299492385786802, |
|
"grad_norm": 0.031006837263703346, |
|
"learning_rate": 8.548076267221256e-05, |
|
"loss": 0.0952, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.3401015228426396, |
|
"grad_norm": 0.03085142932832241, |
|
"learning_rate": 8.420662467396547e-05, |
|
"loss": 0.1088, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.380710659898477, |
|
"grad_norm": 0.03295427933335304, |
|
"learning_rate": 8.288928906477496e-05, |
|
"loss": 0.0935, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.4213197969543148, |
|
"grad_norm": 0.04264140501618385, |
|
"learning_rate": 8.15304194332491e-05, |
|
"loss": 0.1052, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.4619289340101522, |
|
"grad_norm": 0.034094925969839096, |
|
"learning_rate": 8.013173181896283e-05, |
|
"loss": 0.1026, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.50253807106599, |
|
"grad_norm": 0.042117953300476074, |
|
"learning_rate": 7.869499254536865e-05, |
|
"loss": 0.1113, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.5431472081218274, |
|
"grad_norm": 0.036028943955898285, |
|
"learning_rate": 7.722201598920673e-05, |
|
"loss": 0.0962, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.5837563451776648, |
|
"grad_norm": 0.039681848138570786, |
|
"learning_rate": 7.571466228923115e-05, |
|
"loss": 0.1029, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.6243654822335025, |
|
"grad_norm": 0.03896370530128479, |
|
"learning_rate": 7.417483499714589e-05, |
|
"loss": 0.1017, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6243654822335025, |
|
"eval_loss": 0.11510748416185379, |
|
"eval_runtime": 578.0426, |
|
"eval_samples_per_second": 1.73, |
|
"eval_steps_per_second": 0.216, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6649746192893402, |
|
"grad_norm": 0.04175429046154022, |
|
"learning_rate": 7.260447867371709e-05, |
|
"loss": 0.1042, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.7055837563451777, |
|
"grad_norm": 0.041212454438209534, |
|
"learning_rate": 7.100557643309732e-05, |
|
"loss": 0.0961, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.7461928934010151, |
|
"grad_norm": 0.04984848201274872, |
|
"learning_rate": 6.938014743846285e-05, |
|
"loss": 0.0982, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.7868020304568528, |
|
"grad_norm": 0.04844158887863159, |
|
"learning_rate": 6.773024435212678e-05, |
|
"loss": 0.1042, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.8274111675126905, |
|
"grad_norm": 0.04398966580629349, |
|
"learning_rate": 6.605795074334794e-05, |
|
"loss": 0.101, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.868020304568528, |
|
"grad_norm": 0.04412737116217613, |
|
"learning_rate": 6.436537845710903e-05, |
|
"loss": 0.1006, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.9086294416243654, |
|
"grad_norm": 0.04369115084409714, |
|
"learning_rate": 6.265466494718732e-05, |
|
"loss": 0.0883, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.9492385786802031, |
|
"grad_norm": 0.0569264255464077, |
|
"learning_rate": 6.092797057688495e-05, |
|
"loss": 0.0854, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.9898477157360406, |
|
"grad_norm": 0.0520426370203495, |
|
"learning_rate": 5.918747589082853e-05, |
|
"loss": 0.0931, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.030456852791878, |
|
"grad_norm": 0.04745051637291908, |
|
"learning_rate": 5.7435378861282585e-05, |
|
"loss": 0.0997, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.030456852791878, |
|
"eval_loss": 0.11357399076223373, |
|
"eval_runtime": 578.1803, |
|
"eval_samples_per_second": 1.73, |
|
"eval_steps_per_second": 0.216, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.0710659898477157, |
|
"grad_norm": 0.04727412760257721, |
|
"learning_rate": 5.567389211245485e-05, |
|
"loss": 0.0869, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.1116751269035534, |
|
"grad_norm": 0.05797808617353439, |
|
"learning_rate": 5.390524012629824e-05, |
|
"loss": 0.0967, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.152284263959391, |
|
"grad_norm": 0.05673275515437126, |
|
"learning_rate": 5.2131656433338506e-05, |
|
"loss": 0.0915, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.1928934010152283, |
|
"grad_norm": 0.06083540990948677, |
|
"learning_rate": 5.035538079207488e-05, |
|
"loss": 0.0872, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.233502538071066, |
|
"grad_norm": 0.05403359234333038, |
|
"learning_rate": 4.857865636051585e-05, |
|
"loss": 0.0891, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.2741116751269037, |
|
"grad_norm": 0.06568802893161774, |
|
"learning_rate": 4.6803726863421725e-05, |
|
"loss": 0.0879, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.314720812182741, |
|
"grad_norm": 0.05751260742545128, |
|
"learning_rate": 4.503283375883165e-05, |
|
"loss": 0.0868, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.3553299492385786, |
|
"grad_norm": 0.05319669097661972, |
|
"learning_rate": 4.326821340745304e-05, |
|
"loss": 0.0807, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.3959390862944163, |
|
"grad_norm": 0.06429895013570786, |
|
"learning_rate": 4.151209424848819e-05, |
|
"loss": 0.0842, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.436548223350254, |
|
"grad_norm": 0.05513699725270271, |
|
"learning_rate": 3.976669398546451e-05, |
|
"loss": 0.0891, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.436548223350254, |
|
"eval_loss": 0.1108972504734993, |
|
"eval_runtime": 577.9422, |
|
"eval_samples_per_second": 1.73, |
|
"eval_steps_per_second": 0.216, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.4771573604060912, |
|
"grad_norm": 0.059542689472436905, |
|
"learning_rate": 3.803421678562213e-05, |
|
"loss": 0.0911, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.517766497461929, |
|
"grad_norm": 0.054820574820041656, |
|
"learning_rate": 3.631685049639586e-05, |
|
"loss": 0.0815, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.5583756345177666, |
|
"grad_norm": 0.060525231063365936, |
|
"learning_rate": 3.461676388250651e-05, |
|
"loss": 0.083, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.598984771573604, |
|
"grad_norm": 0.07101233303546906, |
|
"learning_rate": 3.293610388715048e-05, |
|
"loss": 0.0853, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.6395939086294415, |
|
"grad_norm": 0.0710340216755867, |
|
"learning_rate": 3.127699292074683e-05, |
|
"loss": 0.088, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.6802030456852792, |
|
"grad_norm": 0.0659579485654831, |
|
"learning_rate": 2.964152618066508e-05, |
|
"loss": 0.0775, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.720812182741117, |
|
"grad_norm": 0.07411176711320877, |
|
"learning_rate": 2.8031769005319147e-05, |
|
"loss": 0.0808, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.761421319796954, |
|
"grad_norm": 0.06739082932472229, |
|
"learning_rate": 2.6449754265968264e-05, |
|
"loss": 0.0842, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.802030456852792, |
|
"grad_norm": 0.0516592413187027, |
|
"learning_rate": 2.4897479799518796e-05, |
|
"loss": 0.0806, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.8426395939086295, |
|
"grad_norm": 0.06238855794072151, |
|
"learning_rate": 2.3376905885569182e-05, |
|
"loss": 0.0794, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.8426395939086295, |
|
"eval_loss": 0.11036123335361481, |
|
"eval_runtime": 577.777, |
|
"eval_samples_per_second": 1.731, |
|
"eval_steps_per_second": 0.216, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.8832487309644668, |
|
"grad_norm": 0.06635326892137527, |
|
"learning_rate": 2.1889952770883643e-05, |
|
"loss": 0.0921, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.9238578680203045, |
|
"grad_norm": 0.06318895518779755, |
|
"learning_rate": 2.043849824442124e-05, |
|
"loss": 0.0785, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.964467005076142, |
|
"grad_norm": 0.09163162112236023, |
|
"learning_rate": 1.9024375265982384e-05, |
|
"loss": 0.0868, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 3.00507614213198, |
|
"grad_norm": 0.07665061205625534, |
|
"learning_rate": 1.764936965146773e-05, |
|
"loss": 0.0842, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.045685279187817, |
|
"grad_norm": 0.06518320739269257, |
|
"learning_rate": 1.631521781767214e-05, |
|
"loss": 0.0819, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 3.0862944162436547, |
|
"grad_norm": 0.06743030995130539, |
|
"learning_rate": 1.502360458946232e-05, |
|
"loss": 0.0729, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.1269035532994924, |
|
"grad_norm": 0.07401350885629654, |
|
"learning_rate": 1.3776161072106702e-05, |
|
"loss": 0.0903, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 3.16751269035533, |
|
"grad_norm": 0.07385122030973434, |
|
"learning_rate": 1.257446259144494e-05, |
|
"loss": 0.0867, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.2081218274111674, |
|
"grad_norm": 0.06651349365711212, |
|
"learning_rate": 1.1420026704498077e-05, |
|
"loss": 0.0815, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 3.248730964467005, |
|
"grad_norm": 0.06250081211328506, |
|
"learning_rate": 1.031431128303153e-05, |
|
"loss": 0.0702, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.248730964467005, |
|
"eval_loss": 0.11043216288089752, |
|
"eval_runtime": 577.4688, |
|
"eval_samples_per_second": 1.732, |
|
"eval_steps_per_second": 0.216, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.2893401015228427, |
|
"grad_norm": 0.0696023628115654, |
|
"learning_rate": 9.258712672491415e-06, |
|
"loss": 0.0826, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 3.3299492385786804, |
|
"grad_norm": 0.06600358337163925, |
|
"learning_rate": 8.254563928638893e-06, |
|
"loss": 0.0775, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.3705583756345177, |
|
"grad_norm": 0.06653475761413574, |
|
"learning_rate": 7.3031331341093915e-06, |
|
"loss": 0.0875, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 3.4111675126903553, |
|
"grad_norm": 0.05932833254337311, |
|
"learning_rate": 6.405621797022848e-06, |
|
"loss": 0.0793, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.451776649746193, |
|
"grad_norm": 0.06918617337942123, |
|
"learning_rate": 5.563163333667099e-06, |
|
"loss": 0.0759, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 3.4923857868020303, |
|
"grad_norm": 0.07110767066478729, |
|
"learning_rate": 4.776821637170526e-06, |
|
"loss": 0.0821, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.532994923857868, |
|
"grad_norm": 0.061374176293611526, |
|
"learning_rate": 4.047589733971646e-06, |
|
"loss": 0.0778, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 3.5736040609137056, |
|
"grad_norm": 0.06456020474433899, |
|
"learning_rate": 3.376388529782215e-06, |
|
"loss": 0.0741, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.6142131979695433, |
|
"grad_norm": 0.07185477763414383, |
|
"learning_rate": 2.7640656466274782e-06, |
|
"loss": 0.0753, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 3.6548223350253806, |
|
"grad_norm": 0.06696159392595291, |
|
"learning_rate": 2.2113943524323167e-06, |
|
"loss": 0.0775, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.6548223350253806, |
|
"eval_loss": 0.10999426990747452, |
|
"eval_runtime": 577.8937, |
|
"eval_samples_per_second": 1.73, |
|
"eval_steps_per_second": 0.216, |
|
"step": 450 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 492, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.8705510622704435e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|