|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.21150219433526624, |
|
"eval_steps": 500, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9823745064861824e-05, |
|
"loss": 1.064, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.964749012972363e-05, |
|
"loss": 0.9951, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9471235194585454e-05, |
|
"loss": 1.0051, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.929498025944726e-05, |
|
"loss": 1.0086, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9118725324309084e-05, |
|
"loss": 1.1256, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.89424703891709e-05, |
|
"loss": 1.0419, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8766215454032714e-05, |
|
"loss": 1.0368, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8589960518894536e-05, |
|
"loss": 0.9884, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8413705583756344e-05, |
|
"loss": 1.01, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8237450648618166e-05, |
|
"loss": 1.0734, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.806119571347998e-05, |
|
"loss": 1.0388, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7884940778341796e-05, |
|
"loss": 1.0576, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.770868584320361e-05, |
|
"loss": 1.0449, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7532430908065426e-05, |
|
"loss": 1.0809, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.735617597292725e-05, |
|
"loss": 1.0888, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.717992103778906e-05, |
|
"loss": 1.031, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.700366610265088e-05, |
|
"loss": 1.0546, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.682741116751269e-05, |
|
"loss": 1.093, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.665468133107727e-05, |
|
"loss": 1.0729, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.647842639593909e-05, |
|
"loss": 1.0771, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6302171460800905e-05, |
|
"loss": 1.1318, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.612591652566272e-05, |
|
"loss": 1.0114, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.5949661590524535e-05, |
|
"loss": 1.0272, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.577340665538635e-05, |
|
"loss": 1.0503, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.559715172024817e-05, |
|
"loss": 0.9816, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.542794698251551e-05, |
|
"loss": 1.001, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.525169204737733e-05, |
|
"loss": 1.1184, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.507543711223914e-05, |
|
"loss": 1.0958, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.4899182177100965e-05, |
|
"loss": 1.0912, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.472292724196277e-05, |
|
"loss": 1.0226, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.4546672306824595e-05, |
|
"loss": 1.0038, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.437041737168641e-05, |
|
"loss": 0.9825, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4194162436548225e-05, |
|
"loss": 1.0476, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.401790750141005e-05, |
|
"loss": 1.0642, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.3841652566271855e-05, |
|
"loss": 1.0254, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.366539763113368e-05, |
|
"loss": 1.0941, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3489142695995485e-05, |
|
"loss": 0.9739, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.331288776085731e-05, |
|
"loss": 1.0468, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.313663282571912e-05, |
|
"loss": 1.0706, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.296037789058094e-05, |
|
"loss": 0.9796, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.278412295544276e-05, |
|
"loss": 1.0202, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.260786802030457e-05, |
|
"loss": 1.0263, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.243161308516639e-05, |
|
"loss": 0.9823, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2255358150028204e-05, |
|
"loss": 1.0187, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.207910321489002e-05, |
|
"loss": 1.0219, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.1902848279751834e-05, |
|
"loss": 1.0641, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.172659334461365e-05, |
|
"loss": 0.9979, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.155033840947547e-05, |
|
"loss": 0.9762, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1374083474337286e-05, |
|
"loss": 0.983, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.11978285391991e-05, |
|
"loss": 1.0849, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1021573604060916e-05, |
|
"loss": 1.0281, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.084531866892273e-05, |
|
"loss": 1.0041, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.0669063733784546e-05, |
|
"loss": 1.0055, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.049280879864636e-05, |
|
"loss": 1.014, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.0316553863508176e-05, |
|
"loss": 1.0347, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.014029892837e-05, |
|
"loss": 0.9675, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.996404399323181e-05, |
|
"loss": 1.0086, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.978778905809363e-05, |
|
"loss": 0.9836, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.961153412295544e-05, |
|
"loss": 1.0441, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.943527918781726e-05, |
|
"loss": 1.0613, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 14184, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 1.0058830184448e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|