|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.019913376810872702, |
|
"eval_steps": 9, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00019913376810872703, |
|
"eval_loss": 2.2963063716888428, |
|
"eval_runtime": 622.897, |
|
"eval_samples_per_second": 13.578, |
|
"eval_steps_per_second": 1.699, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0005974013043261811, |
|
"grad_norm": 23.913619995117188, |
|
"learning_rate": 1.5e-05, |
|
"loss": 8.8941, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0011948026086523623, |
|
"grad_norm": 19.30548858642578, |
|
"learning_rate": 3e-05, |
|
"loss": 8.8968, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0017922039129785433, |
|
"grad_norm": 10.14571762084961, |
|
"learning_rate": 4.5e-05, |
|
"loss": 8.4803, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0017922039129785433, |
|
"eval_loss": 1.8854235410690308, |
|
"eval_runtime": 627.0021, |
|
"eval_samples_per_second": 13.49, |
|
"eval_steps_per_second": 1.687, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0023896052173047245, |
|
"grad_norm": 5.248955249786377, |
|
"learning_rate": 4.993910125649561e-05, |
|
"loss": 7.1413, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0029870065216309055, |
|
"grad_norm": 5.370950222015381, |
|
"learning_rate": 4.962019382530521e-05, |
|
"loss": 6.8965, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0035844078259570865, |
|
"grad_norm": 6.065382480621338, |
|
"learning_rate": 4.9031542398457974e-05, |
|
"loss": 7.1409, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0035844078259570865, |
|
"eval_loss": 1.7099746465682983, |
|
"eval_runtime": 626.7094, |
|
"eval_samples_per_second": 13.496, |
|
"eval_steps_per_second": 1.688, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0041818091302832676, |
|
"grad_norm": 4.529496669769287, |
|
"learning_rate": 4.817959636416969e-05, |
|
"loss": 7.241, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.004779210434609449, |
|
"grad_norm": 3.878418207168579, |
|
"learning_rate": 4.707368982147318e-05, |
|
"loss": 6.9688, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.00537661173893563, |
|
"grad_norm": 5.248115539550781, |
|
"learning_rate": 4.572593931387604e-05, |
|
"loss": 6.7016, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.00537661173893563, |
|
"eval_loss": 1.6524488925933838, |
|
"eval_runtime": 627.2362, |
|
"eval_samples_per_second": 13.485, |
|
"eval_steps_per_second": 1.687, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.005974013043261811, |
|
"grad_norm": 5.091797828674316, |
|
"learning_rate": 4.415111107797445e-05, |
|
"loss": 6.703, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0065714143475879925, |
|
"grad_norm": 4.741832256317139, |
|
"learning_rate": 4.2366459261474933e-05, |
|
"loss": 5.8822, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.007168815651914173, |
|
"grad_norm": 4.389284133911133, |
|
"learning_rate": 4.039153688314145e-05, |
|
"loss": 6.637, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.007168815651914173, |
|
"eval_loss": 1.626313328742981, |
|
"eval_runtime": 627.4338, |
|
"eval_samples_per_second": 13.48, |
|
"eval_steps_per_second": 1.686, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0077662169562403545, |
|
"grad_norm": 3.989469528198242, |
|
"learning_rate": 3.824798160583012e-05, |
|
"loss": 6.5048, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.008363618260566535, |
|
"grad_norm": 4.279814720153809, |
|
"learning_rate": 3.5959278669726935e-05, |
|
"loss": 6.5277, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.008961019564892717, |
|
"grad_norm": 3.723292827606201, |
|
"learning_rate": 3.355050358314172e-05, |
|
"loss": 6.2794, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.008961019564892717, |
|
"eval_loss": 1.6118539571762085, |
|
"eval_runtime": 626.9775, |
|
"eval_samples_per_second": 13.49, |
|
"eval_steps_per_second": 1.687, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.009558420869218898, |
|
"grad_norm": 3.986083507537842, |
|
"learning_rate": 3.104804738999169e-05, |
|
"loss": 6.0171, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.01015582217354508, |
|
"grad_norm": 4.378915309906006, |
|
"learning_rate": 2.8479327524001636e-05, |
|
"loss": 6.5238, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.01075322347787126, |
|
"grad_norm": 4.291739463806152, |
|
"learning_rate": 2.587248741756253e-05, |
|
"loss": 6.2643, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.01075322347787126, |
|
"eval_loss": 1.6048556566238403, |
|
"eval_runtime": 627.3604, |
|
"eval_samples_per_second": 13.482, |
|
"eval_steps_per_second": 1.686, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.01135062478219744, |
|
"grad_norm": 4.052128314971924, |
|
"learning_rate": 2.3256088156396868e-05, |
|
"loss": 6.4014, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.011948026086523622, |
|
"grad_norm": 4.348297119140625, |
|
"learning_rate": 2.0658795558326743e-05, |
|
"loss": 6.6049, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.012545427390849804, |
|
"grad_norm": 4.070796012878418, |
|
"learning_rate": 1.8109066104575023e-05, |
|
"loss": 6.6138, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.012545427390849804, |
|
"eval_loss": 1.5996973514556885, |
|
"eval_runtime": 627.0719, |
|
"eval_samples_per_second": 13.488, |
|
"eval_steps_per_second": 1.687, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.013142828695175985, |
|
"grad_norm": 3.7099554538726807, |
|
"learning_rate": 1.56348351646022e-05, |
|
"loss": 6.5339, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.013740229999502166, |
|
"grad_norm": 4.7017998695373535, |
|
"learning_rate": 1.3263210930352737e-05, |
|
"loss": 6.5618, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.014337631303828346, |
|
"grad_norm": 3.729522705078125, |
|
"learning_rate": 1.1020177413231334e-05, |
|
"loss": 6.0646, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.014337631303828346, |
|
"eval_loss": 1.5968443155288696, |
|
"eval_runtime": 627.1273, |
|
"eval_samples_per_second": 13.487, |
|
"eval_steps_per_second": 1.687, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.014935032608154528, |
|
"grad_norm": 3.717174768447876, |
|
"learning_rate": 8.930309757836517e-06, |
|
"loss": 6.2037, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.015532433912480709, |
|
"grad_norm": 5.319967746734619, |
|
"learning_rate": 7.016504991533726e-06, |
|
"loss": 6.2515, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.01612983521680689, |
|
"grad_norm": 4.202660083770752, |
|
"learning_rate": 5.299731159831953e-06, |
|
"loss": 6.0643, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.01612983521680689, |
|
"eval_loss": 1.5953447818756104, |
|
"eval_runtime": 627.5483, |
|
"eval_samples_per_second": 13.478, |
|
"eval_steps_per_second": 1.686, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.01672723652113307, |
|
"grad_norm": 4.394832134246826, |
|
"learning_rate": 3.798797596089351e-06, |
|
"loss": 6.6288, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.017324637825459253, |
|
"grad_norm": 5.086785316467285, |
|
"learning_rate": 2.5301488425208296e-06, |
|
"loss": 6.3303, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.017922039129785433, |
|
"grad_norm": 3.8469746112823486, |
|
"learning_rate": 1.5076844803522922e-06, |
|
"loss": 6.6755, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.017922039129785433, |
|
"eval_loss": 1.5948328971862793, |
|
"eval_runtime": 627.4465, |
|
"eval_samples_per_second": 13.48, |
|
"eval_steps_per_second": 1.686, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.018519440434111613, |
|
"grad_norm": 4.4008331298828125, |
|
"learning_rate": 7.426068431000882e-07, |
|
"loss": 6.3269, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.019116841738437796, |
|
"grad_norm": 3.7891712188720703, |
|
"learning_rate": 2.4329828146074095e-07, |
|
"loss": 6.312, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.019714243042763976, |
|
"grad_norm": 4.460823059082031, |
|
"learning_rate": 1.522932452260595e-08, |
|
"loss": 6.0766, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.019714243042763976, |
|
"eval_loss": 1.5946401357650757, |
|
"eval_runtime": 626.8725, |
|
"eval_samples_per_second": 13.492, |
|
"eval_steps_per_second": 1.688, |
|
"step": 99 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.484255733649244e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|