|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0005, |
|
"eval_steps": 500, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1e-05, |
|
"grad_norm": 8.875, |
|
"learning_rate": 9.999999997532599e-06, |
|
"loss": 1.6459, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2e-05, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 9.999999990130395e-06, |
|
"loss": 1.6742, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 3e-05, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 9.99999997779339e-06, |
|
"loss": 1.6223, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 4e-05, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 9.999999960521582e-06, |
|
"loss": 1.5398, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 5e-05, |
|
"grad_norm": 3.375, |
|
"learning_rate": 9.999999938314972e-06, |
|
"loss": 1.5666, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 6e-05, |
|
"grad_norm": 1.859375, |
|
"learning_rate": 9.999999911173561e-06, |
|
"loss": 1.5981, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 7e-05, |
|
"grad_norm": 1.734375, |
|
"learning_rate": 9.999999879097347e-06, |
|
"loss": 1.644, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 8e-05, |
|
"grad_norm": 1.796875, |
|
"learning_rate": 9.999999842086332e-06, |
|
"loss": 1.6331, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 9e-05, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 9.999999800140514e-06, |
|
"loss": 1.626, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0001, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 9.999999753259893e-06, |
|
"loss": 1.5778, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00011, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 9.99999970144447e-06, |
|
"loss": 1.6286, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00012, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 9.999999644694247e-06, |
|
"loss": 1.5614, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00013, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 9.999999583009221e-06, |
|
"loss": 1.6447, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.00014, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 9.999999516389394e-06, |
|
"loss": 1.5258, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.00015, |
|
"grad_norm": 1.25, |
|
"learning_rate": 9.999999444834763e-06, |
|
"loss": 1.6336, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00016, |
|
"grad_norm": 1.5546875, |
|
"learning_rate": 9.999999368345333e-06, |
|
"loss": 1.6073, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00017, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 9.999999286921101e-06, |
|
"loss": 1.5919, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.00018, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 9.999999200562065e-06, |
|
"loss": 1.543, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.00019, |
|
"grad_norm": 1.6875, |
|
"learning_rate": 9.99999910926823e-06, |
|
"loss": 1.6101, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0002, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 9.999999013039593e-06, |
|
"loss": 1.5796, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00021, |
|
"grad_norm": 2.578125, |
|
"learning_rate": 9.999998911876154e-06, |
|
"loss": 1.5748, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.00022, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 9.999998805777915e-06, |
|
"loss": 1.5479, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.00023, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 9.999998694744875e-06, |
|
"loss": 1.5318, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.00024, |
|
"grad_norm": 1.125, |
|
"learning_rate": 9.999998578777036e-06, |
|
"loss": 1.6259, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.00025, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 9.999998457874392e-06, |
|
"loss": 1.5525, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.00026, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 9.99999833203695e-06, |
|
"loss": 1.5576, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.00027, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 9.999998201264707e-06, |
|
"loss": 1.3934, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.00028, |
|
"grad_norm": 3.15625, |
|
"learning_rate": 9.999998065557664e-06, |
|
"loss": 1.5423, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.00029, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 9.999997924915818e-06, |
|
"loss": 1.5679, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0003, |
|
"grad_norm": 1.625, |
|
"learning_rate": 9.999997779339175e-06, |
|
"loss": 1.5329, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00031, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 9.999997628827732e-06, |
|
"loss": 1.4603, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.00032, |
|
"grad_norm": 1.46875, |
|
"learning_rate": 9.999997473381487e-06, |
|
"loss": 1.5774, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.00033, |
|
"grad_norm": 2.0625, |
|
"learning_rate": 9.999997313000444e-06, |
|
"loss": 1.5522, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.00034, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 9.9999971476846e-06, |
|
"loss": 1.5964, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.00035, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 9.999996977433957e-06, |
|
"loss": 1.6129, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.00036, |
|
"grad_norm": 1.78125, |
|
"learning_rate": 9.999996802248514e-06, |
|
"loss": 1.548, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.00037, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 9.999996622128274e-06, |
|
"loss": 1.5662, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.00038, |
|
"grad_norm": 4.84375, |
|
"learning_rate": 9.999996437073236e-06, |
|
"loss": 1.6197, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.00039, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 9.999996247083397e-06, |
|
"loss": 1.5308, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0004, |
|
"grad_norm": 1.375, |
|
"learning_rate": 9.99999605215876e-06, |
|
"loss": 1.5846, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.00041, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 9.999995852299324e-06, |
|
"loss": 1.4274, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.00042, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 9.999995647505092e-06, |
|
"loss": 1.4986, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.00043, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 9.99999543777606e-06, |
|
"loss": 1.5135, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.00044, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 9.999995223112231e-06, |
|
"loss": 1.5472, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.00045, |
|
"grad_norm": 1.5703125, |
|
"learning_rate": 9.999995003513605e-06, |
|
"loss": 1.5635, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.00046, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 9.999994778980182e-06, |
|
"loss": 1.5506, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.00047, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 9.99999454951196e-06, |
|
"loss": 1.5071, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.00048, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 9.999994315108943e-06, |
|
"loss": 1.5532, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.00049, |
|
"grad_norm": 1.828125, |
|
"learning_rate": 9.999994075771128e-06, |
|
"loss": 1.6061, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0005, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 9.999993831498517e-06, |
|
"loss": 1.5629, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 100000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1527564296192e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|