|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.8, |
|
"eval_steps": 500, |
|
"global_step": 320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"Batch Mean": -1.4581298828125, |
|
"accuracy": 0.28125, |
|
"epoch": 0, |
|
"step": 0 |
|
}, |
|
{ |
|
"Batch Mean": -1.4786376953125, |
|
"accuracy": 0.46875, |
|
"epoch": 0, |
|
"step": 0 |
|
}, |
|
{ |
|
"Batch Mean": -1.486572265625, |
|
"accuracy": 0.5, |
|
"epoch": 0, |
|
"step": 0 |
|
}, |
|
{ |
|
"Batch Mean": -1.439697265625, |
|
"accuracy": 0.625, |
|
"epoch": 0, |
|
"step": 0 |
|
}, |
|
{ |
|
"epoch": 0.0025, |
|
"grad_norm": 2.7191572189331055, |
|
"learning_rate": 1.5000000000000002e-07, |
|
"loss": 0.6927, |
|
"step": 1 |
|
}, |
|
{ |
|
"Batch Mean": -1.4107666015625, |
|
"accuracy": 0.4375, |
|
"epoch": 0.0025, |
|
"step": 1 |
|
}, |
|
{ |
|
"Batch Mean": -1.4342041015625, |
|
"accuracy": 0.5, |
|
"epoch": 0.0025, |
|
"step": 1 |
|
}, |
|
{ |
|
"Batch Mean": -1.45263671875, |
|
"accuracy": 0.5625, |
|
"epoch": 0.0025, |
|
"step": 1 |
|
}, |
|
{ |
|
"Batch Mean": -1.4517822265625, |
|
"accuracy": 0.5625, |
|
"epoch": 0.0025, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005, |
|
"grad_norm": 3.204066038131714, |
|
"learning_rate": 3.0000000000000004e-07, |
|
"loss": 0.6964, |
|
"step": 2 |
|
}, |
|
{ |
|
"Batch Mean": -1.4908447265625, |
|
"accuracy": 0.59375, |
|
"epoch": 0.005, |
|
"step": 2 |
|
}, |
|
{ |
|
"Batch Mean": -1.425048828125, |
|
"accuracy": 0.4375, |
|
"epoch": 0.005, |
|
"step": 2 |
|
}, |
|
{ |
|
"Batch Mean": -1.464111328125, |
|
"accuracy": 0.375, |
|
"epoch": 0.005, |
|
"step": 2 |
|
}, |
|
{ |
|
"Batch Mean": -1.4324951171875, |
|
"accuracy": 0.59375, |
|
"epoch": 0.005, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0075, |
|
"grad_norm": 3.103353261947632, |
|
"learning_rate": 4.5e-07, |
|
"loss": 0.6991, |
|
"step": 3 |
|
}, |
|
{ |
|
"Batch Mean": -1.494140625, |
|
"accuracy": 0.46875, |
|
"epoch": 0.0075, |
|
"step": 3 |
|
}, |
|
{ |
|
"Batch Mean": -1.4178466796875, |
|
"accuracy": 0.625, |
|
"epoch": 0.0075, |
|
"step": 3 |
|
}, |
|
{ |
|
"Batch Mean": -1.520751953125, |
|
"accuracy": 0.59375, |
|
"epoch": 0.0075, |
|
"step": 3 |
|
}, |
|
{ |
|
"Batch Mean": -1.4844970703125, |
|
"accuracy": 0.5625, |
|
"epoch": 0.0075, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.3672587871551514, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 0.6883, |
|
"step": 4 |
|
}, |
|
{ |
|
"Batch Mean": -1.4312744140625, |
|
"accuracy": 0.4375, |
|
"epoch": 0.01, |
|
"step": 4 |
|
}, |
|
{ |
|
"Batch Mean": -1.4820556640625, |
|
"accuracy": 0.5625, |
|
"epoch": 0.01, |
|
"step": 4 |
|
}, |
|
{ |
|
"Batch Mean": -1.4405517578125, |
|
"accuracy": 0.5, |
|
"epoch": 0.01, |
|
"step": 4 |
|
}, |
|
{ |
|
"Batch Mean": -1.4302978515625, |
|
"accuracy": 0.53125, |
|
"epoch": 0.01, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0125, |
|
"grad_norm": 3.158576011657715, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.7012, |
|
"step": 5 |
|
}, |
|
{ |
|
"Batch Mean": -1.4569091796875, |
|
"accuracy": 0.40625, |
|
"epoch": 0.0125, |
|
"step": 5 |
|
}, |
|
{ |
|
"Batch Mean": -1.46435546875, |
|
"accuracy": 0.5, |
|
"epoch": 0.0125, |
|
"step": 5 |
|
}, |
|
{ |
|
"Batch Mean": -1.4354248046875, |
|
"accuracy": 0.5625, |
|
"epoch": 0.0125, |
|
"step": 5 |
|
}, |
|
{ |
|
"Batch Mean": -1.47412109375, |
|
"accuracy": 0.40625, |
|
"epoch": 0.0125, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.015, |
|
"grad_norm": 4.888192176818848, |
|
"learning_rate": 9e-07, |
|
"loss": 0.7118, |
|
"step": 6 |
|
}, |
|
{ |
|
"Batch Mean": -1.4361572265625, |
|
"accuracy": 0.53125, |
|
"epoch": 0.015, |
|
"step": 6 |
|
}, |
|
{ |
|
"Batch Mean": -1.4234619140625, |
|
"accuracy": 0.625, |
|
"epoch": 0.015, |
|
"step": 6 |
|
}, |
|
{ |
|
"Batch Mean": -1.4453125, |
|
"accuracy": 0.375, |
|
"epoch": 0.015, |
|
"step": 6 |
|
}, |
|
{ |
|
"Batch Mean": -1.44287109375, |
|
"accuracy": 0.5, |
|
"epoch": 0.015, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0175, |
|
"grad_norm": 3.654751777648926, |
|
"learning_rate": 1.05e-06, |
|
"loss": 0.6901, |
|
"step": 7 |
|
}, |
|
{ |
|
"Batch Mean": -1.4200439453125, |
|
"accuracy": 0.53125, |
|
"epoch": 0.0175, |
|
"step": 7 |
|
}, |
|
{ |
|
"Batch Mean": -1.406494140625, |
|
"accuracy": 0.4375, |
|
"epoch": 0.0175, |
|
"step": 7 |
|
}, |
|
{ |
|
"Batch Mean": -1.4012451171875, |
|
"accuracy": 0.53125, |
|
"epoch": 0.0175, |
|
"step": 7 |
|
}, |
|
{ |
|
"Batch Mean": -1.4122314453125, |
|
"accuracy": 0.4375, |
|
"epoch": 0.0175, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.2707793712615967, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.7026, |
|
"step": 8 |
|
}, |
|
{ |
|
"Batch Mean": -1.400634765625, |
|
"accuracy": 0.5625, |
|
"epoch": 0.02, |
|
"step": 8 |
|
}, |
|
{ |
|
"Batch Mean": -1.3936767578125, |
|
"accuracy": 0.46875, |
|
"epoch": 0.02, |
|
"step": 8 |
|
}, |
|
{ |
|
"Batch Mean": -1.4110107421875, |
|
"accuracy": 0.34375, |
|
"epoch": 0.02, |
|
"step": 8 |
|
}, |
|
{ |
|
"Batch Mean": -1.4215087890625, |
|
"accuracy": 0.625, |
|
"epoch": 0.02, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0225, |
|
"grad_norm": 3.053551197052002, |
|
"learning_rate": 1.35e-06, |
|
"loss": 0.6859, |
|
"step": 9 |
|
}, |
|
{ |
|
"Batch Mean": -1.35302734375, |
|
"accuracy": 0.5625, |
|
"epoch": 0.0225, |
|
"step": 9 |
|
}, |
|
{ |
|
"Batch Mean": -1.35003662109375, |
|
"accuracy": 0.40625, |
|
"epoch": 0.0225, |
|
"step": 9 |
|
}, |
|
{ |
|
"Batch Mean": -1.39306640625, |
|
"accuracy": 0.5625, |
|
"epoch": 0.0225, |
|
"step": 9 |
|
}, |
|
{ |
|
"Batch Mean": -1.3843994140625, |
|
"accuracy": 0.53125, |
|
"epoch": 0.0225, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.025, |
|
"grad_norm": 2.9442760944366455, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.6853, |
|
"step": 10 |
|
}, |
|
{ |
|
"Batch Mean": -1.31396484375, |
|
"accuracy": 0.46875, |
|
"epoch": 0.025, |
|
"step": 10 |
|
}, |
|
{ |
|
"Batch Mean": -1.33154296875, |
|
"accuracy": 0.5625, |
|
"epoch": 0.025, |
|
"step": 10 |
|
}, |
|
{ |
|
"Batch Mean": -1.3260498046875, |
|
"accuracy": 0.46875, |
|
"epoch": 0.025, |
|
"step": 10 |
|
}, |
|
{ |
|
"Batch Mean": -1.3170166015625, |
|
"accuracy": 0.4375, |
|
"epoch": 0.025, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0275, |
|
"grad_norm": 2.729567050933838, |
|
"learning_rate": 1.65e-06, |
|
"loss": 0.6946, |
|
"step": 11 |
|
}, |
|
{ |
|
"Batch Mean": -1.24346923828125, |
|
"accuracy": 0.4375, |
|
"epoch": 0.0275, |
|
"step": 11 |
|
}, |
|
{ |
|
"Batch Mean": -1.239013671875, |
|
"accuracy": 0.59375, |
|
"epoch": 0.0275, |
|
"step": 11 |
|
}, |
|
{ |
|
"Batch Mean": -1.3074951171875, |
|
"accuracy": 0.5, |
|
"epoch": 0.0275, |
|
"step": 11 |
|
}, |
|
{ |
|
"Batch Mean": -1.24664306640625, |
|
"accuracy": 0.53125, |
|
"epoch": 0.0275, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8832643032073975, |
|
"learning_rate": 1.8e-06, |
|
"loss": 0.6869, |
|
"step": 12 |
|
}, |
|
{ |
|
"Batch Mean": -1.2061767578125, |
|
"accuracy": 0.625, |
|
"epoch": 0.03, |
|
"step": 12 |
|
}, |
|
{ |
|
"Batch Mean": -1.09735107421875, |
|
"accuracy": 0.5625, |
|
"epoch": 0.03, |
|
"step": 12 |
|
}, |
|
{ |
|
"Batch Mean": -1.1669921875, |
|
"accuracy": 0.625, |
|
"epoch": 0.03, |
|
"step": 12 |
|
}, |
|
{ |
|
"Batch Mean": -1.107421875, |
|
"accuracy": 0.53125, |
|
"epoch": 0.03, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0325, |
|
"grad_norm": 3.347060441970825, |
|
"learning_rate": 1.95e-06, |
|
"loss": 0.676, |
|
"step": 13 |
|
}, |
|
{ |
|
"Batch Mean": -0.99713134765625, |
|
"accuracy": 0.53125, |
|
"epoch": 0.0325, |
|
"step": 13 |
|
}, |
|
{ |
|
"Batch Mean": -0.992431640625, |
|
"accuracy": 0.6875, |
|
"epoch": 0.0325, |
|
"step": 13 |
|
}, |
|
{ |
|
"Batch Mean": -1.08367919921875, |
|
"accuracy": 0.65625, |
|
"epoch": 0.0325, |
|
"step": 13 |
|
}, |
|
{ |
|
"Batch Mean": -1.073486328125, |
|
"accuracy": 0.5625, |
|
"epoch": 0.0325, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.035, |
|
"grad_norm": 3.0629279613494873, |
|
"learning_rate": 2.1e-06, |
|
"loss": 0.6446, |
|
"step": 14 |
|
}, |
|
{ |
|
"Batch Mean": -1.027008056640625, |
|
"accuracy": 0.5625, |
|
"epoch": 0.035, |
|
"step": 14 |
|
}, |
|
{ |
|
"Batch Mean": -1.04302978515625, |
|
"accuracy": 0.625, |
|
"epoch": 0.035, |
|
"step": 14 |
|
}, |
|
{ |
|
"Batch Mean": -0.986724853515625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.035, |
|
"step": 14 |
|
}, |
|
{ |
|
"Batch Mean": -1.010406494140625, |
|
"accuracy": 0.65625, |
|
"epoch": 0.035, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0375, |
|
"grad_norm": 3.297088146209717, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.6466, |
|
"step": 15 |
|
}, |
|
{ |
|
"Batch Mean": -0.945648193359375, |
|
"accuracy": 0.625, |
|
"epoch": 0.0375, |
|
"step": 15 |
|
}, |
|
{ |
|
"Batch Mean": -0.90460205078125, |
|
"accuracy": 0.625, |
|
"epoch": 0.0375, |
|
"step": 15 |
|
}, |
|
{ |
|
"Batch Mean": -0.9103546142578125, |
|
"accuracy": 0.625, |
|
"epoch": 0.0375, |
|
"step": 15 |
|
}, |
|
{ |
|
"Batch Mean": -0.84765625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.0375, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 3.339815855026245, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.6261, |
|
"step": 16 |
|
}, |
|
{ |
|
"Batch Mean": -0.7341957092285156, |
|
"accuracy": 0.65625, |
|
"epoch": 0.04, |
|
"step": 16 |
|
}, |
|
{ |
|
"Batch Mean": -0.6576881408691406, |
|
"accuracy": 0.8125, |
|
"epoch": 0.04, |
|
"step": 16 |
|
}, |
|
{ |
|
"Batch Mean": -0.7573471069335938, |
|
"accuracy": 0.75, |
|
"epoch": 0.04, |
|
"step": 16 |
|
}, |
|
{ |
|
"Batch Mean": -0.8988265991210938, |
|
"accuracy": 0.84375, |
|
"epoch": 0.04, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0425, |
|
"grad_norm": 4.010303974151611, |
|
"learning_rate": 2.55e-06, |
|
"loss": 0.6324, |
|
"step": 17 |
|
}, |
|
{ |
|
"Batch Mean": -0.45727968215942383, |
|
"accuracy": 0.59375, |
|
"epoch": 0.0425, |
|
"step": 17 |
|
}, |
|
{ |
|
"Batch Mean": -0.40456533432006836, |
|
"accuracy": 0.6875, |
|
"epoch": 0.0425, |
|
"step": 17 |
|
}, |
|
{ |
|
"Batch Mean": -0.4847888946533203, |
|
"accuracy": 0.59375, |
|
"epoch": 0.0425, |
|
"step": 17 |
|
}, |
|
{ |
|
"Batch Mean": -0.31931304931640625, |
|
"accuracy": 0.65625, |
|
"epoch": 0.0425, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.045, |
|
"grad_norm": 4.431520462036133, |
|
"learning_rate": 2.7e-06, |
|
"loss": 0.62, |
|
"step": 18 |
|
}, |
|
{ |
|
"Batch Mean": -0.0693979263305664, |
|
"accuracy": 0.8125, |
|
"epoch": 0.045, |
|
"step": 18 |
|
}, |
|
{ |
|
"Batch Mean": -0.23062896728515625, |
|
"accuracy": 0.625, |
|
"epoch": 0.045, |
|
"step": 18 |
|
}, |
|
{ |
|
"Batch Mean": -0.10647201538085938, |
|
"accuracy": 0.65625, |
|
"epoch": 0.045, |
|
"step": 18 |
|
}, |
|
{ |
|
"Batch Mean": -0.07384902238845825, |
|
"accuracy": 0.6875, |
|
"epoch": 0.045, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0475, |
|
"grad_norm": 5.421309947967529, |
|
"learning_rate": 2.85e-06, |
|
"loss": 0.5896, |
|
"step": 19 |
|
}, |
|
{ |
|
"Batch Mean": -0.12799835205078125, |
|
"accuracy": 0.6875, |
|
"epoch": 0.0475, |
|
"step": 19 |
|
}, |
|
{ |
|
"Batch Mean": 0.0684967041015625, |
|
"accuracy": 0.875, |
|
"epoch": 0.0475, |
|
"step": 19 |
|
}, |
|
{ |
|
"Batch Mean": -0.014011383056640625, |
|
"accuracy": 0.65625, |
|
"epoch": 0.0475, |
|
"step": 19 |
|
}, |
|
{ |
|
"Batch Mean": 0.0633087158203125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.0475, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.102872848510742, |
|
"learning_rate": 3e-06, |
|
"loss": 0.4931, |
|
"step": 20 |
|
}, |
|
{ |
|
"Batch Mean": 0.19290733337402344, |
|
"accuracy": 0.8125, |
|
"epoch": 0.05, |
|
"step": 20 |
|
}, |
|
{ |
|
"Batch Mean": 0.29687976837158203, |
|
"accuracy": 0.53125, |
|
"epoch": 0.05, |
|
"step": 20 |
|
}, |
|
{ |
|
"Batch Mean": 0.103363037109375, |
|
"accuracy": 0.625, |
|
"epoch": 0.05, |
|
"step": 20 |
|
}, |
|
{ |
|
"Batch Mean": 0.3869609832763672, |
|
"accuracy": 0.71875, |
|
"epoch": 0.05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0525, |
|
"grad_norm": 7.569705486297607, |
|
"learning_rate": 2.992105263157895e-06, |
|
"loss": 0.5976, |
|
"step": 21 |
|
}, |
|
{ |
|
"Batch Mean": 0.47769927978515625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.0525, |
|
"step": 21 |
|
}, |
|
{ |
|
"Batch Mean": 0.5898284912109375, |
|
"accuracy": 0.875, |
|
"epoch": 0.0525, |
|
"step": 21 |
|
}, |
|
{ |
|
"Batch Mean": 0.3037242889404297, |
|
"accuracy": 0.75, |
|
"epoch": 0.0525, |
|
"step": 21 |
|
}, |
|
{ |
|
"Batch Mean": -0.037357330322265625, |
|
"accuracy": 0.75, |
|
"epoch": 0.0525, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.055, |
|
"grad_norm": 7.273393630981445, |
|
"learning_rate": 2.9842105263157896e-06, |
|
"loss": 0.4982, |
|
"step": 22 |
|
}, |
|
{ |
|
"Batch Mean": 0.78411865234375, |
|
"accuracy": 0.59375, |
|
"epoch": 0.055, |
|
"step": 22 |
|
}, |
|
{ |
|
"Batch Mean": 0.3693389892578125, |
|
"accuracy": 0.625, |
|
"epoch": 0.055, |
|
"step": 22 |
|
}, |
|
{ |
|
"Batch Mean": 0.3277778625488281, |
|
"accuracy": 0.59375, |
|
"epoch": 0.055, |
|
"step": 22 |
|
}, |
|
{ |
|
"Batch Mean": 0.23564910888671875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.055, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0575, |
|
"grad_norm": 10.978965759277344, |
|
"learning_rate": 2.9763157894736843e-06, |
|
"loss": 0.6967, |
|
"step": 23 |
|
}, |
|
{ |
|
"Batch Mean": 0.3892631530761719, |
|
"accuracy": 0.71875, |
|
"epoch": 0.0575, |
|
"step": 23 |
|
}, |
|
{ |
|
"Batch Mean": 0.4144134521484375, |
|
"accuracy": 0.65625, |
|
"epoch": 0.0575, |
|
"step": 23 |
|
}, |
|
{ |
|
"Batch Mean": 0.201019287109375, |
|
"accuracy": 0.6875, |
|
"epoch": 0.0575, |
|
"step": 23 |
|
}, |
|
{ |
|
"Batch Mean": 0.15361404418945312, |
|
"accuracy": 0.5625, |
|
"epoch": 0.0575, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.72164249420166, |
|
"learning_rate": 2.968421052631579e-06, |
|
"loss": 0.657, |
|
"step": 24 |
|
}, |
|
{ |
|
"Batch Mean": 0.32332611083984375, |
|
"accuracy": 0.65625, |
|
"epoch": 0.06, |
|
"step": 24 |
|
}, |
|
{ |
|
"Batch Mean": -0.45644378662109375, |
|
"accuracy": 0.71875, |
|
"epoch": 0.06, |
|
"step": 24 |
|
}, |
|
{ |
|
"Batch Mean": 0.10271453857421875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.06, |
|
"step": 24 |
|
}, |
|
{ |
|
"Batch Mean": 0.5616731643676758, |
|
"accuracy": 0.78125, |
|
"epoch": 0.06, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 10.953572273254395, |
|
"learning_rate": 2.960526315789474e-06, |
|
"loss": 0.6313, |
|
"step": 25 |
|
}, |
|
{ |
|
"Batch Mean": -0.08791732788085938, |
|
"accuracy": 0.71875, |
|
"epoch": 0.0625, |
|
"step": 25 |
|
}, |
|
{ |
|
"Batch Mean": -0.12505340576171875, |
|
"accuracy": 0.71875, |
|
"epoch": 0.0625, |
|
"step": 25 |
|
}, |
|
{ |
|
"Batch Mean": 0.2984886169433594, |
|
"accuracy": 0.75, |
|
"epoch": 0.0625, |
|
"step": 25 |
|
}, |
|
{ |
|
"Batch Mean": -0.2277584969997406, |
|
"accuracy": 0.65625, |
|
"epoch": 0.0625, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.065, |
|
"grad_norm": 8.867247581481934, |
|
"learning_rate": 2.9526315789473685e-06, |
|
"loss": 0.5531, |
|
"step": 26 |
|
}, |
|
{ |
|
"Batch Mean": -0.310638427734375, |
|
"accuracy": 0.875, |
|
"epoch": 0.065, |
|
"step": 26 |
|
}, |
|
{ |
|
"Batch Mean": 0.05762290954589844, |
|
"accuracy": 0.71875, |
|
"epoch": 0.065, |
|
"step": 26 |
|
}, |
|
{ |
|
"Batch Mean": -0.3841552734375, |
|
"accuracy": 0.75, |
|
"epoch": 0.065, |
|
"step": 26 |
|
}, |
|
{ |
|
"Batch Mean": -0.13448715209960938, |
|
"accuracy": 0.78125, |
|
"epoch": 0.065, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0675, |
|
"grad_norm": 7.167004585266113, |
|
"learning_rate": 2.9447368421052633e-06, |
|
"loss": 0.4927, |
|
"step": 27 |
|
}, |
|
{ |
|
"Batch Mean": -0.5082488059997559, |
|
"accuracy": 0.59375, |
|
"epoch": 0.0675, |
|
"step": 27 |
|
}, |
|
{ |
|
"Batch Mean": -0.5335745811462402, |
|
"accuracy": 0.65625, |
|
"epoch": 0.0675, |
|
"step": 27 |
|
}, |
|
{ |
|
"Batch Mean": -0.3728065490722656, |
|
"accuracy": 0.65625, |
|
"epoch": 0.0675, |
|
"step": 27 |
|
}, |
|
{ |
|
"Batch Mean": -0.48749029636383057, |
|
"accuracy": 0.65625, |
|
"epoch": 0.0675, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 9.99916934967041, |
|
"learning_rate": 2.936842105263158e-06, |
|
"loss": 0.6787, |
|
"step": 28 |
|
}, |
|
{ |
|
"Batch Mean": -0.5768375396728516, |
|
"accuracy": 0.78125, |
|
"epoch": 0.07, |
|
"step": 28 |
|
}, |
|
{ |
|
"Batch Mean": -0.36152684688568115, |
|
"accuracy": 0.71875, |
|
"epoch": 0.07, |
|
"step": 28 |
|
}, |
|
{ |
|
"Batch Mean": -0.6082801818847656, |
|
"accuracy": 0.59375, |
|
"epoch": 0.07, |
|
"step": 28 |
|
}, |
|
{ |
|
"Batch Mean": -0.5176200866699219, |
|
"accuracy": 0.65625, |
|
"epoch": 0.07, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0725, |
|
"grad_norm": 6.558942794799805, |
|
"learning_rate": 2.9289473684210528e-06, |
|
"loss": 0.571, |
|
"step": 29 |
|
}, |
|
{ |
|
"Batch Mean": -0.3009366989135742, |
|
"accuracy": 0.78125, |
|
"epoch": 0.0725, |
|
"step": 29 |
|
}, |
|
{ |
|
"Batch Mean": -0.4234275817871094, |
|
"accuracy": 0.75, |
|
"epoch": 0.0725, |
|
"step": 29 |
|
}, |
|
{ |
|
"Batch Mean": -0.4476432800292969, |
|
"accuracy": 0.78125, |
|
"epoch": 0.0725, |
|
"step": 29 |
|
}, |
|
{ |
|
"Batch Mean": -0.6630382537841797, |
|
"accuracy": 0.71875, |
|
"epoch": 0.0725, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.075, |
|
"grad_norm": 5.937437534332275, |
|
"learning_rate": 2.9210526315789475e-06, |
|
"loss": 0.5233, |
|
"step": 30 |
|
}, |
|
{ |
|
"Batch Mean": -0.47089385986328125, |
|
"accuracy": 0.65625, |
|
"epoch": 0.075, |
|
"step": 30 |
|
}, |
|
{ |
|
"Batch Mean": -0.5186127424240112, |
|
"accuracy": 0.78125, |
|
"epoch": 0.075, |
|
"step": 30 |
|
}, |
|
{ |
|
"Batch Mean": -0.5250816345214844, |
|
"accuracy": 0.5625, |
|
"epoch": 0.075, |
|
"step": 30 |
|
}, |
|
{ |
|
"Batch Mean": -0.3480682373046875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.075, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0775, |
|
"grad_norm": 5.8368072509765625, |
|
"learning_rate": 2.9131578947368423e-06, |
|
"loss": 0.5172, |
|
"step": 31 |
|
}, |
|
{ |
|
"Batch Mean": -0.29285621643066406, |
|
"accuracy": 0.8125, |
|
"epoch": 0.0775, |
|
"step": 31 |
|
}, |
|
{ |
|
"Batch Mean": -0.3106422424316406, |
|
"accuracy": 0.84375, |
|
"epoch": 0.0775, |
|
"step": 31 |
|
}, |
|
{ |
|
"Batch Mean": 0.005329132080078125, |
|
"accuracy": 0.71875, |
|
"epoch": 0.0775, |
|
"step": 31 |
|
}, |
|
{ |
|
"Batch Mean": -0.1413421630859375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.0775, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.706140995025635, |
|
"learning_rate": 2.905263157894737e-06, |
|
"loss": 0.5095, |
|
"step": 32 |
|
}, |
|
{ |
|
"Batch Mean": -0.04312324523925781, |
|
"accuracy": 0.6875, |
|
"epoch": 0.08, |
|
"step": 32 |
|
}, |
|
{ |
|
"Batch Mean": -0.10883808135986328, |
|
"accuracy": 0.71875, |
|
"epoch": 0.08, |
|
"step": 32 |
|
}, |
|
{ |
|
"Batch Mean": 0.3197288513183594, |
|
"accuracy": 0.71875, |
|
"epoch": 0.08, |
|
"step": 32 |
|
}, |
|
{ |
|
"Batch Mean": -0.13158416748046875, |
|
"accuracy": 0.65625, |
|
"epoch": 0.08, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0825, |
|
"grad_norm": 6.042052268981934, |
|
"learning_rate": 2.8973684210526318e-06, |
|
"loss": 0.5717, |
|
"step": 33 |
|
}, |
|
{ |
|
"Batch Mean": 0.0721282958984375, |
|
"accuracy": 0.75, |
|
"epoch": 0.0825, |
|
"step": 33 |
|
}, |
|
{ |
|
"Batch Mean": 0.05409049987792969, |
|
"accuracy": 0.71875, |
|
"epoch": 0.0825, |
|
"step": 33 |
|
}, |
|
{ |
|
"Batch Mean": -0.04035043716430664, |
|
"accuracy": 0.625, |
|
"epoch": 0.0825, |
|
"step": 33 |
|
}, |
|
{ |
|
"Batch Mean": -0.04631471633911133, |
|
"accuracy": 0.71875, |
|
"epoch": 0.0825, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.085, |
|
"grad_norm": 5.908041954040527, |
|
"learning_rate": 2.8894736842105265e-06, |
|
"loss": 0.5446, |
|
"step": 34 |
|
}, |
|
{ |
|
"Batch Mean": 0.2712249755859375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.085, |
|
"step": 34 |
|
}, |
|
{ |
|
"Batch Mean": 0.179473876953125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.085, |
|
"step": 34 |
|
}, |
|
{ |
|
"Batch Mean": -0.01055145263671875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.085, |
|
"step": 34 |
|
}, |
|
{ |
|
"Batch Mean": 0.06919479370117188, |
|
"accuracy": 0.8125, |
|
"epoch": 0.085, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0875, |
|
"grad_norm": 4.990839958190918, |
|
"learning_rate": 2.8815789473684213e-06, |
|
"loss": 0.4607, |
|
"step": 35 |
|
}, |
|
{ |
|
"Batch Mean": -0.017984390258789062, |
|
"accuracy": 0.78125, |
|
"epoch": 0.0875, |
|
"step": 35 |
|
}, |
|
{ |
|
"Batch Mean": 0.075164794921875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.0875, |
|
"step": 35 |
|
}, |
|
{ |
|
"Batch Mean": 0.20074462890625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.0875, |
|
"step": 35 |
|
}, |
|
{ |
|
"Batch Mean": -0.03507876396179199, |
|
"accuracy": 0.65625, |
|
"epoch": 0.0875, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.7467803955078125, |
|
"learning_rate": 2.873684210526316e-06, |
|
"loss": 0.5038, |
|
"step": 36 |
|
}, |
|
{ |
|
"Batch Mean": 0.2868976593017578, |
|
"accuracy": 0.75, |
|
"epoch": 0.09, |
|
"step": 36 |
|
}, |
|
{ |
|
"Batch Mean": 0.16400146484375, |
|
"accuracy": 0.65625, |
|
"epoch": 0.09, |
|
"step": 36 |
|
}, |
|
{ |
|
"Batch Mean": 0.2293224334716797, |
|
"accuracy": 0.71875, |
|
"epoch": 0.09, |
|
"step": 36 |
|
}, |
|
{ |
|
"Batch Mean": 0.2969036102294922, |
|
"accuracy": 0.6875, |
|
"epoch": 0.09, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0925, |
|
"grad_norm": 6.629448413848877, |
|
"learning_rate": 2.8657894736842103e-06, |
|
"loss": 0.5233, |
|
"step": 37 |
|
}, |
|
{ |
|
"Batch Mean": -0.07112598419189453, |
|
"accuracy": 0.6875, |
|
"epoch": 0.0925, |
|
"step": 37 |
|
}, |
|
{ |
|
"Batch Mean": 0.25348663330078125, |
|
"accuracy": 0.9375, |
|
"epoch": 0.0925, |
|
"step": 37 |
|
}, |
|
{ |
|
"Batch Mean": 0.2884788513183594, |
|
"accuracy": 0.6875, |
|
"epoch": 0.0925, |
|
"step": 37 |
|
}, |
|
{ |
|
"Batch Mean": 0.06340456008911133, |
|
"accuracy": 0.71875, |
|
"epoch": 0.0925, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.095, |
|
"grad_norm": 6.545988082885742, |
|
"learning_rate": 2.857894736842105e-06, |
|
"loss": 0.521, |
|
"step": 38 |
|
}, |
|
{ |
|
"Batch Mean": 0.372711181640625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.095, |
|
"step": 38 |
|
}, |
|
{ |
|
"Batch Mean": 0.2590770721435547, |
|
"accuracy": 0.53125, |
|
"epoch": 0.095, |
|
"step": 38 |
|
}, |
|
{ |
|
"Batch Mean": 0.016815185546875, |
|
"accuracy": 0.6875, |
|
"epoch": 0.095, |
|
"step": 38 |
|
}, |
|
{ |
|
"Batch Mean": 0.0049419403076171875, |
|
"accuracy": 0.71875, |
|
"epoch": 0.095, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0975, |
|
"grad_norm": 9.898524284362793, |
|
"learning_rate": 2.85e-06, |
|
"loss": 0.6255, |
|
"step": 39 |
|
}, |
|
{ |
|
"Batch Mean": 0.6515955924987793, |
|
"accuracy": 0.6875, |
|
"epoch": 0.0975, |
|
"step": 39 |
|
}, |
|
{ |
|
"Batch Mean": 0.4063148498535156, |
|
"accuracy": 0.8125, |
|
"epoch": 0.0975, |
|
"step": 39 |
|
}, |
|
{ |
|
"Batch Mean": 0.1270294189453125, |
|
"accuracy": 0.71875, |
|
"epoch": 0.0975, |
|
"step": 39 |
|
}, |
|
{ |
|
"Batch Mean": 0.4789772033691406, |
|
"accuracy": 0.6875, |
|
"epoch": 0.0975, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 6.953475475311279, |
|
"learning_rate": 2.8421052631578946e-06, |
|
"loss": 0.4934, |
|
"step": 40 |
|
}, |
|
{ |
|
"Batch Mean": 0.25176239013671875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1, |
|
"step": 40 |
|
}, |
|
{ |
|
"Batch Mean": 0.4009513854980469, |
|
"accuracy": 0.65625, |
|
"epoch": 0.1, |
|
"step": 40 |
|
}, |
|
{ |
|
"Batch Mean": 0.6202306747436523, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1, |
|
"step": 40 |
|
}, |
|
{ |
|
"Batch Mean": 0.2911343574523926, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1025, |
|
"grad_norm": 7.0007123947143555, |
|
"learning_rate": 2.8342105263157897e-06, |
|
"loss": 0.4957, |
|
"step": 41 |
|
}, |
|
{ |
|
"Batch Mean": 0.13779544830322266, |
|
"accuracy": 0.625, |
|
"epoch": 0.1025, |
|
"step": 41 |
|
}, |
|
{ |
|
"Batch Mean": 0.5141849517822266, |
|
"accuracy": 0.84375, |
|
"epoch": 0.1025, |
|
"step": 41 |
|
}, |
|
{ |
|
"Batch Mean": 0.12182235717773438, |
|
"accuracy": 0.71875, |
|
"epoch": 0.1025, |
|
"step": 41 |
|
}, |
|
{ |
|
"Batch Mean": 0.09358537197113037, |
|
"accuracy": 0.65625, |
|
"epoch": 0.1025, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.105, |
|
"grad_norm": 8.165699005126953, |
|
"learning_rate": 2.8263157894736845e-06, |
|
"loss": 0.5642, |
|
"step": 42 |
|
}, |
|
{ |
|
"Batch Mean": 0.26740550994873047, |
|
"accuracy": 0.8125, |
|
"epoch": 0.105, |
|
"step": 42 |
|
}, |
|
{ |
|
"Batch Mean": -0.07419204711914062, |
|
"accuracy": 0.71875, |
|
"epoch": 0.105, |
|
"step": 42 |
|
}, |
|
{ |
|
"Batch Mean": 0.2999420166015625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.105, |
|
"step": 42 |
|
}, |
|
{ |
|
"Batch Mean": -0.2398681640625, |
|
"accuracy": 0.75, |
|
"epoch": 0.105, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1075, |
|
"grad_norm": 7.090755939483643, |
|
"learning_rate": 2.8184210526315792e-06, |
|
"loss": 0.5136, |
|
"step": 43 |
|
}, |
|
{ |
|
"Batch Mean": 0.3058357238769531, |
|
"accuracy": 0.875, |
|
"epoch": 0.1075, |
|
"step": 43 |
|
}, |
|
{ |
|
"Batch Mean": 0.10181450843811035, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1075, |
|
"step": 43 |
|
}, |
|
{ |
|
"Batch Mean": -0.07529067993164062, |
|
"accuracy": 0.71875, |
|
"epoch": 0.1075, |
|
"step": 43 |
|
}, |
|
{ |
|
"Batch Mean": 0.46073150634765625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1075, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.939328670501709, |
|
"learning_rate": 2.810526315789474e-06, |
|
"loss": 0.464, |
|
"step": 44 |
|
}, |
|
{ |
|
"Batch Mean": -0.13095474243164062, |
|
"accuracy": 0.6875, |
|
"epoch": 0.11, |
|
"step": 44 |
|
}, |
|
{ |
|
"Batch Mean": 0.32462239265441895, |
|
"accuracy": 0.875, |
|
"epoch": 0.11, |
|
"step": 44 |
|
}, |
|
{ |
|
"Batch Mean": -0.15337753295898438, |
|
"accuracy": 0.78125, |
|
"epoch": 0.11, |
|
"step": 44 |
|
}, |
|
{ |
|
"Batch Mean": 0.38422298431396484, |
|
"accuracy": 0.71875, |
|
"epoch": 0.11, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1125, |
|
"grad_norm": 6.517725944519043, |
|
"learning_rate": 2.8026315789473687e-06, |
|
"loss": 0.4854, |
|
"step": 45 |
|
}, |
|
{ |
|
"Batch Mean": 0.290924072265625, |
|
"accuracy": 0.625, |
|
"epoch": 0.1125, |
|
"step": 45 |
|
}, |
|
{ |
|
"Batch Mean": 0.03897809982299805, |
|
"accuracy": 0.875, |
|
"epoch": 0.1125, |
|
"step": 45 |
|
}, |
|
{ |
|
"Batch Mean": 0.20547938346862793, |
|
"accuracy": 0.875, |
|
"epoch": 0.1125, |
|
"step": 45 |
|
}, |
|
{ |
|
"Batch Mean": 0.3288555145263672, |
|
"accuracy": 0.75, |
|
"epoch": 0.1125, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.115, |
|
"grad_norm": 5.711620330810547, |
|
"learning_rate": 2.7947368421052635e-06, |
|
"loss": 0.4129, |
|
"step": 46 |
|
}, |
|
{ |
|
"Batch Mean": 0.331978440284729, |
|
"accuracy": 0.78125, |
|
"epoch": 0.115, |
|
"step": 46 |
|
}, |
|
{ |
|
"Batch Mean": -0.12884771823883057, |
|
"accuracy": 0.71875, |
|
"epoch": 0.115, |
|
"step": 46 |
|
}, |
|
{ |
|
"Batch Mean": 0.2715787887573242, |
|
"accuracy": 0.65625, |
|
"epoch": 0.115, |
|
"step": 46 |
|
}, |
|
{ |
|
"Batch Mean": 0.3961639404296875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.115, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1175, |
|
"grad_norm": 6.815968036651611, |
|
"learning_rate": 2.7868421052631578e-06, |
|
"loss": 0.5217, |
|
"step": 47 |
|
}, |
|
{ |
|
"Batch Mean": -0.05124783515930176, |
|
"accuracy": 0.84375, |
|
"epoch": 0.1175, |
|
"step": 47 |
|
}, |
|
{ |
|
"Batch Mean": -0.4043617248535156, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1175, |
|
"step": 47 |
|
}, |
|
{ |
|
"Batch Mean": 0.21244239807128906, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1175, |
|
"step": 47 |
|
}, |
|
{ |
|
"Batch Mean": -0.09090805053710938, |
|
"accuracy": 0.75, |
|
"epoch": 0.1175, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 6.305139541625977, |
|
"learning_rate": 2.7789473684210525e-06, |
|
"loss": 0.4484, |
|
"step": 48 |
|
}, |
|
{ |
|
"Batch Mean": 0.3022747039794922, |
|
"accuracy": 0.65625, |
|
"epoch": 0.12, |
|
"step": 48 |
|
}, |
|
{ |
|
"Batch Mean": -0.013670921325683594, |
|
"accuracy": 0.6875, |
|
"epoch": 0.12, |
|
"step": 48 |
|
}, |
|
{ |
|
"Batch Mean": 0.4046478271484375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.12, |
|
"step": 48 |
|
}, |
|
{ |
|
"Batch Mean": 0.16419363021850586, |
|
"accuracy": 0.84375, |
|
"epoch": 0.12, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.1225, |
|
"grad_norm": 5.598595142364502, |
|
"learning_rate": 2.7710526315789473e-06, |
|
"loss": 0.4684, |
|
"step": 49 |
|
}, |
|
{ |
|
"Batch Mean": -0.24893569946289062, |
|
"accuracy": 0.6875, |
|
"epoch": 0.1225, |
|
"step": 49 |
|
}, |
|
{ |
|
"Batch Mean": -0.2393360137939453, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1225, |
|
"step": 49 |
|
}, |
|
{ |
|
"Batch Mean": 0.2698392868041992, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1225, |
|
"step": 49 |
|
}, |
|
{ |
|
"Batch Mean": -0.3564453125, |
|
"accuracy": 0.75, |
|
"epoch": 0.1225, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 6.394057750701904, |
|
"learning_rate": 2.763157894736842e-06, |
|
"loss": 0.4703, |
|
"step": 50 |
|
}, |
|
{ |
|
"Batch Mean": -0.09824085235595703, |
|
"accuracy": 0.71875, |
|
"epoch": 0.125, |
|
"step": 50 |
|
}, |
|
{ |
|
"Batch Mean": -0.1602630615234375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.125, |
|
"step": 50 |
|
}, |
|
{ |
|
"Batch Mean": -0.6205692291259766, |
|
"accuracy": 0.625, |
|
"epoch": 0.125, |
|
"step": 50 |
|
}, |
|
{ |
|
"Batch Mean": 0.06192302703857422, |
|
"accuracy": 0.75, |
|
"epoch": 0.125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1275, |
|
"grad_norm": 7.542079925537109, |
|
"learning_rate": 2.7552631578947368e-06, |
|
"loss": 0.4731, |
|
"step": 51 |
|
}, |
|
{ |
|
"Batch Mean": -0.24329090118408203, |
|
"accuracy": 0.71875, |
|
"epoch": 0.1275, |
|
"step": 51 |
|
}, |
|
{ |
|
"Batch Mean": 0.277587890625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1275, |
|
"step": 51 |
|
}, |
|
{ |
|
"Batch Mean": -0.1536083221435547, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1275, |
|
"step": 51 |
|
}, |
|
{ |
|
"Batch Mean": -0.2829427719116211, |
|
"accuracy": 0.90625, |
|
"epoch": 0.1275, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.608920097351074, |
|
"learning_rate": 2.7473684210526315e-06, |
|
"loss": 0.4472, |
|
"step": 52 |
|
}, |
|
{ |
|
"Batch Mean": -0.2534487247467041, |
|
"accuracy": 0.78125, |
|
"epoch": 0.13, |
|
"step": 52 |
|
}, |
|
{ |
|
"Batch Mean": -0.3897590637207031, |
|
"accuracy": 0.6875, |
|
"epoch": 0.13, |
|
"step": 52 |
|
}, |
|
{ |
|
"Batch Mean": 0.0982666015625, |
|
"accuracy": 0.75, |
|
"epoch": 0.13, |
|
"step": 52 |
|
}, |
|
{ |
|
"Batch Mean": -0.19083404541015625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.13, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1325, |
|
"grad_norm": 9.115386962890625, |
|
"learning_rate": 2.7394736842105263e-06, |
|
"loss": 0.4964, |
|
"step": 53 |
|
}, |
|
{ |
|
"Batch Mean": -0.07914352416992188, |
|
"accuracy": 0.84375, |
|
"epoch": 0.1325, |
|
"step": 53 |
|
}, |
|
{ |
|
"Batch Mean": -0.8162860870361328, |
|
"accuracy": 0.75, |
|
"epoch": 0.1325, |
|
"step": 53 |
|
}, |
|
{ |
|
"Batch Mean": -0.9538593292236328, |
|
"accuracy": 0.71875, |
|
"epoch": 0.1325, |
|
"step": 53 |
|
}, |
|
{ |
|
"Batch Mean": 0.025072097778320312, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1325, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.135, |
|
"grad_norm": 9.654952049255371, |
|
"learning_rate": 2.7315789473684214e-06, |
|
"loss": 0.4771, |
|
"step": 54 |
|
}, |
|
{ |
|
"Batch Mean": -0.2607238292694092, |
|
"accuracy": 0.65625, |
|
"epoch": 0.135, |
|
"step": 54 |
|
}, |
|
{ |
|
"Batch Mean": 0.07077789306640625, |
|
"accuracy": 0.875, |
|
"epoch": 0.135, |
|
"step": 54 |
|
}, |
|
{ |
|
"Batch Mean": -0.2121124267578125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.135, |
|
"step": 54 |
|
}, |
|
{ |
|
"Batch Mean": 0.040355682373046875, |
|
"accuracy": 0.6875, |
|
"epoch": 0.135, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.1375, |
|
"grad_norm": 9.226275444030762, |
|
"learning_rate": 2.723684210526316e-06, |
|
"loss": 0.477, |
|
"step": 55 |
|
}, |
|
{ |
|
"Batch Mean": 0.14949023723602295, |
|
"accuracy": 0.75, |
|
"epoch": 0.1375, |
|
"step": 55 |
|
}, |
|
{ |
|
"Batch Mean": -0.2880672216415405, |
|
"accuracy": 0.875, |
|
"epoch": 0.1375, |
|
"step": 55 |
|
}, |
|
{ |
|
"Batch Mean": -0.037652015686035156, |
|
"accuracy": 0.75, |
|
"epoch": 0.1375, |
|
"step": 55 |
|
}, |
|
{ |
|
"Batch Mean": 0.11230850219726562, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1375, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 8.696858406066895, |
|
"learning_rate": 2.715789473684211e-06, |
|
"loss": 0.4385, |
|
"step": 56 |
|
}, |
|
{ |
|
"Batch Mean": -0.14437103271484375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.14, |
|
"step": 56 |
|
}, |
|
{ |
|
"Batch Mean": -0.3502960205078125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.14, |
|
"step": 56 |
|
}, |
|
{ |
|
"Batch Mean": -0.3359222412109375, |
|
"accuracy": 0.65625, |
|
"epoch": 0.14, |
|
"step": 56 |
|
}, |
|
{ |
|
"Batch Mean": -0.4460906982421875, |
|
"accuracy": 0.71875, |
|
"epoch": 0.14, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1425, |
|
"grad_norm": 10.204813003540039, |
|
"learning_rate": 2.7078947368421052e-06, |
|
"loss": 0.4971, |
|
"step": 57 |
|
}, |
|
{ |
|
"Batch Mean": -0.0918121337890625, |
|
"accuracy": 0.75, |
|
"epoch": 0.1425, |
|
"step": 57 |
|
}, |
|
{ |
|
"Batch Mean": 0.1797332763671875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1425, |
|
"step": 57 |
|
}, |
|
{ |
|
"Batch Mean": -0.22362709045410156, |
|
"accuracy": 0.65625, |
|
"epoch": 0.1425, |
|
"step": 57 |
|
}, |
|
{ |
|
"Batch Mean": -0.932403564453125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1425, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.145, |
|
"grad_norm": 9.547924995422363, |
|
"learning_rate": 2.7e-06, |
|
"loss": 0.5235, |
|
"step": 58 |
|
}, |
|
{ |
|
"Batch Mean": -0.69256591796875, |
|
"accuracy": 0.75, |
|
"epoch": 0.145, |
|
"step": 58 |
|
}, |
|
{ |
|
"Batch Mean": -0.408052921295166, |
|
"accuracy": 0.71875, |
|
"epoch": 0.145, |
|
"step": 58 |
|
}, |
|
{ |
|
"Batch Mean": -0.7247238159179688, |
|
"accuracy": 0.75, |
|
"epoch": 0.145, |
|
"step": 58 |
|
}, |
|
{ |
|
"Batch Mean": -0.5294733047485352, |
|
"accuracy": 0.8125, |
|
"epoch": 0.145, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.1475, |
|
"grad_norm": 8.18185043334961, |
|
"learning_rate": 2.6921052631578947e-06, |
|
"loss": 0.4697, |
|
"step": 59 |
|
}, |
|
{ |
|
"Batch Mean": -0.562103271484375, |
|
"accuracy": 0.75, |
|
"epoch": 0.1475, |
|
"step": 59 |
|
}, |
|
{ |
|
"Batch Mean": -0.36240386962890625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.1475, |
|
"step": 59 |
|
}, |
|
{ |
|
"Batch Mean": -0.8479537963867188, |
|
"accuracy": 0.75, |
|
"epoch": 0.1475, |
|
"step": 59 |
|
}, |
|
{ |
|
"Batch Mean": -0.5514106750488281, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1475, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 9.638142585754395, |
|
"learning_rate": 2.6842105263157895e-06, |
|
"loss": 0.4854, |
|
"step": 60 |
|
}, |
|
{ |
|
"Batch Mean": -0.9713249206542969, |
|
"accuracy": 0.875, |
|
"epoch": 0.15, |
|
"step": 60 |
|
}, |
|
{ |
|
"Batch Mean": -1.4701347351074219, |
|
"accuracy": 0.8125, |
|
"epoch": 0.15, |
|
"step": 60 |
|
}, |
|
{ |
|
"Batch Mean": -0.8054180145263672, |
|
"accuracy": 0.6875, |
|
"epoch": 0.15, |
|
"step": 60 |
|
}, |
|
{ |
|
"Batch Mean": -1.1165752410888672, |
|
"accuracy": 0.875, |
|
"epoch": 0.15, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1525, |
|
"grad_norm": 9.138744354248047, |
|
"learning_rate": 2.6763157894736842e-06, |
|
"loss": 0.4093, |
|
"step": 61 |
|
}, |
|
{ |
|
"Batch Mean": -1.2550277709960938, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1525, |
|
"step": 61 |
|
}, |
|
{ |
|
"Batch Mean": -0.9237594604492188, |
|
"accuracy": 0.6875, |
|
"epoch": 0.1525, |
|
"step": 61 |
|
}, |
|
{ |
|
"Batch Mean": -0.9178142547607422, |
|
"accuracy": 0.875, |
|
"epoch": 0.1525, |
|
"step": 61 |
|
}, |
|
{ |
|
"Batch Mean": -0.8621349334716797, |
|
"accuracy": 0.75, |
|
"epoch": 0.1525, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.155, |
|
"grad_norm": 9.812451362609863, |
|
"learning_rate": 2.668421052631579e-06, |
|
"loss": 0.4354, |
|
"step": 62 |
|
}, |
|
{ |
|
"Batch Mean": -1.3034553527832031, |
|
"accuracy": 0.78125, |
|
"epoch": 0.155, |
|
"step": 62 |
|
}, |
|
{ |
|
"Batch Mean": -1.0795440673828125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.155, |
|
"step": 62 |
|
}, |
|
{ |
|
"Batch Mean": -1.0960693359375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.155, |
|
"step": 62 |
|
}, |
|
{ |
|
"Batch Mean": -1.2091312408447266, |
|
"accuracy": 0.6875, |
|
"epoch": 0.155, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1575, |
|
"grad_norm": 9.518035888671875, |
|
"learning_rate": 2.6605263157894737e-06, |
|
"loss": 0.4399, |
|
"step": 63 |
|
}, |
|
{ |
|
"Batch Mean": -1.405853271484375, |
|
"accuracy": 0.6875, |
|
"epoch": 0.1575, |
|
"step": 63 |
|
}, |
|
{ |
|
"Batch Mean": -1.4421844482421875, |
|
"accuracy": 0.71875, |
|
"epoch": 0.1575, |
|
"step": 63 |
|
}, |
|
{ |
|
"Batch Mean": -1.2391834259033203, |
|
"accuracy": 0.6875, |
|
"epoch": 0.1575, |
|
"step": 63 |
|
}, |
|
{ |
|
"Batch Mean": -0.881195068359375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1575, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 10.348162651062012, |
|
"learning_rate": 2.6526315789473685e-06, |
|
"loss": 0.537, |
|
"step": 64 |
|
}, |
|
{ |
|
"Batch Mean": -1.0128021240234375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.16, |
|
"step": 64 |
|
}, |
|
{ |
|
"Batch Mean": -1.0150184631347656, |
|
"accuracy": 0.84375, |
|
"epoch": 0.16, |
|
"step": 64 |
|
}, |
|
{ |
|
"Batch Mean": -1.497243881225586, |
|
"accuracy": 0.78125, |
|
"epoch": 0.16, |
|
"step": 64 |
|
}, |
|
{ |
|
"Batch Mean": -0.910819947719574, |
|
"accuracy": 0.75, |
|
"epoch": 0.16, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.1625, |
|
"grad_norm": 8.633638381958008, |
|
"learning_rate": 2.644736842105263e-06, |
|
"loss": 0.4436, |
|
"step": 65 |
|
}, |
|
{ |
|
"Batch Mean": -1.0223121643066406, |
|
"accuracy": 0.71875, |
|
"epoch": 0.1625, |
|
"step": 65 |
|
}, |
|
{ |
|
"Batch Mean": -0.5706081390380859, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1625, |
|
"step": 65 |
|
}, |
|
{ |
|
"Batch Mean": -0.965911865234375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1625, |
|
"step": 65 |
|
}, |
|
{ |
|
"Batch Mean": -0.7304267883300781, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1625, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.165, |
|
"grad_norm": 8.088103294372559, |
|
"learning_rate": 2.636842105263158e-06, |
|
"loss": 0.4446, |
|
"step": 66 |
|
}, |
|
{ |
|
"Batch Mean": -0.4677067697048187, |
|
"accuracy": 0.8125, |
|
"epoch": 0.165, |
|
"step": 66 |
|
}, |
|
{ |
|
"Batch Mean": -1.4533824920654297, |
|
"accuracy": 0.78125, |
|
"epoch": 0.165, |
|
"step": 66 |
|
}, |
|
{ |
|
"Batch Mean": -0.78509521484375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.165, |
|
"step": 66 |
|
}, |
|
{ |
|
"Batch Mean": -0.8427619934082031, |
|
"accuracy": 0.78125, |
|
"epoch": 0.165, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.1675, |
|
"grad_norm": 7.766864776611328, |
|
"learning_rate": 2.6289473684210527e-06, |
|
"loss": 0.412, |
|
"step": 67 |
|
}, |
|
{ |
|
"Batch Mean": -1.0267219543457031, |
|
"accuracy": 0.75, |
|
"epoch": 0.1675, |
|
"step": 67 |
|
}, |
|
{ |
|
"Batch Mean": -0.0344390869140625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1675, |
|
"step": 67 |
|
}, |
|
{ |
|
"Batch Mean": -0.7120513916015625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1675, |
|
"step": 67 |
|
}, |
|
{ |
|
"Batch Mean": -0.8848686218261719, |
|
"accuracy": 0.75, |
|
"epoch": 0.1675, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 8.952485084533691, |
|
"learning_rate": 2.6210526315789474e-06, |
|
"loss": 0.4073, |
|
"step": 68 |
|
}, |
|
{ |
|
"Batch Mean": -1.2683296203613281, |
|
"accuracy": 0.78125, |
|
"epoch": 0.17, |
|
"step": 68 |
|
}, |
|
{ |
|
"Batch Mean": -0.9470596313476562, |
|
"accuracy": 0.6875, |
|
"epoch": 0.17, |
|
"step": 68 |
|
}, |
|
{ |
|
"Batch Mean": -1.2335700988769531, |
|
"accuracy": 0.78125, |
|
"epoch": 0.17, |
|
"step": 68 |
|
}, |
|
{ |
|
"Batch Mean": -0.9984736442565918, |
|
"accuracy": 0.71875, |
|
"epoch": 0.17, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1725, |
|
"grad_norm": 8.944815635681152, |
|
"learning_rate": 2.613157894736842e-06, |
|
"loss": 0.4827, |
|
"step": 69 |
|
}, |
|
{ |
|
"Batch Mean": -0.6530609130859375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1725, |
|
"step": 69 |
|
}, |
|
{ |
|
"Batch Mean": -0.6013336181640625, |
|
"accuracy": 0.6875, |
|
"epoch": 0.1725, |
|
"step": 69 |
|
}, |
|
{ |
|
"Batch Mean": -1.4489421844482422, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1725, |
|
"step": 69 |
|
}, |
|
{ |
|
"Batch Mean": -0.9736480712890625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.1725, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.175, |
|
"grad_norm": 8.779143333435059, |
|
"learning_rate": 2.605263157894737e-06, |
|
"loss": 0.4578, |
|
"step": 70 |
|
}, |
|
{ |
|
"Batch Mean": -0.9107780456542969, |
|
"accuracy": 0.84375, |
|
"epoch": 0.175, |
|
"step": 70 |
|
}, |
|
{ |
|
"Batch Mean": -1.1361122131347656, |
|
"accuracy": 0.6875, |
|
"epoch": 0.175, |
|
"step": 70 |
|
}, |
|
{ |
|
"Batch Mean": -0.6527862548828125, |
|
"accuracy": 0.9375, |
|
"epoch": 0.175, |
|
"step": 70 |
|
}, |
|
{ |
|
"Batch Mean": -0.7553470134735107, |
|
"accuracy": 0.6875, |
|
"epoch": 0.175, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1775, |
|
"grad_norm": 8.647814750671387, |
|
"learning_rate": 2.5973684210526317e-06, |
|
"loss": 0.4257, |
|
"step": 71 |
|
}, |
|
{ |
|
"Batch Mean": -0.41971588134765625, |
|
"accuracy": 0.75, |
|
"epoch": 0.1775, |
|
"step": 71 |
|
}, |
|
{ |
|
"Batch Mean": -0.705718994140625, |
|
"accuracy": 0.875, |
|
"epoch": 0.1775, |
|
"step": 71 |
|
}, |
|
{ |
|
"Batch Mean": -1.0686330795288086, |
|
"accuracy": 0.71875, |
|
"epoch": 0.1775, |
|
"step": 71 |
|
}, |
|
{ |
|
"Batch Mean": -0.8464865684509277, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1775, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 8.784235000610352, |
|
"learning_rate": 2.5894736842105264e-06, |
|
"loss": 0.3921, |
|
"step": 72 |
|
}, |
|
{ |
|
"Batch Mean": -0.7266769409179688, |
|
"accuracy": 0.8125, |
|
"epoch": 0.18, |
|
"step": 72 |
|
}, |
|
{ |
|
"Batch Mean": -0.7239456176757812, |
|
"accuracy": 0.8125, |
|
"epoch": 0.18, |
|
"step": 72 |
|
}, |
|
{ |
|
"Batch Mean": -0.6862373352050781, |
|
"accuracy": 0.8125, |
|
"epoch": 0.18, |
|
"step": 72 |
|
}, |
|
{ |
|
"Batch Mean": -0.525360107421875, |
|
"accuracy": 0.875, |
|
"epoch": 0.18, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1825, |
|
"grad_norm": 7.80237340927124, |
|
"learning_rate": 2.581578947368421e-06, |
|
"loss": 0.374, |
|
"step": 73 |
|
}, |
|
{ |
|
"Batch Mean": -0.9130859375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.1825, |
|
"step": 73 |
|
}, |
|
{ |
|
"Batch Mean": -0.35595703125, |
|
"accuracy": 0.875, |
|
"epoch": 0.1825, |
|
"step": 73 |
|
}, |
|
{ |
|
"Batch Mean": -0.8892440795898438, |
|
"accuracy": 0.6875, |
|
"epoch": 0.1825, |
|
"step": 73 |
|
}, |
|
{ |
|
"Batch Mean": -0.4263725280761719, |
|
"accuracy": 0.71875, |
|
"epoch": 0.1825, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.185, |
|
"grad_norm": 7.894434452056885, |
|
"learning_rate": 2.573684210526316e-06, |
|
"loss": 0.4405, |
|
"step": 74 |
|
}, |
|
{ |
|
"Batch Mean": -0.6322441101074219, |
|
"accuracy": 0.8125, |
|
"epoch": 0.185, |
|
"step": 74 |
|
}, |
|
{ |
|
"Batch Mean": -0.456390380859375, |
|
"accuracy": 0.875, |
|
"epoch": 0.185, |
|
"step": 74 |
|
}, |
|
{ |
|
"Batch Mean": -1.1346385478973389, |
|
"accuracy": 0.84375, |
|
"epoch": 0.185, |
|
"step": 74 |
|
}, |
|
{ |
|
"Batch Mean": -0.8706645965576172, |
|
"accuracy": 0.8125, |
|
"epoch": 0.185, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 8.292348861694336, |
|
"learning_rate": 2.5657894736842107e-06, |
|
"loss": 0.3895, |
|
"step": 75 |
|
}, |
|
{ |
|
"Batch Mean": -1.0383148193359375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1875, |
|
"step": 75 |
|
}, |
|
{ |
|
"Batch Mean": 0.19762420654296875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1875, |
|
"step": 75 |
|
}, |
|
{ |
|
"Batch Mean": 0.10494613647460938, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1875, |
|
"step": 75 |
|
}, |
|
{ |
|
"Batch Mean": -0.5181140899658203, |
|
"accuracy": 0.84375, |
|
"epoch": 0.1875, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 8.882412910461426, |
|
"learning_rate": 2.5578947368421054e-06, |
|
"loss": 0.3805, |
|
"step": 76 |
|
}, |
|
{ |
|
"Batch Mean": -0.4382622241973877, |
|
"accuracy": 0.75, |
|
"epoch": 0.19, |
|
"step": 76 |
|
}, |
|
{ |
|
"Batch Mean": -0.16241741180419922, |
|
"accuracy": 0.90625, |
|
"epoch": 0.19, |
|
"step": 76 |
|
}, |
|
{ |
|
"Batch Mean": -0.8776988983154297, |
|
"accuracy": 0.875, |
|
"epoch": 0.19, |
|
"step": 76 |
|
}, |
|
{ |
|
"Batch Mean": -0.6540908813476562, |
|
"accuracy": 0.8125, |
|
"epoch": 0.19, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1925, |
|
"grad_norm": 10.025094032287598, |
|
"learning_rate": 2.55e-06, |
|
"loss": 0.4001, |
|
"step": 77 |
|
}, |
|
{ |
|
"Batch Mean": 0.06690788269042969, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1925, |
|
"step": 77 |
|
}, |
|
{ |
|
"Batch Mean": -0.03551149368286133, |
|
"accuracy": 0.75, |
|
"epoch": 0.1925, |
|
"step": 77 |
|
}, |
|
{ |
|
"Batch Mean": 0.17040252685546875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1925, |
|
"step": 77 |
|
}, |
|
{ |
|
"Batch Mean": -0.019598007202148438, |
|
"accuracy": 0.90625, |
|
"epoch": 0.1925, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.195, |
|
"grad_norm": 9.164822578430176, |
|
"learning_rate": 2.542105263157895e-06, |
|
"loss": 0.3807, |
|
"step": 78 |
|
}, |
|
{ |
|
"Batch Mean": 0.0378570556640625, |
|
"accuracy": 0.75, |
|
"epoch": 0.195, |
|
"step": 78 |
|
}, |
|
{ |
|
"Batch Mean": 0.3024101257324219, |
|
"accuracy": 0.75, |
|
"epoch": 0.195, |
|
"step": 78 |
|
}, |
|
{ |
|
"Batch Mean": 0.1015625, |
|
"accuracy": 0.6875, |
|
"epoch": 0.195, |
|
"step": 78 |
|
}, |
|
{ |
|
"Batch Mean": 0.10402488708496094, |
|
"accuracy": 0.875, |
|
"epoch": 0.195, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1975, |
|
"grad_norm": 9.871844291687012, |
|
"learning_rate": 2.5342105263157892e-06, |
|
"loss": 0.4781, |
|
"step": 79 |
|
}, |
|
{ |
|
"Batch Mean": 0.1356794238090515, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1975, |
|
"step": 79 |
|
}, |
|
{ |
|
"Batch Mean": 0.0782623291015625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1975, |
|
"step": 79 |
|
}, |
|
{ |
|
"Batch Mean": -0.12647247314453125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.1975, |
|
"step": 79 |
|
}, |
|
{ |
|
"Batch Mean": 0.2567100524902344, |
|
"accuracy": 0.78125, |
|
"epoch": 0.1975, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 9.033759117126465, |
|
"learning_rate": 2.526315789473684e-06, |
|
"loss": 0.4288, |
|
"step": 80 |
|
}, |
|
{ |
|
"Batch Mean": -1.4123163223266602, |
|
"accuracy": 0.90625, |
|
"epoch": 0.2, |
|
"step": 80 |
|
}, |
|
{ |
|
"Batch Mean": 0.2380084991455078, |
|
"accuracy": 0.6875, |
|
"epoch": 0.2, |
|
"step": 80 |
|
}, |
|
{ |
|
"Batch Mean": 0.15204644203186035, |
|
"accuracy": 0.75, |
|
"epoch": 0.2, |
|
"step": 80 |
|
}, |
|
{ |
|
"Batch Mean": -0.0515289306640625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2025, |
|
"grad_norm": 11.619911193847656, |
|
"learning_rate": 2.5184210526315787e-06, |
|
"loss": 0.477, |
|
"step": 81 |
|
}, |
|
{ |
|
"Batch Mean": -0.6675605773925781, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2025, |
|
"step": 81 |
|
}, |
|
{ |
|
"Batch Mean": -0.5373616218566895, |
|
"accuracy": 0.75, |
|
"epoch": 0.2025, |
|
"step": 81 |
|
}, |
|
{ |
|
"Batch Mean": 0.20585250854492188, |
|
"accuracy": 0.75, |
|
"epoch": 0.2025, |
|
"step": 81 |
|
}, |
|
{ |
|
"Batch Mean": 0.050811767578125, |
|
"accuracy": 0.71875, |
|
"epoch": 0.2025, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.205, |
|
"grad_norm": 10.653642654418945, |
|
"learning_rate": 2.510526315789474e-06, |
|
"loss": 0.4253, |
|
"step": 82 |
|
}, |
|
{ |
|
"Batch Mean": -0.4852294921875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.205, |
|
"step": 82 |
|
}, |
|
{ |
|
"Batch Mean": 0.31320953369140625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.205, |
|
"step": 82 |
|
}, |
|
{ |
|
"Batch Mean": 0.638336181640625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.205, |
|
"step": 82 |
|
}, |
|
{ |
|
"Batch Mean": 0.02130126953125, |
|
"accuracy": 0.875, |
|
"epoch": 0.205, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.2075, |
|
"grad_norm": 8.252171516418457, |
|
"learning_rate": 2.5026315789473686e-06, |
|
"loss": 0.3359, |
|
"step": 83 |
|
}, |
|
{ |
|
"Batch Mean": -0.85870361328125, |
|
"accuracy": 0.71875, |
|
"epoch": 0.2075, |
|
"step": 83 |
|
}, |
|
{ |
|
"Batch Mean": -0.06537818908691406, |
|
"accuracy": 0.71875, |
|
"epoch": 0.2075, |
|
"step": 83 |
|
}, |
|
{ |
|
"Batch Mean": 0.6328989267349243, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2075, |
|
"step": 83 |
|
}, |
|
{ |
|
"Batch Mean": 0.5955638885498047, |
|
"accuracy": 0.6875, |
|
"epoch": 0.2075, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 10.137153625488281, |
|
"learning_rate": 2.4947368421052634e-06, |
|
"loss": 0.5026, |
|
"step": 84 |
|
}, |
|
{ |
|
"Batch Mean": -0.64013671875, |
|
"accuracy": 0.6875, |
|
"epoch": 0.21, |
|
"step": 84 |
|
}, |
|
{ |
|
"Batch Mean": -0.056926727294921875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.21, |
|
"step": 84 |
|
}, |
|
{ |
|
"Batch Mean": -0.9448051452636719, |
|
"accuracy": 0.84375, |
|
"epoch": 0.21, |
|
"step": 84 |
|
}, |
|
{ |
|
"Batch Mean": 0.47279930114746094, |
|
"accuracy": 0.8125, |
|
"epoch": 0.21, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2125, |
|
"grad_norm": 10.299038887023926, |
|
"learning_rate": 2.486842105263158e-06, |
|
"loss": 0.4537, |
|
"step": 85 |
|
}, |
|
{ |
|
"Batch Mean": -0.37763214111328125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2125, |
|
"step": 85 |
|
}, |
|
{ |
|
"Batch Mean": -1.2001113891601562, |
|
"accuracy": 0.625, |
|
"epoch": 0.2125, |
|
"step": 85 |
|
}, |
|
{ |
|
"Batch Mean": -1.2707452774047852, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2125, |
|
"step": 85 |
|
}, |
|
{ |
|
"Batch Mean": -0.43585968017578125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2125, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.215, |
|
"grad_norm": 12.326623916625977, |
|
"learning_rate": 2.478947368421053e-06, |
|
"loss": 0.4968, |
|
"step": 86 |
|
}, |
|
{ |
|
"Batch Mean": -0.1305999755859375, |
|
"accuracy": 0.6875, |
|
"epoch": 0.215, |
|
"step": 86 |
|
}, |
|
{ |
|
"Batch Mean": 0.22275471687316895, |
|
"accuracy": 0.8125, |
|
"epoch": 0.215, |
|
"step": 86 |
|
}, |
|
{ |
|
"Batch Mean": 0.18460631370544434, |
|
"accuracy": 0.875, |
|
"epoch": 0.215, |
|
"step": 86 |
|
}, |
|
{ |
|
"Batch Mean": 0.15557098388671875, |
|
"accuracy": 0.71875, |
|
"epoch": 0.215, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.2175, |
|
"grad_norm": 9.211568832397461, |
|
"learning_rate": 2.4710526315789476e-06, |
|
"loss": 0.4499, |
|
"step": 87 |
|
}, |
|
{ |
|
"Batch Mean": -0.503875732421875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2175, |
|
"step": 87 |
|
}, |
|
{ |
|
"Batch Mean": -0.20687103271484375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2175, |
|
"step": 87 |
|
}, |
|
{ |
|
"Batch Mean": 0.28580427169799805, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2175, |
|
"step": 87 |
|
}, |
|
{ |
|
"Batch Mean": -0.6501541137695312, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2175, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 8.780227661132812, |
|
"learning_rate": 2.4631578947368424e-06, |
|
"loss": 0.4483, |
|
"step": 88 |
|
}, |
|
{ |
|
"Batch Mean": -0.8565025329589844, |
|
"accuracy": 0.71875, |
|
"epoch": 0.22, |
|
"step": 88 |
|
}, |
|
{ |
|
"Batch Mean": -0.7164955139160156, |
|
"accuracy": 0.90625, |
|
"epoch": 0.22, |
|
"step": 88 |
|
}, |
|
{ |
|
"Batch Mean": -1.0312111377716064, |
|
"accuracy": 0.75, |
|
"epoch": 0.22, |
|
"step": 88 |
|
}, |
|
{ |
|
"Batch Mean": -1.4193916320800781, |
|
"accuracy": 0.8125, |
|
"epoch": 0.22, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2225, |
|
"grad_norm": 7.674242973327637, |
|
"learning_rate": 2.4552631578947367e-06, |
|
"loss": 0.4364, |
|
"step": 89 |
|
}, |
|
{ |
|
"Batch Mean": -0.4898567199707031, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2225, |
|
"step": 89 |
|
}, |
|
{ |
|
"Batch Mean": -0.9655284881591797, |
|
"accuracy": 0.75, |
|
"epoch": 0.2225, |
|
"step": 89 |
|
}, |
|
{ |
|
"Batch Mean": -0.719451904296875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2225, |
|
"step": 89 |
|
}, |
|
{ |
|
"Batch Mean": -1.1983413696289062, |
|
"accuracy": 0.71875, |
|
"epoch": 0.2225, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.225, |
|
"grad_norm": 7.926375389099121, |
|
"learning_rate": 2.4473684210526314e-06, |
|
"loss": 0.4139, |
|
"step": 90 |
|
}, |
|
{ |
|
"Batch Mean": -0.1603546142578125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.225, |
|
"step": 90 |
|
}, |
|
{ |
|
"Batch Mean": -1.3579235076904297, |
|
"accuracy": 0.65625, |
|
"epoch": 0.225, |
|
"step": 90 |
|
}, |
|
{ |
|
"Batch Mean": -1.4498214721679688, |
|
"accuracy": 0.75, |
|
"epoch": 0.225, |
|
"step": 90 |
|
}, |
|
{ |
|
"Batch Mean": -0.9952545166015625, |
|
"accuracy": 0.75, |
|
"epoch": 0.225, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2275, |
|
"grad_norm": 8.298666000366211, |
|
"learning_rate": 2.439473684210526e-06, |
|
"loss": 0.4497, |
|
"step": 91 |
|
}, |
|
{ |
|
"Batch Mean": -1.5188751220703125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2275, |
|
"step": 91 |
|
}, |
|
{ |
|
"Batch Mean": -0.718254566192627, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2275, |
|
"step": 91 |
|
}, |
|
{ |
|
"Batch Mean": -0.8531379699707031, |
|
"accuracy": 0.75, |
|
"epoch": 0.2275, |
|
"step": 91 |
|
}, |
|
{ |
|
"Batch Mean": -0.9513950347900391, |
|
"accuracy": 0.90625, |
|
"epoch": 0.2275, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 7.369481563568115, |
|
"learning_rate": 2.431578947368421e-06, |
|
"loss": 0.4209, |
|
"step": 92 |
|
}, |
|
{ |
|
"Batch Mean": -0.795166015625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.23, |
|
"step": 92 |
|
}, |
|
{ |
|
"Batch Mean": -0.4606513977050781, |
|
"accuracy": 0.78125, |
|
"epoch": 0.23, |
|
"step": 92 |
|
}, |
|
{ |
|
"Batch Mean": -0.41733360290527344, |
|
"accuracy": 0.84375, |
|
"epoch": 0.23, |
|
"step": 92 |
|
}, |
|
{ |
|
"Batch Mean": -0.9800624847412109, |
|
"accuracy": 0.6875, |
|
"epoch": 0.23, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2325, |
|
"grad_norm": 8.789040565490723, |
|
"learning_rate": 2.4236842105263157e-06, |
|
"loss": 0.4207, |
|
"step": 93 |
|
}, |
|
{ |
|
"Batch Mean": -0.7392101287841797, |
|
"accuracy": 0.875, |
|
"epoch": 0.2325, |
|
"step": 93 |
|
}, |
|
{ |
|
"Batch Mean": -1.0338478088378906, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2325, |
|
"step": 93 |
|
}, |
|
{ |
|
"Batch Mean": -0.620086669921875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2325, |
|
"step": 93 |
|
}, |
|
{ |
|
"Batch Mean": -1.3432998657226562, |
|
"accuracy": 0.75, |
|
"epoch": 0.2325, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.235, |
|
"grad_norm": 7.387351989746094, |
|
"learning_rate": 2.4157894736842104e-06, |
|
"loss": 0.4106, |
|
"step": 94 |
|
}, |
|
{ |
|
"Batch Mean": -0.6872539520263672, |
|
"accuracy": 0.6875, |
|
"epoch": 0.235, |
|
"step": 94 |
|
}, |
|
{ |
|
"Batch Mean": -0.4092254638671875, |
|
"accuracy": 0.875, |
|
"epoch": 0.235, |
|
"step": 94 |
|
}, |
|
{ |
|
"Batch Mean": -0.7674446105957031, |
|
"accuracy": 0.875, |
|
"epoch": 0.235, |
|
"step": 94 |
|
}, |
|
{ |
|
"Batch Mean": -1.0440845489501953, |
|
"accuracy": 0.78125, |
|
"epoch": 0.235, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2375, |
|
"grad_norm": 6.818679332733154, |
|
"learning_rate": 2.4078947368421056e-06, |
|
"loss": 0.4046, |
|
"step": 95 |
|
}, |
|
{ |
|
"Batch Mean": -0.782130241394043, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2375, |
|
"step": 95 |
|
}, |
|
{ |
|
"Batch Mean": -0.6030197143554688, |
|
"accuracy": 0.71875, |
|
"epoch": 0.2375, |
|
"step": 95 |
|
}, |
|
{ |
|
"Batch Mean": -0.3119964599609375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2375, |
|
"step": 95 |
|
}, |
|
{ |
|
"Batch Mean": -0.7195053100585938, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2375, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 7.574390411376953, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.4426, |
|
"step": 96 |
|
}, |
|
{ |
|
"Batch Mean": -0.3299369812011719, |
|
"accuracy": 0.84375, |
|
"epoch": 0.24, |
|
"step": 96 |
|
}, |
|
{ |
|
"Batch Mean": -0.3588829040527344, |
|
"accuracy": 0.90625, |
|
"epoch": 0.24, |
|
"step": 96 |
|
}, |
|
{ |
|
"Batch Mean": -0.445037841796875, |
|
"accuracy": 0.75, |
|
"epoch": 0.24, |
|
"step": 96 |
|
}, |
|
{ |
|
"Batch Mean": -0.33353519439697266, |
|
"accuracy": 0.875, |
|
"epoch": 0.24, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2425, |
|
"grad_norm": 7.771867275238037, |
|
"learning_rate": 2.392105263157895e-06, |
|
"loss": 0.3876, |
|
"step": 97 |
|
}, |
|
{ |
|
"Batch Mean": -0.3831367492675781, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2425, |
|
"step": 97 |
|
}, |
|
{ |
|
"Batch Mean": -0.16202545166015625, |
|
"accuracy": 0.75, |
|
"epoch": 0.2425, |
|
"step": 97 |
|
}, |
|
{ |
|
"Batch Mean": -0.038543701171875, |
|
"accuracy": 0.9375, |
|
"epoch": 0.2425, |
|
"step": 97 |
|
}, |
|
{ |
|
"Batch Mean": -0.24297523498535156, |
|
"accuracy": 0.75, |
|
"epoch": 0.2425, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.245, |
|
"grad_norm": 8.123610496520996, |
|
"learning_rate": 2.38421052631579e-06, |
|
"loss": 0.3876, |
|
"step": 98 |
|
}, |
|
{ |
|
"Batch Mean": 0.5521278381347656, |
|
"accuracy": 0.8125, |
|
"epoch": 0.245, |
|
"step": 98 |
|
}, |
|
{ |
|
"Batch Mean": 0.0193023681640625, |
|
"accuracy": 0.875, |
|
"epoch": 0.245, |
|
"step": 98 |
|
}, |
|
{ |
|
"Batch Mean": -0.08532905578613281, |
|
"accuracy": 0.875, |
|
"epoch": 0.245, |
|
"step": 98 |
|
}, |
|
{ |
|
"Batch Mean": -0.08370304107666016, |
|
"accuracy": 0.8125, |
|
"epoch": 0.245, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2475, |
|
"grad_norm": 7.886329174041748, |
|
"learning_rate": 2.376315789473684e-06, |
|
"loss": 0.3496, |
|
"step": 99 |
|
}, |
|
{ |
|
"Batch Mean": 0.37316322326660156, |
|
"accuracy": 0.875, |
|
"epoch": 0.2475, |
|
"step": 99 |
|
}, |
|
{ |
|
"Batch Mean": 0.47714996337890625, |
|
"accuracy": 0.875, |
|
"epoch": 0.2475, |
|
"step": 99 |
|
}, |
|
{ |
|
"Batch Mean": 0.09040069580078125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2475, |
|
"step": 99 |
|
}, |
|
{ |
|
"Batch Mean": 0.8167877197265625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2475, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 10.153704643249512, |
|
"learning_rate": 2.368421052631579e-06, |
|
"loss": 0.3803, |
|
"step": 100 |
|
}, |
|
{ |
|
"Batch Mean": 0.6466522216796875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.25, |
|
"step": 100 |
|
}, |
|
{ |
|
"Batch Mean": 1.1754875183105469, |
|
"accuracy": 0.71875, |
|
"epoch": 0.25, |
|
"step": 100 |
|
}, |
|
{ |
|
"Batch Mean": 0.6958694458007812, |
|
"accuracy": 0.84375, |
|
"epoch": 0.25, |
|
"step": 100 |
|
}, |
|
{ |
|
"Batch Mean": 0.01902008056640625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.25, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2525, |
|
"grad_norm": 9.173975944519043, |
|
"learning_rate": 2.3605263157894736e-06, |
|
"loss": 0.4147, |
|
"step": 101 |
|
}, |
|
{ |
|
"Batch Mean": 1.03076171875, |
|
"accuracy": 0.875, |
|
"epoch": 0.2525, |
|
"step": 101 |
|
}, |
|
{ |
|
"Batch Mean": 0.8861312866210938, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2525, |
|
"step": 101 |
|
}, |
|
{ |
|
"Batch Mean": 1.0565643310546875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2525, |
|
"step": 101 |
|
}, |
|
{ |
|
"Batch Mean": 1.2511253356933594, |
|
"accuracy": 0.875, |
|
"epoch": 0.2525, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.255, |
|
"grad_norm": 10.308727264404297, |
|
"learning_rate": 2.3526315789473684e-06, |
|
"loss": 0.4126, |
|
"step": 102 |
|
}, |
|
{ |
|
"Batch Mean": 0.4566202163696289, |
|
"accuracy": 0.78125, |
|
"epoch": 0.255, |
|
"step": 102 |
|
}, |
|
{ |
|
"Batch Mean": 1.6229639053344727, |
|
"accuracy": 0.84375, |
|
"epoch": 0.255, |
|
"step": 102 |
|
}, |
|
{ |
|
"Batch Mean": 1.513641357421875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.255, |
|
"step": 102 |
|
}, |
|
{ |
|
"Batch Mean": 1.0185041427612305, |
|
"accuracy": 0.75, |
|
"epoch": 0.255, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2575, |
|
"grad_norm": 10.615097999572754, |
|
"learning_rate": 2.344736842105263e-06, |
|
"loss": 0.4239, |
|
"step": 103 |
|
}, |
|
{ |
|
"Batch Mean": 1.5927734375, |
|
"accuracy": 0.6875, |
|
"epoch": 0.2575, |
|
"step": 103 |
|
}, |
|
{ |
|
"Batch Mean": 1.3248634338378906, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2575, |
|
"step": 103 |
|
}, |
|
{ |
|
"Batch Mean": 2.0467910766601562, |
|
"accuracy": 0.875, |
|
"epoch": 0.2575, |
|
"step": 103 |
|
}, |
|
{ |
|
"Batch Mean": 1.3942604064941406, |
|
"accuracy": 0.75, |
|
"epoch": 0.2575, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 10.60344123840332, |
|
"learning_rate": 2.336842105263158e-06, |
|
"loss": 0.4392, |
|
"step": 104 |
|
}, |
|
{ |
|
"Batch Mean": 1.5770196914672852, |
|
"accuracy": 0.875, |
|
"epoch": 0.26, |
|
"step": 104 |
|
}, |
|
{ |
|
"Batch Mean": 1.90313720703125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.26, |
|
"step": 104 |
|
}, |
|
{ |
|
"Batch Mean": 1.4428634643554688, |
|
"accuracy": 0.78125, |
|
"epoch": 0.26, |
|
"step": 104 |
|
}, |
|
{ |
|
"Batch Mean": 1.6471443176269531, |
|
"accuracy": 0.84375, |
|
"epoch": 0.26, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2625, |
|
"grad_norm": 10.150686264038086, |
|
"learning_rate": 2.3289473684210526e-06, |
|
"loss": 0.4022, |
|
"step": 105 |
|
}, |
|
{ |
|
"Batch Mean": 2.1425628662109375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2625, |
|
"step": 105 |
|
}, |
|
{ |
|
"Batch Mean": 1.9060516357421875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2625, |
|
"step": 105 |
|
}, |
|
{ |
|
"Batch Mean": 1.4550533294677734, |
|
"accuracy": 0.9375, |
|
"epoch": 0.2625, |
|
"step": 105 |
|
}, |
|
{ |
|
"Batch Mean": 1.6918563842773438, |
|
"accuracy": 0.90625, |
|
"epoch": 0.2625, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.265, |
|
"grad_norm": 9.874603271484375, |
|
"learning_rate": 2.3210526315789473e-06, |
|
"loss": 0.3121, |
|
"step": 106 |
|
}, |
|
{ |
|
"Batch Mean": 1.8628921508789062, |
|
"accuracy": 0.75, |
|
"epoch": 0.265, |
|
"step": 106 |
|
}, |
|
{ |
|
"Batch Mean": 2.297779083251953, |
|
"accuracy": 0.78125, |
|
"epoch": 0.265, |
|
"step": 106 |
|
}, |
|
{ |
|
"Batch Mean": 2.672454833984375, |
|
"accuracy": 0.875, |
|
"epoch": 0.265, |
|
"step": 106 |
|
}, |
|
{ |
|
"Batch Mean": 2.9582862854003906, |
|
"accuracy": 0.90625, |
|
"epoch": 0.265, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.2675, |
|
"grad_norm": 9.496011734008789, |
|
"learning_rate": 2.313157894736842e-06, |
|
"loss": 0.3878, |
|
"step": 107 |
|
}, |
|
{ |
|
"Batch Mean": 2.3348617553710938, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2675, |
|
"step": 107 |
|
}, |
|
{ |
|
"Batch Mean": 1.5190563201904297, |
|
"accuracy": 0.90625, |
|
"epoch": 0.2675, |
|
"step": 107 |
|
}, |
|
{ |
|
"Batch Mean": 2.1973114013671875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2675, |
|
"step": 107 |
|
}, |
|
{ |
|
"Batch Mean": 1.5624818801879883, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2675, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 10.248079299926758, |
|
"learning_rate": 2.305263157894737e-06, |
|
"loss": 0.4428, |
|
"step": 108 |
|
}, |
|
{ |
|
"Batch Mean": 1.4676017761230469, |
|
"accuracy": 0.90625, |
|
"epoch": 0.27, |
|
"step": 108 |
|
}, |
|
{ |
|
"Batch Mean": 1.8388519287109375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.27, |
|
"step": 108 |
|
}, |
|
{ |
|
"Batch Mean": 1.00494384765625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.27, |
|
"step": 108 |
|
}, |
|
{ |
|
"Batch Mean": 1.6318979263305664, |
|
"accuracy": 0.875, |
|
"epoch": 0.27, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2725, |
|
"grad_norm": 9.797259330749512, |
|
"learning_rate": 2.2973684210526316e-06, |
|
"loss": 0.3313, |
|
"step": 109 |
|
}, |
|
{ |
|
"Batch Mean": 2.6313552856445312, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2725, |
|
"step": 109 |
|
}, |
|
{ |
|
"Batch Mean": 1.713785171508789, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2725, |
|
"step": 109 |
|
}, |
|
{ |
|
"Batch Mean": 2.1018028259277344, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2725, |
|
"step": 109 |
|
}, |
|
{ |
|
"Batch Mean": 2.6379241943359375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2725, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.275, |
|
"grad_norm": 11.505805969238281, |
|
"learning_rate": 2.2894736842105263e-06, |
|
"loss": 0.4466, |
|
"step": 110 |
|
}, |
|
{ |
|
"Batch Mean": 1.686086654663086, |
|
"accuracy": 0.9375, |
|
"epoch": 0.275, |
|
"step": 110 |
|
}, |
|
{ |
|
"Batch Mean": 1.5809831619262695, |
|
"accuracy": 0.84375, |
|
"epoch": 0.275, |
|
"step": 110 |
|
}, |
|
{ |
|
"Batch Mean": 1.9306340217590332, |
|
"accuracy": 0.75, |
|
"epoch": 0.275, |
|
"step": 110 |
|
}, |
|
{ |
|
"Batch Mean": 1.5851421356201172, |
|
"accuracy": 0.8125, |
|
"epoch": 0.275, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2775, |
|
"grad_norm": 9.77007007598877, |
|
"learning_rate": 2.281578947368421e-06, |
|
"loss": 0.3987, |
|
"step": 111 |
|
}, |
|
{ |
|
"Batch Mean": 1.599238395690918, |
|
"accuracy": 0.9375, |
|
"epoch": 0.2775, |
|
"step": 111 |
|
}, |
|
{ |
|
"Batch Mean": 1.3416290283203125, |
|
"accuracy": 0.71875, |
|
"epoch": 0.2775, |
|
"step": 111 |
|
}, |
|
{ |
|
"Batch Mean": 1.3683280944824219, |
|
"accuracy": 0.9375, |
|
"epoch": 0.2775, |
|
"step": 111 |
|
}, |
|
{ |
|
"Batch Mean": 1.7390365600585938, |
|
"accuracy": 0.875, |
|
"epoch": 0.2775, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 10.129262924194336, |
|
"learning_rate": 2.273684210526316e-06, |
|
"loss": 0.3826, |
|
"step": 112 |
|
}, |
|
{ |
|
"Batch Mean": 1.6599540710449219, |
|
"accuracy": 0.8125, |
|
"epoch": 0.28, |
|
"step": 112 |
|
}, |
|
{ |
|
"Batch Mean": 1.790853500366211, |
|
"accuracy": 0.875, |
|
"epoch": 0.28, |
|
"step": 112 |
|
}, |
|
{ |
|
"Batch Mean": 1.2451286315917969, |
|
"accuracy": 0.8125, |
|
"epoch": 0.28, |
|
"step": 112 |
|
}, |
|
{ |
|
"Batch Mean": 1.9894142150878906, |
|
"accuracy": 0.84375, |
|
"epoch": 0.28, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2825, |
|
"grad_norm": 8.05378532409668, |
|
"learning_rate": 2.2657894736842106e-06, |
|
"loss": 0.3701, |
|
"step": 113 |
|
}, |
|
{ |
|
"Batch Mean": 0.7710037231445312, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2825, |
|
"step": 113 |
|
}, |
|
{ |
|
"Batch Mean": 1.5342979431152344, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2825, |
|
"step": 113 |
|
}, |
|
{ |
|
"Batch Mean": 1.7586479187011719, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2825, |
|
"step": 113 |
|
}, |
|
{ |
|
"Batch Mean": 1.0600099563598633, |
|
"accuracy": 0.875, |
|
"epoch": 0.2825, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.285, |
|
"grad_norm": 8.998645782470703, |
|
"learning_rate": 2.2578947368421053e-06, |
|
"loss": 0.4271, |
|
"step": 114 |
|
}, |
|
{ |
|
"Batch Mean": 1.8863487243652344, |
|
"accuracy": 0.8125, |
|
"epoch": 0.285, |
|
"step": 114 |
|
}, |
|
{ |
|
"Batch Mean": 2.05859375, |
|
"accuracy": 0.71875, |
|
"epoch": 0.285, |
|
"step": 114 |
|
}, |
|
{ |
|
"Batch Mean": 1.3946609497070312, |
|
"accuracy": 0.90625, |
|
"epoch": 0.285, |
|
"step": 114 |
|
}, |
|
{ |
|
"Batch Mean": 1.3896846771240234, |
|
"accuracy": 0.84375, |
|
"epoch": 0.285, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2875, |
|
"grad_norm": 8.922933578491211, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.423, |
|
"step": 115 |
|
}, |
|
{ |
|
"Batch Mean": 1.2922439575195312, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2875, |
|
"step": 115 |
|
}, |
|
{ |
|
"Batch Mean": 1.3124160766601562, |
|
"accuracy": 0.84375, |
|
"epoch": 0.2875, |
|
"step": 115 |
|
}, |
|
{ |
|
"Batch Mean": 1.0258140563964844, |
|
"accuracy": 0.78125, |
|
"epoch": 0.2875, |
|
"step": 115 |
|
}, |
|
{ |
|
"Batch Mean": 0.38111114501953125, |
|
"accuracy": 0.875, |
|
"epoch": 0.2875, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 7.676854610443115, |
|
"learning_rate": 2.242105263157895e-06, |
|
"loss": 0.3497, |
|
"step": 116 |
|
}, |
|
{ |
|
"Batch Mean": 0.7025642395019531, |
|
"accuracy": 0.75, |
|
"epoch": 0.29, |
|
"step": 116 |
|
}, |
|
{ |
|
"Batch Mean": 0.7348098754882812, |
|
"accuracy": 0.75, |
|
"epoch": 0.29, |
|
"step": 116 |
|
}, |
|
{ |
|
"Batch Mean": 0.4198455810546875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.29, |
|
"step": 116 |
|
}, |
|
{ |
|
"Batch Mean": 1.551055908203125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.29, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.2925, |
|
"grad_norm": 9.9080810546875, |
|
"learning_rate": 2.2342105263157895e-06, |
|
"loss": 0.4646, |
|
"step": 117 |
|
}, |
|
{ |
|
"Batch Mean": 0.7410621643066406, |
|
"accuracy": 0.8125, |
|
"epoch": 0.2925, |
|
"step": 117 |
|
}, |
|
{ |
|
"Batch Mean": 1.25372314453125, |
|
"accuracy": 0.71875, |
|
"epoch": 0.2925, |
|
"step": 117 |
|
}, |
|
{ |
|
"Batch Mean": 0.13385581970214844, |
|
"accuracy": 0.75, |
|
"epoch": 0.2925, |
|
"step": 117 |
|
}, |
|
{ |
|
"Batch Mean": 1.24212646484375, |
|
"accuracy": 0.875, |
|
"epoch": 0.2925, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.295, |
|
"grad_norm": 9.049751281738281, |
|
"learning_rate": 2.2263157894736843e-06, |
|
"loss": 0.4295, |
|
"step": 118 |
|
}, |
|
{ |
|
"Batch Mean": 0.526611328125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.295, |
|
"step": 118 |
|
}, |
|
{ |
|
"Batch Mean": 0.653597354888916, |
|
"accuracy": 0.6875, |
|
"epoch": 0.295, |
|
"step": 118 |
|
}, |
|
{ |
|
"Batch Mean": 0.07555770874023438, |
|
"accuracy": 0.6875, |
|
"epoch": 0.295, |
|
"step": 118 |
|
}, |
|
{ |
|
"Batch Mean": 0.8257293701171875, |
|
"accuracy": 0.875, |
|
"epoch": 0.295, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.2975, |
|
"grad_norm": 7.58751916885376, |
|
"learning_rate": 2.218421052631579e-06, |
|
"loss": 0.4295, |
|
"step": 119 |
|
}, |
|
{ |
|
"Batch Mean": 1.4244041442871094, |
|
"accuracy": 0.75, |
|
"epoch": 0.2975, |
|
"step": 119 |
|
}, |
|
{ |
|
"Batch Mean": 0.053539276123046875, |
|
"accuracy": 0.875, |
|
"epoch": 0.2975, |
|
"step": 119 |
|
}, |
|
{ |
|
"Batch Mean": 0.33531951904296875, |
|
"accuracy": 0.75, |
|
"epoch": 0.2975, |
|
"step": 119 |
|
}, |
|
{ |
|
"Batch Mean": 1.5470961332321167, |
|
"accuracy": 0.875, |
|
"epoch": 0.2975, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 7.336263179779053, |
|
"learning_rate": 2.2105263157894738e-06, |
|
"loss": 0.3918, |
|
"step": 120 |
|
}, |
|
{ |
|
"Batch Mean": -0.3497161865234375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3, |
|
"step": 120 |
|
}, |
|
{ |
|
"Batch Mean": 0.7215266227722168, |
|
"accuracy": 0.6875, |
|
"epoch": 0.3, |
|
"step": 120 |
|
}, |
|
{ |
|
"Batch Mean": 0.5651702880859375, |
|
"accuracy": 0.96875, |
|
"epoch": 0.3, |
|
"step": 120 |
|
}, |
|
{ |
|
"Batch Mean": 0.9324417114257812, |
|
"accuracy": 0.8125, |
|
"epoch": 0.3, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3025, |
|
"grad_norm": 8.309417724609375, |
|
"learning_rate": 2.2026315789473685e-06, |
|
"loss": 0.4117, |
|
"step": 121 |
|
}, |
|
{ |
|
"Batch Mean": 0.6491870880126953, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3025, |
|
"step": 121 |
|
}, |
|
{ |
|
"Batch Mean": 0.003475189208984375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3025, |
|
"step": 121 |
|
}, |
|
{ |
|
"Batch Mean": 0.000141143798828125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.3025, |
|
"step": 121 |
|
}, |
|
{ |
|
"Batch Mean": 0.3972206115722656, |
|
"accuracy": 0.8125, |
|
"epoch": 0.3025, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.305, |
|
"grad_norm": 9.540141105651855, |
|
"learning_rate": 2.1947368421052633e-06, |
|
"loss": 0.463, |
|
"step": 122 |
|
}, |
|
{ |
|
"Batch Mean": 0.5694351196289062, |
|
"accuracy": 0.96875, |
|
"epoch": 0.305, |
|
"step": 122 |
|
}, |
|
{ |
|
"Batch Mean": -0.144378662109375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.305, |
|
"step": 122 |
|
}, |
|
{ |
|
"Batch Mean": 1.401092529296875, |
|
"accuracy": 0.65625, |
|
"epoch": 0.305, |
|
"step": 122 |
|
}, |
|
{ |
|
"Batch Mean": 0.12181663513183594, |
|
"accuracy": 0.78125, |
|
"epoch": 0.305, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3075, |
|
"grad_norm": 7.845481872558594, |
|
"learning_rate": 2.186842105263158e-06, |
|
"loss": 0.3824, |
|
"step": 123 |
|
}, |
|
{ |
|
"Batch Mean": 1.1395072937011719, |
|
"accuracy": 0.875, |
|
"epoch": 0.3075, |
|
"step": 123 |
|
}, |
|
{ |
|
"Batch Mean": 0.10755538940429688, |
|
"accuracy": 0.71875, |
|
"epoch": 0.3075, |
|
"step": 123 |
|
}, |
|
{ |
|
"Batch Mean": 0.5145070552825928, |
|
"accuracy": 0.75, |
|
"epoch": 0.3075, |
|
"step": 123 |
|
}, |
|
{ |
|
"Batch Mean": -0.4429779052734375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3075, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 8.356697082519531, |
|
"learning_rate": 2.1789473684210528e-06, |
|
"loss": 0.4604, |
|
"step": 124 |
|
}, |
|
{ |
|
"Batch Mean": 0.294830322265625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.31, |
|
"step": 124 |
|
}, |
|
{ |
|
"Batch Mean": 0.6266403198242188, |
|
"accuracy": 0.90625, |
|
"epoch": 0.31, |
|
"step": 124 |
|
}, |
|
{ |
|
"Batch Mean": 0.56964111328125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.31, |
|
"step": 124 |
|
}, |
|
{ |
|
"Batch Mean": -0.3099174499511719, |
|
"accuracy": 0.75, |
|
"epoch": 0.31, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 7.750851631164551, |
|
"learning_rate": 2.1710526315789475e-06, |
|
"loss": 0.4464, |
|
"step": 125 |
|
}, |
|
{ |
|
"Batch Mean": 0.7020511627197266, |
|
"accuracy": 0.9375, |
|
"epoch": 0.3125, |
|
"step": 125 |
|
}, |
|
{ |
|
"Batch Mean": -0.09911084175109863, |
|
"accuracy": 0.875, |
|
"epoch": 0.3125, |
|
"step": 125 |
|
}, |
|
{ |
|
"Batch Mean": 0.45139312744140625, |
|
"accuracy": 0.875, |
|
"epoch": 0.3125, |
|
"step": 125 |
|
}, |
|
{ |
|
"Batch Mean": -0.036022186279296875, |
|
"accuracy": 0.875, |
|
"epoch": 0.3125, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.315, |
|
"grad_norm": 6.1669921875, |
|
"learning_rate": 2.1631578947368423e-06, |
|
"loss": 0.3093, |
|
"step": 126 |
|
}, |
|
{ |
|
"Batch Mean": -0.4551048278808594, |
|
"accuracy": 0.6875, |
|
"epoch": 0.315, |
|
"step": 126 |
|
}, |
|
{ |
|
"Batch Mean": 0.8819370269775391, |
|
"accuracy": 0.8125, |
|
"epoch": 0.315, |
|
"step": 126 |
|
}, |
|
{ |
|
"Batch Mean": 0.3420219421386719, |
|
"accuracy": 0.75, |
|
"epoch": 0.315, |
|
"step": 126 |
|
}, |
|
{ |
|
"Batch Mean": 0.31945037841796875, |
|
"accuracy": 0.875, |
|
"epoch": 0.315, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3175, |
|
"grad_norm": 7.610821723937988, |
|
"learning_rate": 2.155263157894737e-06, |
|
"loss": 0.3873, |
|
"step": 127 |
|
}, |
|
{ |
|
"Batch Mean": 0.00855255126953125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3175, |
|
"step": 127 |
|
}, |
|
{ |
|
"Batch Mean": -0.30928611755371094, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3175, |
|
"step": 127 |
|
}, |
|
{ |
|
"Batch Mean": 0.3133277893066406, |
|
"accuracy": 0.75, |
|
"epoch": 0.3175, |
|
"step": 127 |
|
}, |
|
{ |
|
"Batch Mean": -0.08009719848632812, |
|
"accuracy": 0.75, |
|
"epoch": 0.3175, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 8.80823802947998, |
|
"learning_rate": 2.1473684210526317e-06, |
|
"loss": 0.4543, |
|
"step": 128 |
|
}, |
|
{ |
|
"Batch Mean": -0.2666511535644531, |
|
"accuracy": 0.75, |
|
"epoch": 0.32, |
|
"step": 128 |
|
}, |
|
{ |
|
"Batch Mean": 0.2806854248046875, |
|
"accuracy": 0.875, |
|
"epoch": 0.32, |
|
"step": 128 |
|
}, |
|
{ |
|
"Batch Mean": -0.27446746826171875, |
|
"accuracy": 0.9375, |
|
"epoch": 0.32, |
|
"step": 128 |
|
}, |
|
{ |
|
"Batch Mean": -0.4624176025390625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.32, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3225, |
|
"grad_norm": 8.372282981872559, |
|
"learning_rate": 2.1394736842105265e-06, |
|
"loss": 0.4167, |
|
"step": 129 |
|
}, |
|
{ |
|
"Batch Mean": -0.2513875961303711, |
|
"accuracy": 0.6875, |
|
"epoch": 0.3225, |
|
"step": 129 |
|
}, |
|
{ |
|
"Batch Mean": -0.211395263671875, |
|
"accuracy": 0.90625, |
|
"epoch": 0.3225, |
|
"step": 129 |
|
}, |
|
{ |
|
"Batch Mean": 0.106658935546875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3225, |
|
"step": 129 |
|
}, |
|
{ |
|
"Batch Mean": -0.3263053894042969, |
|
"accuracy": 0.8125, |
|
"epoch": 0.3225, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.325, |
|
"grad_norm": 8.773994445800781, |
|
"learning_rate": 2.1315789473684212e-06, |
|
"loss": 0.3884, |
|
"step": 130 |
|
}, |
|
{ |
|
"Batch Mean": 0.5553717613220215, |
|
"accuracy": 0.875, |
|
"epoch": 0.325, |
|
"step": 130 |
|
}, |
|
{ |
|
"Batch Mean": -0.5979881286621094, |
|
"accuracy": 0.8125, |
|
"epoch": 0.325, |
|
"step": 130 |
|
}, |
|
{ |
|
"Batch Mean": -0.2571907043457031, |
|
"accuracy": 0.625, |
|
"epoch": 0.325, |
|
"step": 130 |
|
}, |
|
{ |
|
"Batch Mean": -0.528472900390625, |
|
"accuracy": 0.75, |
|
"epoch": 0.325, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3275, |
|
"grad_norm": 9.296905517578125, |
|
"learning_rate": 2.123684210526316e-06, |
|
"loss": 0.4298, |
|
"step": 131 |
|
}, |
|
{ |
|
"Batch Mean": 0.11560249328613281, |
|
"accuracy": 0.71875, |
|
"epoch": 0.3275, |
|
"step": 131 |
|
}, |
|
{ |
|
"Batch Mean": 0.2269425392150879, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3275, |
|
"step": 131 |
|
}, |
|
{ |
|
"Batch Mean": 0.30469512939453125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3275, |
|
"step": 131 |
|
}, |
|
{ |
|
"Batch Mean": 0.09850311279296875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3275, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 9.395648956298828, |
|
"learning_rate": 2.1157894736842103e-06, |
|
"loss": 0.4653, |
|
"step": 132 |
|
}, |
|
{ |
|
"Batch Mean": 0.7032089233398438, |
|
"accuracy": 0.84375, |
|
"epoch": 0.33, |
|
"step": 132 |
|
}, |
|
{ |
|
"Batch Mean": 0.5162849426269531, |
|
"accuracy": 0.6875, |
|
"epoch": 0.33, |
|
"step": 132 |
|
}, |
|
{ |
|
"Batch Mean": 0.10200667381286621, |
|
"accuracy": 0.84375, |
|
"epoch": 0.33, |
|
"step": 132 |
|
}, |
|
{ |
|
"Batch Mean": 0.2635307312011719, |
|
"accuracy": 0.6875, |
|
"epoch": 0.33, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3325, |
|
"grad_norm": 9.032759666442871, |
|
"learning_rate": 2.107894736842105e-06, |
|
"loss": 0.4334, |
|
"step": 133 |
|
}, |
|
{ |
|
"Batch Mean": 0.4982147216796875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3325, |
|
"step": 133 |
|
}, |
|
{ |
|
"Batch Mean": 0.6229362487792969, |
|
"accuracy": 0.90625, |
|
"epoch": 0.3325, |
|
"step": 133 |
|
}, |
|
{ |
|
"Batch Mean": 0.43565940856933594, |
|
"accuracy": 0.75, |
|
"epoch": 0.3325, |
|
"step": 133 |
|
}, |
|
{ |
|
"Batch Mean": -0.32531166076660156, |
|
"accuracy": 0.75, |
|
"epoch": 0.3325, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.335, |
|
"grad_norm": 8.690756797790527, |
|
"learning_rate": 2.1e-06, |
|
"loss": 0.4212, |
|
"step": 134 |
|
}, |
|
{ |
|
"Batch Mean": 0.16964244842529297, |
|
"accuracy": 0.84375, |
|
"epoch": 0.335, |
|
"step": 134 |
|
}, |
|
{ |
|
"Batch Mean": 0.18350982666015625, |
|
"accuracy": 0.75, |
|
"epoch": 0.335, |
|
"step": 134 |
|
}, |
|
{ |
|
"Batch Mean": -0.14165115356445312, |
|
"accuracy": 0.875, |
|
"epoch": 0.335, |
|
"step": 134 |
|
}, |
|
{ |
|
"Batch Mean": 0.8232192993164062, |
|
"accuracy": 0.90625, |
|
"epoch": 0.335, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3375, |
|
"grad_norm": 7.241537094116211, |
|
"learning_rate": 2.0921052631578945e-06, |
|
"loss": 0.3394, |
|
"step": 135 |
|
}, |
|
{ |
|
"Batch Mean": 0.4974327087402344, |
|
"accuracy": 0.6875, |
|
"epoch": 0.3375, |
|
"step": 135 |
|
}, |
|
{ |
|
"Batch Mean": 1.16363525390625, |
|
"accuracy": 0.9375, |
|
"epoch": 0.3375, |
|
"step": 135 |
|
}, |
|
{ |
|
"Batch Mean": 0.39719390869140625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3375, |
|
"step": 135 |
|
}, |
|
{ |
|
"Batch Mean": 0.6345443725585938, |
|
"accuracy": 0.71875, |
|
"epoch": 0.3375, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 8.617725372314453, |
|
"learning_rate": 2.0842105263157897e-06, |
|
"loss": 0.4485, |
|
"step": 136 |
|
}, |
|
{ |
|
"Batch Mean": 0.17121124267578125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.34, |
|
"step": 136 |
|
}, |
|
{ |
|
"Batch Mean": 0.4105377197265625, |
|
"accuracy": 0.96875, |
|
"epoch": 0.34, |
|
"step": 136 |
|
}, |
|
{ |
|
"Batch Mean": 0.3336830139160156, |
|
"accuracy": 0.8125, |
|
"epoch": 0.34, |
|
"step": 136 |
|
}, |
|
{ |
|
"Batch Mean": 0.2991490364074707, |
|
"accuracy": 0.875, |
|
"epoch": 0.34, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.3425, |
|
"grad_norm": 8.784757614135742, |
|
"learning_rate": 2.0763157894736845e-06, |
|
"loss": 0.4469, |
|
"step": 137 |
|
}, |
|
{ |
|
"Batch Mean": -0.0010280609130859375, |
|
"accuracy": 0.875, |
|
"epoch": 0.3425, |
|
"step": 137 |
|
}, |
|
{ |
|
"Batch Mean": 0.45244598388671875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3425, |
|
"step": 137 |
|
}, |
|
{ |
|
"Batch Mean": 0.34552574157714844, |
|
"accuracy": 0.8125, |
|
"epoch": 0.3425, |
|
"step": 137 |
|
}, |
|
{ |
|
"Batch Mean": 0.24572372436523438, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3425, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.345, |
|
"grad_norm": 7.846508979797363, |
|
"learning_rate": 2.068421052631579e-06, |
|
"loss": 0.3842, |
|
"step": 138 |
|
}, |
|
{ |
|
"Batch Mean": 0.9147672653198242, |
|
"accuracy": 0.71875, |
|
"epoch": 0.345, |
|
"step": 138 |
|
}, |
|
{ |
|
"Batch Mean": 0.8892059326171875, |
|
"accuracy": 0.90625, |
|
"epoch": 0.345, |
|
"step": 138 |
|
}, |
|
{ |
|
"Batch Mean": 0.02689361572265625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.345, |
|
"step": 138 |
|
}, |
|
{ |
|
"Batch Mean": -0.48032379150390625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.345, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.3475, |
|
"grad_norm": 7.4200029373168945, |
|
"learning_rate": 2.060526315789474e-06, |
|
"loss": 0.3841, |
|
"step": 139 |
|
}, |
|
{ |
|
"Batch Mean": 0.131683349609375, |
|
"accuracy": 0.75, |
|
"epoch": 0.3475, |
|
"step": 139 |
|
}, |
|
{ |
|
"Batch Mean": 0.6283473968505859, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3475, |
|
"step": 139 |
|
}, |
|
{ |
|
"Batch Mean": 0.5666923522949219, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3475, |
|
"step": 139 |
|
}, |
|
{ |
|
"Batch Mean": -0.07221603393554688, |
|
"accuracy": 0.8125, |
|
"epoch": 0.3475, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 9.652083396911621, |
|
"learning_rate": 2.0526315789473687e-06, |
|
"loss": 0.4634, |
|
"step": 140 |
|
}, |
|
{ |
|
"Batch Mean": -0.40891122817993164, |
|
"accuracy": 0.8125, |
|
"epoch": 0.35, |
|
"step": 140 |
|
}, |
|
{ |
|
"Batch Mean": 0.8287420272827148, |
|
"accuracy": 0.78125, |
|
"epoch": 0.35, |
|
"step": 140 |
|
}, |
|
{ |
|
"Batch Mean": 0.5498733520507812, |
|
"accuracy": 0.84375, |
|
"epoch": 0.35, |
|
"step": 140 |
|
}, |
|
{ |
|
"Batch Mean": -0.15804672241210938, |
|
"accuracy": 0.78125, |
|
"epoch": 0.35, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3525, |
|
"grad_norm": 7.399588108062744, |
|
"learning_rate": 2.0447368421052634e-06, |
|
"loss": 0.3772, |
|
"step": 141 |
|
}, |
|
{ |
|
"Batch Mean": 0.1704254150390625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3525, |
|
"step": 141 |
|
}, |
|
{ |
|
"Batch Mean": -0.08094024658203125, |
|
"accuracy": 0.875, |
|
"epoch": 0.3525, |
|
"step": 141 |
|
}, |
|
{ |
|
"Batch Mean": 0.109130859375, |
|
"accuracy": 0.75, |
|
"epoch": 0.3525, |
|
"step": 141 |
|
}, |
|
{ |
|
"Batch Mean": -0.000492095947265625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.3525, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.355, |
|
"grad_norm": 8.172666549682617, |
|
"learning_rate": 2.0368421052631578e-06, |
|
"loss": 0.4044, |
|
"step": 142 |
|
}, |
|
{ |
|
"Batch Mean": 0.15118980407714844, |
|
"accuracy": 0.875, |
|
"epoch": 0.355, |
|
"step": 142 |
|
}, |
|
{ |
|
"Batch Mean": 0.5515174865722656, |
|
"accuracy": 0.75, |
|
"epoch": 0.355, |
|
"step": 142 |
|
}, |
|
{ |
|
"Batch Mean": 0.5479736328125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.355, |
|
"step": 142 |
|
}, |
|
{ |
|
"Batch Mean": -0.7359352111816406, |
|
"accuracy": 0.84375, |
|
"epoch": 0.355, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3575, |
|
"grad_norm": 7.695672988891602, |
|
"learning_rate": 2.0289473684210525e-06, |
|
"loss": 0.394, |
|
"step": 143 |
|
}, |
|
{ |
|
"Batch Mean": -0.5440807342529297, |
|
"accuracy": 0.90625, |
|
"epoch": 0.3575, |
|
"step": 143 |
|
}, |
|
{ |
|
"Batch Mean": -0.608673095703125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3575, |
|
"step": 143 |
|
}, |
|
{ |
|
"Batch Mean": -1.0982437133789062, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3575, |
|
"step": 143 |
|
}, |
|
{ |
|
"Batch Mean": 0.3627746105194092, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3575, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 7.4809160232543945, |
|
"learning_rate": 2.0210526315789473e-06, |
|
"loss": 0.4344, |
|
"step": 144 |
|
}, |
|
{ |
|
"Batch Mean": -0.8347702026367188, |
|
"accuracy": 0.8125, |
|
"epoch": 0.36, |
|
"step": 144 |
|
}, |
|
{ |
|
"Batch Mean": 0.42254638671875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.36, |
|
"step": 144 |
|
}, |
|
{ |
|
"Batch Mean": -0.3843860626220703, |
|
"accuracy": 0.90625, |
|
"epoch": 0.36, |
|
"step": 144 |
|
}, |
|
{ |
|
"Batch Mean": -0.43280792236328125, |
|
"accuracy": 0.875, |
|
"epoch": 0.36, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3625, |
|
"grad_norm": 8.051714897155762, |
|
"learning_rate": 2.013157894736842e-06, |
|
"loss": 0.3325, |
|
"step": 145 |
|
}, |
|
{ |
|
"Batch Mean": 0.1273651123046875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3625, |
|
"step": 145 |
|
}, |
|
{ |
|
"Batch Mean": -0.18819427490234375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3625, |
|
"step": 145 |
|
}, |
|
{ |
|
"Batch Mean": 0.09321212768554688, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3625, |
|
"step": 145 |
|
}, |
|
{ |
|
"Batch Mean": 0.22705078125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3625, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.365, |
|
"grad_norm": 6.912979602813721, |
|
"learning_rate": 2.0052631578947367e-06, |
|
"loss": 0.3876, |
|
"step": 146 |
|
}, |
|
{ |
|
"Batch Mean": -0.11212539672851562, |
|
"accuracy": 0.8125, |
|
"epoch": 0.365, |
|
"step": 146 |
|
}, |
|
{ |
|
"Batch Mean": 0.059462785720825195, |
|
"accuracy": 0.75, |
|
"epoch": 0.365, |
|
"step": 146 |
|
}, |
|
{ |
|
"Batch Mean": -0.11966323852539062, |
|
"accuracy": 0.84375, |
|
"epoch": 0.365, |
|
"step": 146 |
|
}, |
|
{ |
|
"Batch Mean": -0.21143722534179688, |
|
"accuracy": 0.84375, |
|
"epoch": 0.365, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3675, |
|
"grad_norm": 8.171217918395996, |
|
"learning_rate": 1.9973684210526315e-06, |
|
"loss": 0.3835, |
|
"step": 147 |
|
}, |
|
{ |
|
"Batch Mean": 0.1815032958984375, |
|
"accuracy": 0.90625, |
|
"epoch": 0.3675, |
|
"step": 147 |
|
}, |
|
{ |
|
"Batch Mean": -0.0745086669921875, |
|
"accuracy": 0.875, |
|
"epoch": 0.3675, |
|
"step": 147 |
|
}, |
|
{ |
|
"Batch Mean": -0.2532862424850464, |
|
"accuracy": 0.90625, |
|
"epoch": 0.3675, |
|
"step": 147 |
|
}, |
|
{ |
|
"Batch Mean": 0.2584037780761719, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3675, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 7.358208179473877, |
|
"learning_rate": 1.9894736842105262e-06, |
|
"loss": 0.3228, |
|
"step": 148 |
|
}, |
|
{ |
|
"Batch Mean": 0.37479400634765625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.37, |
|
"step": 148 |
|
}, |
|
{ |
|
"Batch Mean": -0.385284423828125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.37, |
|
"step": 148 |
|
}, |
|
{ |
|
"Batch Mean": -0.037403106689453125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.37, |
|
"step": 148 |
|
}, |
|
{ |
|
"Batch Mean": 0.872039794921875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.37, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3725, |
|
"grad_norm": 7.973887920379639, |
|
"learning_rate": 1.9815789473684214e-06, |
|
"loss": 0.3803, |
|
"step": 149 |
|
}, |
|
{ |
|
"Batch Mean": 0.465301513671875, |
|
"accuracy": 0.90625, |
|
"epoch": 0.3725, |
|
"step": 149 |
|
}, |
|
{ |
|
"Batch Mean": -0.10911333560943604, |
|
"accuracy": 0.90625, |
|
"epoch": 0.3725, |
|
"step": 149 |
|
}, |
|
{ |
|
"Batch Mean": 0.07960319519042969, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3725, |
|
"step": 149 |
|
}, |
|
{ |
|
"Batch Mean": 0.2885894775390625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.3725, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 7.838663578033447, |
|
"learning_rate": 1.973684210526316e-06, |
|
"loss": 0.3823, |
|
"step": 150 |
|
}, |
|
{ |
|
"Batch Mean": 0.26787567138671875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.375, |
|
"step": 150 |
|
}, |
|
{ |
|
"Batch Mean": 1.1342926025390625, |
|
"accuracy": 0.875, |
|
"epoch": 0.375, |
|
"step": 150 |
|
}, |
|
{ |
|
"Batch Mean": 0.30857276916503906, |
|
"accuracy": 0.71875, |
|
"epoch": 0.375, |
|
"step": 150 |
|
}, |
|
{ |
|
"Batch Mean": 0.18548583984375, |
|
"accuracy": 0.6875, |
|
"epoch": 0.375, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3775, |
|
"grad_norm": 9.460301399230957, |
|
"learning_rate": 1.965789473684211e-06, |
|
"loss": 0.4618, |
|
"step": 151 |
|
}, |
|
{ |
|
"Batch Mean": 1.375244140625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3775, |
|
"step": 151 |
|
}, |
|
{ |
|
"Batch Mean": 1.209747314453125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.3775, |
|
"step": 151 |
|
}, |
|
{ |
|
"Batch Mean": 0.6527290344238281, |
|
"accuracy": 0.6875, |
|
"epoch": 0.3775, |
|
"step": 151 |
|
}, |
|
{ |
|
"Batch Mean": 0.3119029998779297, |
|
"accuracy": 0.71875, |
|
"epoch": 0.3775, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 12.294709205627441, |
|
"learning_rate": 1.9578947368421052e-06, |
|
"loss": 0.5262, |
|
"step": 152 |
|
}, |
|
{ |
|
"Batch Mean": 0.3755950927734375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.38, |
|
"step": 152 |
|
}, |
|
{ |
|
"Batch Mean": 0.26468849182128906, |
|
"accuracy": 0.78125, |
|
"epoch": 0.38, |
|
"step": 152 |
|
}, |
|
{ |
|
"Batch Mean": 0.8468475341796875, |
|
"accuracy": 0.90625, |
|
"epoch": 0.38, |
|
"step": 152 |
|
}, |
|
{ |
|
"Batch Mean": 1.1309213638305664, |
|
"accuracy": 0.875, |
|
"epoch": 0.38, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3825, |
|
"grad_norm": 7.226297378540039, |
|
"learning_rate": 1.95e-06, |
|
"loss": 0.3189, |
|
"step": 153 |
|
}, |
|
{ |
|
"Batch Mean": 0.39007568359375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3825, |
|
"step": 153 |
|
}, |
|
{ |
|
"Batch Mean": 0.3533477783203125, |
|
"accuracy": 0.875, |
|
"epoch": 0.3825, |
|
"step": 153 |
|
}, |
|
{ |
|
"Batch Mean": -0.5847704410552979, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3825, |
|
"step": 153 |
|
}, |
|
{ |
|
"Batch Mean": 0.31223154067993164, |
|
"accuracy": 0.75, |
|
"epoch": 0.3825, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.385, |
|
"grad_norm": 9.52422046661377, |
|
"learning_rate": 1.9421052631578947e-06, |
|
"loss": 0.3566, |
|
"step": 154 |
|
}, |
|
{ |
|
"Batch Mean": 0.13152313232421875, |
|
"accuracy": 0.875, |
|
"epoch": 0.385, |
|
"step": 154 |
|
}, |
|
{ |
|
"Batch Mean": 0.8750481009483337, |
|
"accuracy": 0.78125, |
|
"epoch": 0.385, |
|
"step": 154 |
|
}, |
|
{ |
|
"Batch Mean": 0.15985870361328125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.385, |
|
"step": 154 |
|
}, |
|
{ |
|
"Batch Mean": 0.9354896545410156, |
|
"accuracy": 0.75, |
|
"epoch": 0.385, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3875, |
|
"grad_norm": 9.432368278503418, |
|
"learning_rate": 1.9342105263157895e-06, |
|
"loss": 0.4169, |
|
"step": 155 |
|
}, |
|
{ |
|
"Batch Mean": 0.5066986083984375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3875, |
|
"step": 155 |
|
}, |
|
{ |
|
"Batch Mean": 0.2660942077636719, |
|
"accuracy": 0.875, |
|
"epoch": 0.3875, |
|
"step": 155 |
|
}, |
|
{ |
|
"Batch Mean": -0.8412609100341797, |
|
"accuracy": 0.75, |
|
"epoch": 0.3875, |
|
"step": 155 |
|
}, |
|
{ |
|
"Batch Mean": 0.3382682800292969, |
|
"accuracy": 0.6875, |
|
"epoch": 0.3875, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 9.927288055419922, |
|
"learning_rate": 1.926315789473684e-06, |
|
"loss": 0.4555, |
|
"step": 156 |
|
}, |
|
{ |
|
"Batch Mean": 0.5790252685546875, |
|
"accuracy": 0.6875, |
|
"epoch": 0.39, |
|
"step": 156 |
|
}, |
|
{ |
|
"Batch Mean": 0.1602153778076172, |
|
"accuracy": 0.90625, |
|
"epoch": 0.39, |
|
"step": 156 |
|
}, |
|
{ |
|
"Batch Mean": 0.03354644775390625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.39, |
|
"step": 156 |
|
}, |
|
{ |
|
"Batch Mean": -0.5584635734558105, |
|
"accuracy": 0.90625, |
|
"epoch": 0.39, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3925, |
|
"grad_norm": 9.10993480682373, |
|
"learning_rate": 1.918421052631579e-06, |
|
"loss": 0.4378, |
|
"step": 157 |
|
}, |
|
{ |
|
"Batch Mean": 0.1114959716796875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.3925, |
|
"step": 157 |
|
}, |
|
{ |
|
"Batch Mean": 0.6815891265869141, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3925, |
|
"step": 157 |
|
}, |
|
{ |
|
"Batch Mean": -0.0857391357421875, |
|
"accuracy": 0.875, |
|
"epoch": 0.3925, |
|
"step": 157 |
|
}, |
|
{ |
|
"Batch Mean": 0.23114013671875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.3925, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.395, |
|
"grad_norm": 8.423527717590332, |
|
"learning_rate": 1.9105263157894737e-06, |
|
"loss": 0.3948, |
|
"step": 158 |
|
}, |
|
{ |
|
"Batch Mean": -0.17507171630859375, |
|
"accuracy": 0.90625, |
|
"epoch": 0.395, |
|
"step": 158 |
|
}, |
|
{ |
|
"Batch Mean": -0.07347869873046875, |
|
"accuracy": 0.6875, |
|
"epoch": 0.395, |
|
"step": 158 |
|
}, |
|
{ |
|
"Batch Mean": -0.01995086669921875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.395, |
|
"step": 158 |
|
}, |
|
{ |
|
"Batch Mean": -0.12694931030273438, |
|
"accuracy": 0.875, |
|
"epoch": 0.395, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.3975, |
|
"grad_norm": 8.319018363952637, |
|
"learning_rate": 1.9026315789473684e-06, |
|
"loss": 0.3905, |
|
"step": 159 |
|
}, |
|
{ |
|
"Batch Mean": 0.07317733764648438, |
|
"accuracy": 0.8125, |
|
"epoch": 0.3975, |
|
"step": 159 |
|
}, |
|
{ |
|
"Batch Mean": 0.09071731567382812, |
|
"accuracy": 0.78125, |
|
"epoch": 0.3975, |
|
"step": 159 |
|
}, |
|
{ |
|
"Batch Mean": -0.0918726921081543, |
|
"accuracy": 0.625, |
|
"epoch": 0.3975, |
|
"step": 159 |
|
}, |
|
{ |
|
"Batch Mean": 0.07117652893066406, |
|
"accuracy": 0.8125, |
|
"epoch": 0.3975, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 11.484334945678711, |
|
"learning_rate": 1.8947368421052632e-06, |
|
"loss": 0.5275, |
|
"step": 160 |
|
}, |
|
{ |
|
"Batch Mean": -1.411625862121582, |
|
"accuracy": 0.875, |
|
"epoch": 0.4, |
|
"step": 160 |
|
}, |
|
{ |
|
"Batch Mean": -0.39847850799560547, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4, |
|
"step": 160 |
|
}, |
|
{ |
|
"Batch Mean": 0.15625762939453125, |
|
"accuracy": 0.875, |
|
"epoch": 0.4, |
|
"step": 160 |
|
}, |
|
{ |
|
"Batch Mean": -0.21668243408203125, |
|
"accuracy": 0.6875, |
|
"epoch": 0.4, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4025, |
|
"grad_norm": 7.37659215927124, |
|
"learning_rate": 1.8868421052631577e-06, |
|
"loss": 0.3667, |
|
"step": 161 |
|
}, |
|
{ |
|
"Batch Mean": -0.7656517028808594, |
|
"accuracy": 0.71875, |
|
"epoch": 0.4025, |
|
"step": 161 |
|
}, |
|
{ |
|
"Batch Mean": -0.780585765838623, |
|
"accuracy": 0.71875, |
|
"epoch": 0.4025, |
|
"step": 161 |
|
}, |
|
{ |
|
"Batch Mean": -1.1507186889648438, |
|
"accuracy": 0.71875, |
|
"epoch": 0.4025, |
|
"step": 161 |
|
}, |
|
{ |
|
"Batch Mean": -0.6291351318359375, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4025, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.405, |
|
"grad_norm": 7.793739318847656, |
|
"learning_rate": 1.8789473684210525e-06, |
|
"loss": 0.4538, |
|
"step": 162 |
|
}, |
|
{ |
|
"Batch Mean": -1.36981201171875, |
|
"accuracy": 0.71875, |
|
"epoch": 0.405, |
|
"step": 162 |
|
}, |
|
{ |
|
"Batch Mean": -0.37451171875, |
|
"accuracy": 0.75, |
|
"epoch": 0.405, |
|
"step": 162 |
|
}, |
|
{ |
|
"Batch Mean": -1.5573692321777344, |
|
"accuracy": 0.84375, |
|
"epoch": 0.405, |
|
"step": 162 |
|
}, |
|
{ |
|
"Batch Mean": -0.6422004699707031, |
|
"accuracy": 0.84375, |
|
"epoch": 0.405, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4075, |
|
"grad_norm": 7.323417663574219, |
|
"learning_rate": 1.8710526315789476e-06, |
|
"loss": 0.4062, |
|
"step": 163 |
|
}, |
|
{ |
|
"Batch Mean": -0.6216545104980469, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4075, |
|
"step": 163 |
|
}, |
|
{ |
|
"Batch Mean": -0.580409049987793, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4075, |
|
"step": 163 |
|
}, |
|
{ |
|
"Batch Mean": -0.6887893676757812, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4075, |
|
"step": 163 |
|
}, |
|
{ |
|
"Batch Mean": -0.2675666809082031, |
|
"accuracy": 0.875, |
|
"epoch": 0.4075, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 6.404051303863525, |
|
"learning_rate": 1.8631578947368424e-06, |
|
"loss": 0.336, |
|
"step": 164 |
|
}, |
|
{ |
|
"Batch Mean": -0.1745471954345703, |
|
"accuracy": 0.8125, |
|
"epoch": 0.41, |
|
"step": 164 |
|
}, |
|
{ |
|
"Batch Mean": -0.9093170166015625, |
|
"accuracy": 0.75, |
|
"epoch": 0.41, |
|
"step": 164 |
|
}, |
|
{ |
|
"Batch Mean": -0.5017061233520508, |
|
"accuracy": 0.84375, |
|
"epoch": 0.41, |
|
"step": 164 |
|
}, |
|
{ |
|
"Batch Mean": -0.788360595703125, |
|
"accuracy": 0.875, |
|
"epoch": 0.41, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4125, |
|
"grad_norm": 6.220415115356445, |
|
"learning_rate": 1.855263157894737e-06, |
|
"loss": 0.3864, |
|
"step": 165 |
|
}, |
|
{ |
|
"Batch Mean": -1.6242752075195312, |
|
"accuracy": 0.6875, |
|
"epoch": 0.4125, |
|
"step": 165 |
|
}, |
|
{ |
|
"Batch Mean": -0.9466705322265625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.4125, |
|
"step": 165 |
|
}, |
|
{ |
|
"Batch Mean": -0.9919929504394531, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4125, |
|
"step": 165 |
|
}, |
|
{ |
|
"Batch Mean": -0.45612335205078125, |
|
"accuracy": 0.875, |
|
"epoch": 0.4125, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.415, |
|
"grad_norm": 7.886701583862305, |
|
"learning_rate": 1.8473684210526317e-06, |
|
"loss": 0.4337, |
|
"step": 166 |
|
}, |
|
{ |
|
"Batch Mean": -0.4734039306640625, |
|
"accuracy": 0.875, |
|
"epoch": 0.415, |
|
"step": 166 |
|
}, |
|
{ |
|
"Batch Mean": -1.4649581909179688, |
|
"accuracy": 0.78125, |
|
"epoch": 0.415, |
|
"step": 166 |
|
}, |
|
{ |
|
"Batch Mean": -0.777379035949707, |
|
"accuracy": 0.78125, |
|
"epoch": 0.415, |
|
"step": 166 |
|
}, |
|
{ |
|
"Batch Mean": -0.9915351867675781, |
|
"accuracy": 0.78125, |
|
"epoch": 0.415, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4175, |
|
"grad_norm": 6.96359395980835, |
|
"learning_rate": 1.8394736842105264e-06, |
|
"loss": 0.4043, |
|
"step": 167 |
|
}, |
|
{ |
|
"Batch Mean": -0.5342636108398438, |
|
"accuracy": 0.9375, |
|
"epoch": 0.4175, |
|
"step": 167 |
|
}, |
|
{ |
|
"Batch Mean": -0.731837272644043, |
|
"accuracy": 0.9375, |
|
"epoch": 0.4175, |
|
"step": 167 |
|
}, |
|
{ |
|
"Batch Mean": -0.6766242980957031, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4175, |
|
"step": 167 |
|
}, |
|
{ |
|
"Batch Mean": -1.1251049041748047, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4175, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 6.297861576080322, |
|
"learning_rate": 1.8315789473684211e-06, |
|
"loss": 0.3258, |
|
"step": 168 |
|
}, |
|
{ |
|
"Batch Mean": -0.7796592712402344, |
|
"accuracy": 0.875, |
|
"epoch": 0.42, |
|
"step": 168 |
|
}, |
|
{ |
|
"Batch Mean": -0.7946243286132812, |
|
"accuracy": 0.8125, |
|
"epoch": 0.42, |
|
"step": 168 |
|
}, |
|
{ |
|
"Batch Mean": -1.006448745727539, |
|
"accuracy": 0.875, |
|
"epoch": 0.42, |
|
"step": 168 |
|
}, |
|
{ |
|
"Batch Mean": -0.7247085571289062, |
|
"accuracy": 0.8125, |
|
"epoch": 0.42, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.4225, |
|
"grad_norm": 8.141414642333984, |
|
"learning_rate": 1.8236842105263159e-06, |
|
"loss": 0.4151, |
|
"step": 169 |
|
}, |
|
{ |
|
"Batch Mean": -0.5639686584472656, |
|
"accuracy": 0.65625, |
|
"epoch": 0.4225, |
|
"step": 169 |
|
}, |
|
{ |
|
"Batch Mean": 0.14863204956054688, |
|
"accuracy": 0.875, |
|
"epoch": 0.4225, |
|
"step": 169 |
|
}, |
|
{ |
|
"Batch Mean": -0.011386871337890625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4225, |
|
"step": 169 |
|
}, |
|
{ |
|
"Batch Mean": -0.9041824340820312, |
|
"accuracy": 0.75, |
|
"epoch": 0.4225, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.425, |
|
"grad_norm": 8.829943656921387, |
|
"learning_rate": 1.8157894736842106e-06, |
|
"loss": 0.4177, |
|
"step": 170 |
|
}, |
|
{ |
|
"Batch Mean": -0.2944221496582031, |
|
"accuracy": 0.90625, |
|
"epoch": 0.425, |
|
"step": 170 |
|
}, |
|
{ |
|
"Batch Mean": 0.0094757080078125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.425, |
|
"step": 170 |
|
}, |
|
{ |
|
"Batch Mean": -0.3963432312011719, |
|
"accuracy": 0.8125, |
|
"epoch": 0.425, |
|
"step": 170 |
|
}, |
|
{ |
|
"Batch Mean": 0.25408172607421875, |
|
"accuracy": 0.875, |
|
"epoch": 0.425, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4275, |
|
"grad_norm": 8.28211498260498, |
|
"learning_rate": 1.8078947368421052e-06, |
|
"loss": 0.3347, |
|
"step": 171 |
|
}, |
|
{ |
|
"Batch Mean": -0.851898193359375, |
|
"accuracy": 0.875, |
|
"epoch": 0.4275, |
|
"step": 171 |
|
}, |
|
{ |
|
"Batch Mean": -0.6146736145019531, |
|
"accuracy": 0.78125, |
|
"epoch": 0.4275, |
|
"step": 171 |
|
}, |
|
{ |
|
"Batch Mean": -0.6027908325195312, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4275, |
|
"step": 171 |
|
}, |
|
{ |
|
"Batch Mean": 0.005615234375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4275, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 8.386981964111328, |
|
"learning_rate": 1.8e-06, |
|
"loss": 0.3281, |
|
"step": 172 |
|
}, |
|
{ |
|
"Batch Mean": -0.19662857055664062, |
|
"accuracy": 0.78125, |
|
"epoch": 0.43, |
|
"step": 172 |
|
}, |
|
{ |
|
"Batch Mean": -0.06884765625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.43, |
|
"step": 172 |
|
}, |
|
{ |
|
"Batch Mean": 0.07791900634765625, |
|
"accuracy": 0.9375, |
|
"epoch": 0.43, |
|
"step": 172 |
|
}, |
|
{ |
|
"Batch Mean": 0.6075725555419922, |
|
"accuracy": 0.875, |
|
"epoch": 0.43, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4325, |
|
"grad_norm": 9.057024955749512, |
|
"learning_rate": 1.7921052631578947e-06, |
|
"loss": 0.352, |
|
"step": 173 |
|
}, |
|
{ |
|
"Batch Mean": 0.6575965881347656, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4325, |
|
"step": 173 |
|
}, |
|
{ |
|
"Batch Mean": 0.3367919921875, |
|
"accuracy": 0.75, |
|
"epoch": 0.4325, |
|
"step": 173 |
|
}, |
|
{ |
|
"Batch Mean": 0.3530693054199219, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4325, |
|
"step": 173 |
|
}, |
|
{ |
|
"Batch Mean": 0.14144515991210938, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4325, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.435, |
|
"grad_norm": 11.892682075500488, |
|
"learning_rate": 1.7842105263157894e-06, |
|
"loss": 0.4683, |
|
"step": 174 |
|
}, |
|
{ |
|
"Batch Mean": 0.6257972717285156, |
|
"accuracy": 0.78125, |
|
"epoch": 0.435, |
|
"step": 174 |
|
}, |
|
{ |
|
"Batch Mean": 1.0938339233398438, |
|
"accuracy": 0.84375, |
|
"epoch": 0.435, |
|
"step": 174 |
|
}, |
|
{ |
|
"Batch Mean": -0.27980995178222656, |
|
"accuracy": 0.78125, |
|
"epoch": 0.435, |
|
"step": 174 |
|
}, |
|
{ |
|
"Batch Mean": 0.36159515380859375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.435, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 11.794699668884277, |
|
"learning_rate": 1.7763157894736842e-06, |
|
"loss": 0.4776, |
|
"step": 175 |
|
}, |
|
{ |
|
"Batch Mean": 0.036930084228515625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4375, |
|
"step": 175 |
|
}, |
|
{ |
|
"Batch Mean": 0.08300018310546875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.4375, |
|
"step": 175 |
|
}, |
|
{ |
|
"Batch Mean": 1.2978973388671875, |
|
"accuracy": 0.75, |
|
"epoch": 0.4375, |
|
"step": 175 |
|
}, |
|
{ |
|
"Batch Mean": 1.2393798828125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4375, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 9.54262638092041, |
|
"learning_rate": 1.768421052631579e-06, |
|
"loss": 0.3612, |
|
"step": 176 |
|
}, |
|
{ |
|
"Batch Mean": 0.20166015625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.44, |
|
"step": 176 |
|
}, |
|
{ |
|
"Batch Mean": 0.652130126953125, |
|
"accuracy": 0.75, |
|
"epoch": 0.44, |
|
"step": 176 |
|
}, |
|
{ |
|
"Batch Mean": 0.4303455948829651, |
|
"accuracy": 0.875, |
|
"epoch": 0.44, |
|
"step": 176 |
|
}, |
|
{ |
|
"Batch Mean": 0.0682220458984375, |
|
"accuracy": 0.90625, |
|
"epoch": 0.44, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4425, |
|
"grad_norm": 8.427282333374023, |
|
"learning_rate": 1.7605263157894739e-06, |
|
"loss": 0.3154, |
|
"step": 177 |
|
}, |
|
{ |
|
"Batch Mean": 1.085052490234375, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4425, |
|
"step": 177 |
|
}, |
|
{ |
|
"Batch Mean": -0.2761383056640625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4425, |
|
"step": 177 |
|
}, |
|
{ |
|
"Batch Mean": 1.0452957153320312, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4425, |
|
"step": 177 |
|
}, |
|
{ |
|
"Batch Mean": -0.9046804904937744, |
|
"accuracy": 0.875, |
|
"epoch": 0.4425, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.445, |
|
"grad_norm": 8.763394355773926, |
|
"learning_rate": 1.7526315789473686e-06, |
|
"loss": 0.3493, |
|
"step": 178 |
|
}, |
|
{ |
|
"Batch Mean": 1.1603584289550781, |
|
"accuracy": 0.84375, |
|
"epoch": 0.445, |
|
"step": 178 |
|
}, |
|
{ |
|
"Batch Mean": 0.88775634765625, |
|
"accuracy": 0.875, |
|
"epoch": 0.445, |
|
"step": 178 |
|
}, |
|
{ |
|
"Batch Mean": 0.7270965576171875, |
|
"accuracy": 0.9375, |
|
"epoch": 0.445, |
|
"step": 178 |
|
}, |
|
{ |
|
"Batch Mean": 0.650848388671875, |
|
"accuracy": 0.75, |
|
"epoch": 0.445, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4475, |
|
"grad_norm": 9.489066123962402, |
|
"learning_rate": 1.7447368421052633e-06, |
|
"loss": 0.3698, |
|
"step": 179 |
|
}, |
|
{ |
|
"Batch Mean": -0.037494659423828125, |
|
"accuracy": 0.875, |
|
"epoch": 0.4475, |
|
"step": 179 |
|
}, |
|
{ |
|
"Batch Mean": 1.8027801513671875, |
|
"accuracy": 0.6875, |
|
"epoch": 0.4475, |
|
"step": 179 |
|
}, |
|
{ |
|
"Batch Mean": 0.248992919921875, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4475, |
|
"step": 179 |
|
}, |
|
{ |
|
"Batch Mean": 0.5011711120605469, |
|
"accuracy": 0.75, |
|
"epoch": 0.4475, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 10.667496681213379, |
|
"learning_rate": 1.736842105263158e-06, |
|
"loss": 0.3691, |
|
"step": 180 |
|
}, |
|
{ |
|
"Batch Mean": 1.1597976684570312, |
|
"accuracy": 0.90625, |
|
"epoch": 0.45, |
|
"step": 180 |
|
}, |
|
{ |
|
"Batch Mean": -0.5515823364257812, |
|
"accuracy": 0.84375, |
|
"epoch": 0.45, |
|
"step": 180 |
|
}, |
|
{ |
|
"Batch Mean": 0.6664199829101562, |
|
"accuracy": 0.875, |
|
"epoch": 0.45, |
|
"step": 180 |
|
}, |
|
{ |
|
"Batch Mean": 0.7287979125976562, |
|
"accuracy": 0.8125, |
|
"epoch": 0.45, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4525, |
|
"grad_norm": 9.613815307617188, |
|
"learning_rate": 1.7289473684210526e-06, |
|
"loss": 0.3561, |
|
"step": 181 |
|
}, |
|
{ |
|
"Batch Mean": 0.7310791015625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.4525, |
|
"step": 181 |
|
}, |
|
{ |
|
"Batch Mean": 1.0141067504882812, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4525, |
|
"step": 181 |
|
}, |
|
{ |
|
"Batch Mean": 0.2852134704589844, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4525, |
|
"step": 181 |
|
}, |
|
{ |
|
"Batch Mean": 0.8842849731445312, |
|
"accuracy": 0.59375, |
|
"epoch": 0.4525, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.455, |
|
"grad_norm": 10.207335472106934, |
|
"learning_rate": 1.7210526315789474e-06, |
|
"loss": 0.4445, |
|
"step": 182 |
|
}, |
|
{ |
|
"Batch Mean": 0.48291015625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.455, |
|
"step": 182 |
|
}, |
|
{ |
|
"Batch Mean": 0.6722218990325928, |
|
"accuracy": 0.8125, |
|
"epoch": 0.455, |
|
"step": 182 |
|
}, |
|
{ |
|
"Batch Mean": 0.13133621215820312, |
|
"accuracy": 0.84375, |
|
"epoch": 0.455, |
|
"step": 182 |
|
}, |
|
{ |
|
"Batch Mean": 0.532806396484375, |
|
"accuracy": 0.71875, |
|
"epoch": 0.455, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.4575, |
|
"grad_norm": 11.203272819519043, |
|
"learning_rate": 1.7131578947368421e-06, |
|
"loss": 0.4415, |
|
"step": 183 |
|
}, |
|
{ |
|
"Batch Mean": 0.21616744995117188, |
|
"accuracy": 0.75, |
|
"epoch": 0.4575, |
|
"step": 183 |
|
}, |
|
{ |
|
"Batch Mean": 0.7711029052734375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4575, |
|
"step": 183 |
|
}, |
|
{ |
|
"Batch Mean": 0.8978157043457031, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4575, |
|
"step": 183 |
|
}, |
|
{ |
|
"Batch Mean": 0.8540107607841492, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4575, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 9.341168403625488, |
|
"learning_rate": 1.7052631578947369e-06, |
|
"loss": 0.3346, |
|
"step": 184 |
|
}, |
|
{ |
|
"Batch Mean": -0.21237564086914062, |
|
"accuracy": 0.84375, |
|
"epoch": 0.46, |
|
"step": 184 |
|
}, |
|
{ |
|
"Batch Mean": -0.0823822021484375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.46, |
|
"step": 184 |
|
}, |
|
{ |
|
"Batch Mean": 0.3316192626953125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.46, |
|
"step": 184 |
|
}, |
|
{ |
|
"Batch Mean": 0.41109466552734375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.46, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.4625, |
|
"grad_norm": 10.488679885864258, |
|
"learning_rate": 1.6973684210526316e-06, |
|
"loss": 0.4111, |
|
"step": 185 |
|
}, |
|
{ |
|
"Batch Mean": 0.288543701171875, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4625, |
|
"step": 185 |
|
}, |
|
{ |
|
"Batch Mean": 0.44722747802734375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.4625, |
|
"step": 185 |
|
}, |
|
{ |
|
"Batch Mean": 0.3730621337890625, |
|
"accuracy": 0.875, |
|
"epoch": 0.4625, |
|
"step": 185 |
|
}, |
|
{ |
|
"Batch Mean": 0.18344879150390625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4625, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.465, |
|
"grad_norm": 9.368718147277832, |
|
"learning_rate": 1.6894736842105264e-06, |
|
"loss": 0.3609, |
|
"step": 186 |
|
}, |
|
{ |
|
"Batch Mean": 0.7768054008483887, |
|
"accuracy": 0.9375, |
|
"epoch": 0.465, |
|
"step": 186 |
|
}, |
|
{ |
|
"Batch Mean": 0.3496270179748535, |
|
"accuracy": 0.8125, |
|
"epoch": 0.465, |
|
"step": 186 |
|
}, |
|
{ |
|
"Batch Mean": -0.13415241241455078, |
|
"accuracy": 0.84375, |
|
"epoch": 0.465, |
|
"step": 186 |
|
}, |
|
{ |
|
"Batch Mean": 0.0657806396484375, |
|
"accuracy": 0.75, |
|
"epoch": 0.465, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.4675, |
|
"grad_norm": 7.975052833557129, |
|
"learning_rate": 1.6815789473684209e-06, |
|
"loss": 0.3371, |
|
"step": 187 |
|
}, |
|
{ |
|
"Batch Mean": 0.7432992458343506, |
|
"accuracy": 0.78125, |
|
"epoch": 0.4675, |
|
"step": 187 |
|
}, |
|
{ |
|
"Batch Mean": -0.3183479309082031, |
|
"accuracy": 0.9375, |
|
"epoch": 0.4675, |
|
"step": 187 |
|
}, |
|
{ |
|
"Batch Mean": 0.435943603515625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4675, |
|
"step": 187 |
|
}, |
|
{ |
|
"Batch Mean": 0.7551403045654297, |
|
"accuracy": 0.71875, |
|
"epoch": 0.4675, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 9.242426872253418, |
|
"learning_rate": 1.6736842105263156e-06, |
|
"loss": 0.3914, |
|
"step": 188 |
|
}, |
|
{ |
|
"Batch Mean": -0.22603130340576172, |
|
"accuracy": 0.78125, |
|
"epoch": 0.47, |
|
"step": 188 |
|
}, |
|
{ |
|
"Batch Mean": 0.21080398559570312, |
|
"accuracy": 0.8125, |
|
"epoch": 0.47, |
|
"step": 188 |
|
}, |
|
{ |
|
"Batch Mean": 1.2565302848815918, |
|
"accuracy": 0.84375, |
|
"epoch": 0.47, |
|
"step": 188 |
|
}, |
|
{ |
|
"Batch Mean": 0.49529266357421875, |
|
"accuracy": 0.75, |
|
"epoch": 0.47, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.4725, |
|
"grad_norm": 10.69687557220459, |
|
"learning_rate": 1.6657894736842104e-06, |
|
"loss": 0.3953, |
|
"step": 189 |
|
}, |
|
{ |
|
"Batch Mean": 0.20034027099609375, |
|
"accuracy": 0.9375, |
|
"epoch": 0.4725, |
|
"step": 189 |
|
}, |
|
{ |
|
"Batch Mean": -0.8117656707763672, |
|
"accuracy": 0.78125, |
|
"epoch": 0.4725, |
|
"step": 189 |
|
}, |
|
{ |
|
"Batch Mean": 0.21240997314453125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4725, |
|
"step": 189 |
|
}, |
|
{ |
|
"Batch Mean": -0.209259033203125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4725, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.475, |
|
"grad_norm": 8.423662185668945, |
|
"learning_rate": 1.6578947368421056e-06, |
|
"loss": 0.3507, |
|
"step": 190 |
|
}, |
|
{ |
|
"Batch Mean": -0.4339327812194824, |
|
"accuracy": 0.84375, |
|
"epoch": 0.475, |
|
"step": 190 |
|
}, |
|
{ |
|
"Batch Mean": -0.024566650390625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.475, |
|
"step": 190 |
|
}, |
|
{ |
|
"Batch Mean": 0.2550210952758789, |
|
"accuracy": 0.84375, |
|
"epoch": 0.475, |
|
"step": 190 |
|
}, |
|
{ |
|
"Batch Mean": -0.426666259765625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.475, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4775, |
|
"grad_norm": 7.80448055267334, |
|
"learning_rate": 1.65e-06, |
|
"loss": 0.3527, |
|
"step": 191 |
|
}, |
|
{ |
|
"Batch Mean": -0.26767730712890625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4775, |
|
"step": 191 |
|
}, |
|
{ |
|
"Batch Mean": -0.1922893524169922, |
|
"accuracy": 0.75, |
|
"epoch": 0.4775, |
|
"step": 191 |
|
}, |
|
{ |
|
"Batch Mean": -0.676006555557251, |
|
"accuracy": 0.78125, |
|
"epoch": 0.4775, |
|
"step": 191 |
|
}, |
|
{ |
|
"Batch Mean": 0.487579345703125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.4775, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 8.321696281433105, |
|
"learning_rate": 1.6421052631578948e-06, |
|
"loss": 0.3313, |
|
"step": 192 |
|
}, |
|
{ |
|
"Batch Mean": -0.0352783203125, |
|
"accuracy": 0.875, |
|
"epoch": 0.48, |
|
"step": 192 |
|
}, |
|
{ |
|
"Batch Mean": -0.2429046630859375, |
|
"accuracy": 0.75, |
|
"epoch": 0.48, |
|
"step": 192 |
|
}, |
|
{ |
|
"Batch Mean": -0.06024169921875, |
|
"accuracy": 0.75, |
|
"epoch": 0.48, |
|
"step": 192 |
|
}, |
|
{ |
|
"Batch Mean": -0.0357513427734375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.48, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.4825, |
|
"grad_norm": 8.304304122924805, |
|
"learning_rate": 1.6342105263157896e-06, |
|
"loss": 0.3774, |
|
"step": 193 |
|
}, |
|
{ |
|
"Batch Mean": 0.4979820251464844, |
|
"accuracy": 0.875, |
|
"epoch": 0.4825, |
|
"step": 193 |
|
}, |
|
{ |
|
"Batch Mean": -0.4633331298828125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4825, |
|
"step": 193 |
|
}, |
|
{ |
|
"Batch Mean": -0.16374969482421875, |
|
"accuracy": 0.875, |
|
"epoch": 0.4825, |
|
"step": 193 |
|
}, |
|
{ |
|
"Batch Mean": -0.5900943279266357, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4825, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.485, |
|
"grad_norm": 8.279520988464355, |
|
"learning_rate": 1.6263157894736843e-06, |
|
"loss": 0.3341, |
|
"step": 194 |
|
}, |
|
{ |
|
"Batch Mean": -0.09425163269042969, |
|
"accuracy": 0.84375, |
|
"epoch": 0.485, |
|
"step": 194 |
|
}, |
|
{ |
|
"Batch Mean": -0.13818645477294922, |
|
"accuracy": 0.71875, |
|
"epoch": 0.485, |
|
"step": 194 |
|
}, |
|
{ |
|
"Batch Mean": -1.012359619140625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.485, |
|
"step": 194 |
|
}, |
|
{ |
|
"Batch Mean": 0.1357574462890625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.485, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4875, |
|
"grad_norm": 8.465059280395508, |
|
"learning_rate": 1.618421052631579e-06, |
|
"loss": 0.3864, |
|
"step": 195 |
|
}, |
|
{ |
|
"Batch Mean": 0.15414810180664062, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4875, |
|
"step": 195 |
|
}, |
|
{ |
|
"Batch Mean": -0.1776885986328125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4875, |
|
"step": 195 |
|
}, |
|
{ |
|
"Batch Mean": 0.6926498413085938, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4875, |
|
"step": 195 |
|
}, |
|
{ |
|
"Batch Mean": -0.368560791015625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4875, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 8.97960376739502, |
|
"learning_rate": 1.6105263157894738e-06, |
|
"loss": 0.3509, |
|
"step": 196 |
|
}, |
|
{ |
|
"Batch Mean": 0.1314239501953125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.49, |
|
"step": 196 |
|
}, |
|
{ |
|
"Batch Mean": 0.5386600494384766, |
|
"accuracy": 0.78125, |
|
"epoch": 0.49, |
|
"step": 196 |
|
}, |
|
{ |
|
"Batch Mean": -0.04267120361328125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.49, |
|
"step": 196 |
|
}, |
|
{ |
|
"Batch Mean": -0.118682861328125, |
|
"accuracy": 0.75, |
|
"epoch": 0.49, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.4925, |
|
"grad_norm": 9.914933204650879, |
|
"learning_rate": 1.6026315789473683e-06, |
|
"loss": 0.4482, |
|
"step": 197 |
|
}, |
|
{ |
|
"Batch Mean": 0.4810333251953125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.4925, |
|
"step": 197 |
|
}, |
|
{ |
|
"Batch Mean": -0.2501678466796875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.4925, |
|
"step": 197 |
|
}, |
|
{ |
|
"Batch Mean": 0.2891963720321655, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4925, |
|
"step": 197 |
|
}, |
|
{ |
|
"Batch Mean": 1.520294189453125, |
|
"accuracy": 0.875, |
|
"epoch": 0.4925, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.495, |
|
"grad_norm": 11.716139793395996, |
|
"learning_rate": 1.594736842105263e-06, |
|
"loss": 0.3593, |
|
"step": 198 |
|
}, |
|
{ |
|
"Batch Mean": 0.05594635009765625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.495, |
|
"step": 198 |
|
}, |
|
{ |
|
"Batch Mean": 0.24084854125976562, |
|
"accuracy": 0.84375, |
|
"epoch": 0.495, |
|
"step": 198 |
|
}, |
|
{ |
|
"Batch Mean": 0.1896038055419922, |
|
"accuracy": 0.90625, |
|
"epoch": 0.495, |
|
"step": 198 |
|
}, |
|
{ |
|
"Batch Mean": 0.554718017578125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.495, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.4975, |
|
"grad_norm": 8.381601333618164, |
|
"learning_rate": 1.5868421052631578e-06, |
|
"loss": 0.3333, |
|
"step": 199 |
|
}, |
|
{ |
|
"Batch Mean": 0.05019569396972656, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4975, |
|
"step": 199 |
|
}, |
|
{ |
|
"Batch Mean": 0.7256813049316406, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4975, |
|
"step": 199 |
|
}, |
|
{ |
|
"Batch Mean": 1.0603713989257812, |
|
"accuracy": 0.96875, |
|
"epoch": 0.4975, |
|
"step": 199 |
|
}, |
|
{ |
|
"Batch Mean": 0.70733642578125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.4975, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 7.317599773406982, |
|
"learning_rate": 1.5789473684210526e-06, |
|
"loss": 0.2282, |
|
"step": 200 |
|
}, |
|
{ |
|
"Batch Mean": 0.7157418727874756, |
|
"accuracy": 0.75, |
|
"epoch": 0.5, |
|
"step": 200 |
|
}, |
|
{ |
|
"Batch Mean": 0.44158172607421875, |
|
"accuracy": 0.90625, |
|
"epoch": 0.5, |
|
"step": 200 |
|
}, |
|
{ |
|
"Batch Mean": 0.82073974609375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5, |
|
"step": 200 |
|
}, |
|
{ |
|
"Batch Mean": 0.3656768798828125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5025, |
|
"grad_norm": 9.914247512817383, |
|
"learning_rate": 1.5710526315789473e-06, |
|
"loss": 0.3714, |
|
"step": 201 |
|
}, |
|
{ |
|
"Batch Mean": 2.0449066162109375, |
|
"accuracy": 0.75, |
|
"epoch": 0.5025, |
|
"step": 201 |
|
}, |
|
{ |
|
"Batch Mean": 1.1046676635742188, |
|
"accuracy": 0.9375, |
|
"epoch": 0.5025, |
|
"step": 201 |
|
}, |
|
{ |
|
"Batch Mean": 0.9132452011108398, |
|
"accuracy": 0.875, |
|
"epoch": 0.5025, |
|
"step": 201 |
|
}, |
|
{ |
|
"Batch Mean": -0.29121971130371094, |
|
"accuracy": 0.875, |
|
"epoch": 0.5025, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.505, |
|
"grad_norm": 8.482183456420898, |
|
"learning_rate": 1.563157894736842e-06, |
|
"loss": 0.2812, |
|
"step": 202 |
|
}, |
|
{ |
|
"Batch Mean": 1.1230392456054688, |
|
"accuracy": 0.78125, |
|
"epoch": 0.505, |
|
"step": 202 |
|
}, |
|
{ |
|
"Batch Mean": 1.31500244140625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.505, |
|
"step": 202 |
|
}, |
|
{ |
|
"Batch Mean": 1.9371337890625, |
|
"accuracy": 0.9375, |
|
"epoch": 0.505, |
|
"step": 202 |
|
}, |
|
{ |
|
"Batch Mean": 1.4601593017578125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.505, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5075, |
|
"grad_norm": 9.382710456848145, |
|
"learning_rate": 1.5552631578947368e-06, |
|
"loss": 0.3238, |
|
"step": 203 |
|
}, |
|
{ |
|
"Batch Mean": 2.134002685546875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5075, |
|
"step": 203 |
|
}, |
|
{ |
|
"Batch Mean": 1.7428550720214844, |
|
"accuracy": 0.875, |
|
"epoch": 0.5075, |
|
"step": 203 |
|
}, |
|
{ |
|
"Batch Mean": 1.4754257202148438, |
|
"accuracy": 0.90625, |
|
"epoch": 0.5075, |
|
"step": 203 |
|
}, |
|
{ |
|
"Batch Mean": 1.1280174255371094, |
|
"accuracy": 0.96875, |
|
"epoch": 0.5075, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 9.43131160736084, |
|
"learning_rate": 1.5473684210526318e-06, |
|
"loss": 0.2696, |
|
"step": 204 |
|
}, |
|
{ |
|
"Batch Mean": 2.1200408935546875, |
|
"accuracy": 0.875, |
|
"epoch": 0.51, |
|
"step": 204 |
|
}, |
|
{ |
|
"Batch Mean": 2.031508445739746, |
|
"accuracy": 0.84375, |
|
"epoch": 0.51, |
|
"step": 204 |
|
}, |
|
{ |
|
"Batch Mean": 2.3471832275390625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.51, |
|
"step": 204 |
|
}, |
|
{ |
|
"Batch Mean": 1.75640869140625, |
|
"accuracy": 0.875, |
|
"epoch": 0.51, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5125, |
|
"grad_norm": 10.774242401123047, |
|
"learning_rate": 1.5394736842105265e-06, |
|
"loss": 0.3529, |
|
"step": 205 |
|
}, |
|
{ |
|
"Batch Mean": 3.005859375, |
|
"accuracy": 0.6875, |
|
"epoch": 0.5125, |
|
"step": 205 |
|
}, |
|
{ |
|
"Batch Mean": 2.5205230712890625, |
|
"accuracy": 0.75, |
|
"epoch": 0.5125, |
|
"step": 205 |
|
}, |
|
{ |
|
"Batch Mean": 1.7466964721679688, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5125, |
|
"step": 205 |
|
}, |
|
{ |
|
"Batch Mean": 2.30712890625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.5125, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.515, |
|
"grad_norm": 16.1846981048584, |
|
"learning_rate": 1.5315789473684213e-06, |
|
"loss": 0.5315, |
|
"step": 206 |
|
}, |
|
{ |
|
"Batch Mean": 3.2044105529785156, |
|
"accuracy": 0.8125, |
|
"epoch": 0.515, |
|
"step": 206 |
|
}, |
|
{ |
|
"Batch Mean": 1.8886871337890625, |
|
"accuracy": 0.875, |
|
"epoch": 0.515, |
|
"step": 206 |
|
}, |
|
{ |
|
"Batch Mean": 2.1570053100585938, |
|
"accuracy": 0.90625, |
|
"epoch": 0.515, |
|
"step": 206 |
|
}, |
|
{ |
|
"Batch Mean": 1.6560897827148438, |
|
"accuracy": 0.8125, |
|
"epoch": 0.515, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5175, |
|
"grad_norm": 9.28087043762207, |
|
"learning_rate": 1.5236842105263158e-06, |
|
"loss": 0.3136, |
|
"step": 207 |
|
}, |
|
{ |
|
"Batch Mean": 1.535853385925293, |
|
"accuracy": 0.71875, |
|
"epoch": 0.5175, |
|
"step": 207 |
|
}, |
|
{ |
|
"Batch Mean": 2.13421630859375, |
|
"accuracy": 0.75, |
|
"epoch": 0.5175, |
|
"step": 207 |
|
}, |
|
{ |
|
"Batch Mean": 1.4612560272216797, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5175, |
|
"step": 207 |
|
}, |
|
{ |
|
"Batch Mean": 1.895176887512207, |
|
"accuracy": 0.9375, |
|
"epoch": 0.5175, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 11.444698333740234, |
|
"learning_rate": 1.5157894736842105e-06, |
|
"loss": 0.3901, |
|
"step": 208 |
|
}, |
|
{ |
|
"Batch Mean": 1.9641056060791016, |
|
"accuracy": 0.90625, |
|
"epoch": 0.52, |
|
"step": 208 |
|
}, |
|
{ |
|
"Batch Mean": 1.5080509185791016, |
|
"accuracy": 0.96875, |
|
"epoch": 0.52, |
|
"step": 208 |
|
}, |
|
{ |
|
"Batch Mean": 1.1202011108398438, |
|
"accuracy": 0.84375, |
|
"epoch": 0.52, |
|
"step": 208 |
|
}, |
|
{ |
|
"Batch Mean": 1.5916824340820312, |
|
"accuracy": 0.8125, |
|
"epoch": 0.52, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.5225, |
|
"grad_norm": 8.556312561035156, |
|
"learning_rate": 1.5078947368421053e-06, |
|
"loss": 0.2412, |
|
"step": 209 |
|
}, |
|
{ |
|
"Batch Mean": 1.2708587646484375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5225, |
|
"step": 209 |
|
}, |
|
{ |
|
"Batch Mean": 1.7554092407226562, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5225, |
|
"step": 209 |
|
}, |
|
{ |
|
"Batch Mean": 1.596282958984375, |
|
"accuracy": 0.90625, |
|
"epoch": 0.5225, |
|
"step": 209 |
|
}, |
|
{ |
|
"Batch Mean": 2.2964630126953125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5225, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.525, |
|
"grad_norm": 10.603358268737793, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.4092, |
|
"step": 210 |
|
}, |
|
{ |
|
"Batch Mean": 1.58636474609375, |
|
"accuracy": 0.65625, |
|
"epoch": 0.525, |
|
"step": 210 |
|
}, |
|
{ |
|
"Batch Mean": 1.4641342163085938, |
|
"accuracy": 0.90625, |
|
"epoch": 0.525, |
|
"step": 210 |
|
}, |
|
{ |
|
"Batch Mean": 1.2256956100463867, |
|
"accuracy": 0.875, |
|
"epoch": 0.525, |
|
"step": 210 |
|
}, |
|
{ |
|
"Batch Mean": 1.890106201171875, |
|
"accuracy": 0.9375, |
|
"epoch": 0.525, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5275, |
|
"grad_norm": 9.736684799194336, |
|
"learning_rate": 1.4921052631578948e-06, |
|
"loss": 0.3204, |
|
"step": 211 |
|
}, |
|
{ |
|
"Batch Mean": 0.8585739135742188, |
|
"accuracy": 0.875, |
|
"epoch": 0.5275, |
|
"step": 211 |
|
}, |
|
{ |
|
"Batch Mean": 1.5339336395263672, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5275, |
|
"step": 211 |
|
}, |
|
{ |
|
"Batch Mean": 1.3365402221679688, |
|
"accuracy": 0.75, |
|
"epoch": 0.5275, |
|
"step": 211 |
|
}, |
|
{ |
|
"Batch Mean": 1.0759520530700684, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5275, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 12.381434440612793, |
|
"learning_rate": 1.4842105263157895e-06, |
|
"loss": 0.4277, |
|
"step": 212 |
|
}, |
|
{ |
|
"Batch Mean": 1.0791473388671875, |
|
"accuracy": 0.75, |
|
"epoch": 0.53, |
|
"step": 212 |
|
}, |
|
{ |
|
"Batch Mean": 1.3099441528320312, |
|
"accuracy": 0.8125, |
|
"epoch": 0.53, |
|
"step": 212 |
|
}, |
|
{ |
|
"Batch Mean": 1.1486282348632812, |
|
"accuracy": 0.75, |
|
"epoch": 0.53, |
|
"step": 212 |
|
}, |
|
{ |
|
"Batch Mean": 0.118804931640625, |
|
"accuracy": 0.875, |
|
"epoch": 0.53, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5325, |
|
"grad_norm": 12.662047386169434, |
|
"learning_rate": 1.4763157894736843e-06, |
|
"loss": 0.4684, |
|
"step": 213 |
|
}, |
|
{ |
|
"Batch Mean": 0.7102508544921875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5325, |
|
"step": 213 |
|
}, |
|
{ |
|
"Batch Mean": 0.9431266784667969, |
|
"accuracy": 0.875, |
|
"epoch": 0.5325, |
|
"step": 213 |
|
}, |
|
{ |
|
"Batch Mean": 0.718994140625, |
|
"accuracy": 0.875, |
|
"epoch": 0.5325, |
|
"step": 213 |
|
}, |
|
{ |
|
"Batch Mean": 1.0906906127929688, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5325, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.535, |
|
"grad_norm": 11.220440864562988, |
|
"learning_rate": 1.468421052631579e-06, |
|
"loss": 0.362, |
|
"step": 214 |
|
}, |
|
{ |
|
"Batch Mean": 1.2099227905273438, |
|
"accuracy": 0.90625, |
|
"epoch": 0.535, |
|
"step": 214 |
|
}, |
|
{ |
|
"Batch Mean": 0.4810771942138672, |
|
"accuracy": 0.8125, |
|
"epoch": 0.535, |
|
"step": 214 |
|
}, |
|
{ |
|
"Batch Mean": 1.1623077392578125, |
|
"accuracy": 0.875, |
|
"epoch": 0.535, |
|
"step": 214 |
|
}, |
|
{ |
|
"Batch Mean": 0.9495391845703125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.535, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5375, |
|
"grad_norm": 10.310274124145508, |
|
"learning_rate": 1.4605263157894738e-06, |
|
"loss": 0.3185, |
|
"step": 215 |
|
}, |
|
{ |
|
"Batch Mean": 1.64483642578125, |
|
"accuracy": 0.875, |
|
"epoch": 0.5375, |
|
"step": 215 |
|
}, |
|
{ |
|
"Batch Mean": 1.425384521484375, |
|
"accuracy": 0.71875, |
|
"epoch": 0.5375, |
|
"step": 215 |
|
}, |
|
{ |
|
"Batch Mean": 1.1650314331054688, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5375, |
|
"step": 215 |
|
}, |
|
{ |
|
"Batch Mean": 0.7734451293945312, |
|
"accuracy": 0.90625, |
|
"epoch": 0.5375, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 9.699481964111328, |
|
"learning_rate": 1.4526315789473685e-06, |
|
"loss": 0.3705, |
|
"step": 216 |
|
}, |
|
{ |
|
"Batch Mean": 0.2362499237060547, |
|
"accuracy": 0.875, |
|
"epoch": 0.54, |
|
"step": 216 |
|
}, |
|
{ |
|
"Batch Mean": 0.6925016641616821, |
|
"accuracy": 0.84375, |
|
"epoch": 0.54, |
|
"step": 216 |
|
}, |
|
{ |
|
"Batch Mean": 0.17254638671875, |
|
"accuracy": 0.75, |
|
"epoch": 0.54, |
|
"step": 216 |
|
}, |
|
{ |
|
"Batch Mean": 0.7945098876953125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.54, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5425, |
|
"grad_norm": 8.40283203125, |
|
"learning_rate": 1.4447368421052633e-06, |
|
"loss": 0.3253, |
|
"step": 217 |
|
}, |
|
{ |
|
"Batch Mean": 0.285064697265625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5425, |
|
"step": 217 |
|
}, |
|
{ |
|
"Batch Mean": -0.3369560241699219, |
|
"accuracy": 0.71875, |
|
"epoch": 0.5425, |
|
"step": 217 |
|
}, |
|
{ |
|
"Batch Mean": 0.433685302734375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5425, |
|
"step": 217 |
|
}, |
|
{ |
|
"Batch Mean": -0.8839111328125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.5425, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.545, |
|
"grad_norm": 9.901025772094727, |
|
"learning_rate": 1.436842105263158e-06, |
|
"loss": 0.4004, |
|
"step": 218 |
|
}, |
|
{ |
|
"Batch Mean": -0.3134193420410156, |
|
"accuracy": 0.78125, |
|
"epoch": 0.545, |
|
"step": 218 |
|
}, |
|
{ |
|
"Batch Mean": 0.3160438537597656, |
|
"accuracy": 0.8125, |
|
"epoch": 0.545, |
|
"step": 218 |
|
}, |
|
{ |
|
"Batch Mean": 0.38923072814941406, |
|
"accuracy": 0.875, |
|
"epoch": 0.545, |
|
"step": 218 |
|
}, |
|
{ |
|
"Batch Mean": -0.12998676300048828, |
|
"accuracy": 0.8125, |
|
"epoch": 0.545, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5475, |
|
"grad_norm": 8.9075345993042, |
|
"learning_rate": 1.4289473684210525e-06, |
|
"loss": 0.3749, |
|
"step": 219 |
|
}, |
|
{ |
|
"Batch Mean": -0.24931716918945312, |
|
"accuracy": 0.9375, |
|
"epoch": 0.5475, |
|
"step": 219 |
|
}, |
|
{ |
|
"Batch Mean": 0.47113037109375, |
|
"accuracy": 0.875, |
|
"epoch": 0.5475, |
|
"step": 219 |
|
}, |
|
{ |
|
"Batch Mean": -0.0037775039672851562, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5475, |
|
"step": 219 |
|
}, |
|
{ |
|
"Batch Mean": -0.5166091918945312, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5475, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 8.356500625610352, |
|
"learning_rate": 1.4210526315789473e-06, |
|
"loss": 0.3279, |
|
"step": 220 |
|
}, |
|
{ |
|
"Batch Mean": -0.5496654510498047, |
|
"accuracy": 0.8125, |
|
"epoch": 0.55, |
|
"step": 220 |
|
}, |
|
{ |
|
"Batch Mean": 0.20181941986083984, |
|
"accuracy": 0.78125, |
|
"epoch": 0.55, |
|
"step": 220 |
|
}, |
|
{ |
|
"Batch Mean": -0.54656982421875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.55, |
|
"step": 220 |
|
}, |
|
{ |
|
"Batch Mean": -0.30051422119140625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.55, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5525, |
|
"grad_norm": 7.921041965484619, |
|
"learning_rate": 1.4131578947368422e-06, |
|
"loss": 0.341, |
|
"step": 221 |
|
}, |
|
{ |
|
"Batch Mean": -0.13405609130859375, |
|
"accuracy": 0.90625, |
|
"epoch": 0.5525, |
|
"step": 221 |
|
}, |
|
{ |
|
"Batch Mean": 0.010646820068359375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5525, |
|
"step": 221 |
|
}, |
|
{ |
|
"Batch Mean": 0.3812522888183594, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5525, |
|
"step": 221 |
|
}, |
|
{ |
|
"Batch Mean": -0.5361175537109375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5525, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.555, |
|
"grad_norm": 8.19301986694336, |
|
"learning_rate": 1.405263157894737e-06, |
|
"loss": 0.3547, |
|
"step": 222 |
|
}, |
|
{ |
|
"Batch Mean": 0.10836279392242432, |
|
"accuracy": 0.8125, |
|
"epoch": 0.555, |
|
"step": 222 |
|
}, |
|
{ |
|
"Batch Mean": -0.5170745849609375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.555, |
|
"step": 222 |
|
}, |
|
{ |
|
"Batch Mean": 0.1517333984375, |
|
"accuracy": 0.875, |
|
"epoch": 0.555, |
|
"step": 222 |
|
}, |
|
{ |
|
"Batch Mean": -0.07366180419921875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.555, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.5575, |
|
"grad_norm": 7.958079814910889, |
|
"learning_rate": 1.3973684210526317e-06, |
|
"loss": 0.346, |
|
"step": 223 |
|
}, |
|
{ |
|
"Batch Mean": 0.05038261413574219, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5575, |
|
"step": 223 |
|
}, |
|
{ |
|
"Batch Mean": 0.45386505126953125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5575, |
|
"step": 223 |
|
}, |
|
{ |
|
"Batch Mean": -0.5984153747558594, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5575, |
|
"step": 223 |
|
}, |
|
{ |
|
"Batch Mean": -0.6059384346008301, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5575, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 8.758768081665039, |
|
"learning_rate": 1.3894736842105263e-06, |
|
"loss": 0.3879, |
|
"step": 224 |
|
}, |
|
{ |
|
"Batch Mean": -0.15983963012695312, |
|
"accuracy": 0.875, |
|
"epoch": 0.56, |
|
"step": 224 |
|
}, |
|
{ |
|
"Batch Mean": -0.44620513916015625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.56, |
|
"step": 224 |
|
}, |
|
{ |
|
"Batch Mean": 0.8515167236328125, |
|
"accuracy": 0.71875, |
|
"epoch": 0.56, |
|
"step": 224 |
|
}, |
|
{ |
|
"Batch Mean": -0.22232818603515625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.56, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 8.232926368713379, |
|
"learning_rate": 1.381578947368421e-06, |
|
"loss": 0.3441, |
|
"step": 225 |
|
}, |
|
{ |
|
"Batch Mean": -0.40540122985839844, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5625, |
|
"step": 225 |
|
}, |
|
{ |
|
"Batch Mean": -0.0818939208984375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5625, |
|
"step": 225 |
|
}, |
|
{ |
|
"Batch Mean": -0.104736328125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5625, |
|
"step": 225 |
|
}, |
|
{ |
|
"Batch Mean": 0.058429718017578125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.5625, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.565, |
|
"grad_norm": 8.037442207336426, |
|
"learning_rate": 1.3736842105263158e-06, |
|
"loss": 0.328, |
|
"step": 226 |
|
}, |
|
{ |
|
"Batch Mean": -0.6231307983398438, |
|
"accuracy": 0.78125, |
|
"epoch": 0.565, |
|
"step": 226 |
|
}, |
|
{ |
|
"Batch Mean": -0.10566139221191406, |
|
"accuracy": 0.75, |
|
"epoch": 0.565, |
|
"step": 226 |
|
}, |
|
{ |
|
"Batch Mean": -0.45677947998046875, |
|
"accuracy": 0.75, |
|
"epoch": 0.565, |
|
"step": 226 |
|
}, |
|
{ |
|
"Batch Mean": 0.34828758239746094, |
|
"accuracy": 0.875, |
|
"epoch": 0.565, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5675, |
|
"grad_norm": 10.482982635498047, |
|
"learning_rate": 1.3657894736842107e-06, |
|
"loss": 0.3896, |
|
"step": 227 |
|
}, |
|
{ |
|
"Batch Mean": -0.6079559326171875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5675, |
|
"step": 227 |
|
}, |
|
{ |
|
"Batch Mean": 0.6353607177734375, |
|
"accuracy": 0.90625, |
|
"epoch": 0.5675, |
|
"step": 227 |
|
}, |
|
{ |
|
"Batch Mean": -0.0821685791015625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5675, |
|
"step": 227 |
|
}, |
|
{ |
|
"Batch Mean": -0.6024322509765625, |
|
"accuracy": 0.875, |
|
"epoch": 0.5675, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 9.87027359008789, |
|
"learning_rate": 1.3578947368421055e-06, |
|
"loss": 0.4043, |
|
"step": 228 |
|
}, |
|
{ |
|
"Batch Mean": 0.01294708251953125, |
|
"accuracy": 0.75, |
|
"epoch": 0.57, |
|
"step": 228 |
|
}, |
|
{ |
|
"Batch Mean": -0.0946044921875, |
|
"accuracy": 0.75, |
|
"epoch": 0.57, |
|
"step": 228 |
|
}, |
|
{ |
|
"Batch Mean": 0.2442779541015625, |
|
"accuracy": 0.75, |
|
"epoch": 0.57, |
|
"step": 228 |
|
}, |
|
{ |
|
"Batch Mean": -0.8115768432617188, |
|
"accuracy": 0.6875, |
|
"epoch": 0.57, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5725, |
|
"grad_norm": 10.491792678833008, |
|
"learning_rate": 1.35e-06, |
|
"loss": 0.5172, |
|
"step": 229 |
|
}, |
|
{ |
|
"Batch Mean": -0.7526817321777344, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5725, |
|
"step": 229 |
|
}, |
|
{ |
|
"Batch Mean": -0.886474609375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5725, |
|
"step": 229 |
|
}, |
|
{ |
|
"Batch Mean": 0.230804443359375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5725, |
|
"step": 229 |
|
}, |
|
{ |
|
"Batch Mean": -0.021968364715576172, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5725, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.575, |
|
"grad_norm": 8.642767906188965, |
|
"learning_rate": 1.3421052631578947e-06, |
|
"loss": 0.41, |
|
"step": 230 |
|
}, |
|
{ |
|
"Batch Mean": -0.39715576171875, |
|
"accuracy": 0.90625, |
|
"epoch": 0.575, |
|
"step": 230 |
|
}, |
|
{ |
|
"Batch Mean": -0.4642791748046875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.575, |
|
"step": 230 |
|
}, |
|
{ |
|
"Batch Mean": -0.17293310165405273, |
|
"accuracy": 0.84375, |
|
"epoch": 0.575, |
|
"step": 230 |
|
}, |
|
{ |
|
"Batch Mean": 0.32227325439453125, |
|
"accuracy": 0.875, |
|
"epoch": 0.575, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5775, |
|
"grad_norm": 7.04720401763916, |
|
"learning_rate": 1.3342105263157895e-06, |
|
"loss": 0.292, |
|
"step": 231 |
|
}, |
|
{ |
|
"Batch Mean": -0.10285186767578125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5775, |
|
"step": 231 |
|
}, |
|
{ |
|
"Batch Mean": -0.19066619873046875, |
|
"accuracy": 0.6875, |
|
"epoch": 0.5775, |
|
"step": 231 |
|
}, |
|
{ |
|
"Batch Mean": -0.33397674560546875, |
|
"accuracy": 0.9375, |
|
"epoch": 0.5775, |
|
"step": 231 |
|
}, |
|
{ |
|
"Batch Mean": 0.04266357421875, |
|
"accuracy": 0.75, |
|
"epoch": 0.5775, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 10.38796329498291, |
|
"learning_rate": 1.3263157894736842e-06, |
|
"loss": 0.4887, |
|
"step": 232 |
|
}, |
|
{ |
|
"Batch Mean": -0.6411285400390625, |
|
"accuracy": 0.875, |
|
"epoch": 0.58, |
|
"step": 232 |
|
}, |
|
{ |
|
"Batch Mean": -0.6589622497558594, |
|
"accuracy": 0.875, |
|
"epoch": 0.58, |
|
"step": 232 |
|
}, |
|
{ |
|
"Batch Mean": -0.6592941284179688, |
|
"accuracy": 0.78125, |
|
"epoch": 0.58, |
|
"step": 232 |
|
}, |
|
{ |
|
"Batch Mean": -0.2879180908203125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.58, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5825, |
|
"grad_norm": 7.456547737121582, |
|
"learning_rate": 1.318421052631579e-06, |
|
"loss": 0.3232, |
|
"step": 233 |
|
}, |
|
{ |
|
"Batch Mean": -0.6381454467773438, |
|
"accuracy": 0.75, |
|
"epoch": 0.5825, |
|
"step": 233 |
|
}, |
|
{ |
|
"Batch Mean": -0.38673877716064453, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5825, |
|
"step": 233 |
|
}, |
|
{ |
|
"Batch Mean": -0.5200033187866211, |
|
"accuracy": 0.96875, |
|
"epoch": 0.5825, |
|
"step": 233 |
|
}, |
|
{ |
|
"Batch Mean": 0.268524169921875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5825, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.585, |
|
"grad_norm": 8.48525619506836, |
|
"learning_rate": 1.3105263157894737e-06, |
|
"loss": 0.3436, |
|
"step": 234 |
|
}, |
|
{ |
|
"Batch Mean": -0.35057640075683594, |
|
"accuracy": 0.875, |
|
"epoch": 0.585, |
|
"step": 234 |
|
}, |
|
{ |
|
"Batch Mean": -0.6433639526367188, |
|
"accuracy": 0.875, |
|
"epoch": 0.585, |
|
"step": 234 |
|
}, |
|
{ |
|
"Batch Mean": -0.2548065185546875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.585, |
|
"step": 234 |
|
}, |
|
{ |
|
"Batch Mean": -0.7379226684570312, |
|
"accuracy": 0.78125, |
|
"epoch": 0.585, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5875, |
|
"grad_norm": 7.986849784851074, |
|
"learning_rate": 1.3026315789473685e-06, |
|
"loss": 0.363, |
|
"step": 235 |
|
}, |
|
{ |
|
"Batch Mean": 0.220703125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5875, |
|
"step": 235 |
|
}, |
|
{ |
|
"Batch Mean": -0.40514373779296875, |
|
"accuracy": 0.75, |
|
"epoch": 0.5875, |
|
"step": 235 |
|
}, |
|
{ |
|
"Batch Mean": -0.6641845703125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5875, |
|
"step": 235 |
|
}, |
|
{ |
|
"Batch Mean": -0.18366241455078125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5875, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 8.324469566345215, |
|
"learning_rate": 1.2947368421052632e-06, |
|
"loss": 0.3479, |
|
"step": 236 |
|
}, |
|
{ |
|
"Batch Mean": -0.4942283630371094, |
|
"accuracy": 0.71875, |
|
"epoch": 0.59, |
|
"step": 236 |
|
}, |
|
{ |
|
"Batch Mean": -0.4511871337890625, |
|
"accuracy": 0.875, |
|
"epoch": 0.59, |
|
"step": 236 |
|
}, |
|
{ |
|
"Batch Mean": -0.2188549041748047, |
|
"accuracy": 0.75, |
|
"epoch": 0.59, |
|
"step": 236 |
|
}, |
|
{ |
|
"Batch Mean": -0.431976318359375, |
|
"accuracy": 0.875, |
|
"epoch": 0.59, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.5925, |
|
"grad_norm": 9.84953784942627, |
|
"learning_rate": 1.286842105263158e-06, |
|
"loss": 0.3883, |
|
"step": 237 |
|
}, |
|
{ |
|
"Batch Mean": -0.22838401794433594, |
|
"accuracy": 0.6875, |
|
"epoch": 0.5925, |
|
"step": 237 |
|
}, |
|
{ |
|
"Batch Mean": 0.27164459228515625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5925, |
|
"step": 237 |
|
}, |
|
{ |
|
"Batch Mean": 0.04124641418457031, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5925, |
|
"step": 237 |
|
}, |
|
{ |
|
"Batch Mean": -0.847412109375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.5925, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.595, |
|
"grad_norm": 8.254185676574707, |
|
"learning_rate": 1.2789473684210527e-06, |
|
"loss": 0.3768, |
|
"step": 238 |
|
}, |
|
{ |
|
"Batch Mean": -0.4527606964111328, |
|
"accuracy": 0.90625, |
|
"epoch": 0.595, |
|
"step": 238 |
|
}, |
|
{ |
|
"Batch Mean": -0.35851097106933594, |
|
"accuracy": 0.75, |
|
"epoch": 0.595, |
|
"step": 238 |
|
}, |
|
{ |
|
"Batch Mean": -0.08063125610351562, |
|
"accuracy": 0.84375, |
|
"epoch": 0.595, |
|
"step": 238 |
|
}, |
|
{ |
|
"Batch Mean": 0.1613616943359375, |
|
"accuracy": 0.875, |
|
"epoch": 0.595, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5975, |
|
"grad_norm": 9.05409049987793, |
|
"learning_rate": 1.2710526315789474e-06, |
|
"loss": 0.3697, |
|
"step": 239 |
|
}, |
|
{ |
|
"Batch Mean": 0.2474193572998047, |
|
"accuracy": 0.78125, |
|
"epoch": 0.5975, |
|
"step": 239 |
|
}, |
|
{ |
|
"Batch Mean": 1.1201629638671875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5975, |
|
"step": 239 |
|
}, |
|
{ |
|
"Batch Mean": 0.15471649169921875, |
|
"accuracy": 0.71875, |
|
"epoch": 0.5975, |
|
"step": 239 |
|
}, |
|
{ |
|
"Batch Mean": -0.5026946067810059, |
|
"accuracy": 0.84375, |
|
"epoch": 0.5975, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 8.425403594970703, |
|
"learning_rate": 1.263157894736842e-06, |
|
"loss": 0.34, |
|
"step": 240 |
|
}, |
|
{ |
|
"Batch Mean": 0.4770984649658203, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6, |
|
"step": 240 |
|
}, |
|
{ |
|
"Batch Mean": 0.06944465637207031, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6, |
|
"step": 240 |
|
}, |
|
{ |
|
"Batch Mean": 0.487091064453125, |
|
"accuracy": 0.9375, |
|
"epoch": 0.6, |
|
"step": 240 |
|
}, |
|
{ |
|
"Batch Mean": 0.026515960693359375, |
|
"accuracy": 0.65625, |
|
"epoch": 0.6, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6025, |
|
"grad_norm": 8.503494262695312, |
|
"learning_rate": 1.255263157894737e-06, |
|
"loss": 0.3549, |
|
"step": 241 |
|
}, |
|
{ |
|
"Batch Mean": 0.30998992919921875, |
|
"accuracy": 0.875, |
|
"epoch": 0.6025, |
|
"step": 241 |
|
}, |
|
{ |
|
"Batch Mean": -0.07773590087890625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6025, |
|
"step": 241 |
|
}, |
|
{ |
|
"Batch Mean": 0.5370500087738037, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6025, |
|
"step": 241 |
|
}, |
|
{ |
|
"Batch Mean": 0.6121978759765625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6025, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.605, |
|
"grad_norm": 9.044095039367676, |
|
"learning_rate": 1.2473684210526317e-06, |
|
"loss": 0.3287, |
|
"step": 242 |
|
}, |
|
{ |
|
"Batch Mean": 0.062145233154296875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.605, |
|
"step": 242 |
|
}, |
|
{ |
|
"Batch Mean": -0.17317962646484375, |
|
"accuracy": 0.9375, |
|
"epoch": 0.605, |
|
"step": 242 |
|
}, |
|
{ |
|
"Batch Mean": 0.0579681396484375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.605, |
|
"step": 242 |
|
}, |
|
{ |
|
"Batch Mean": 1.0936737060546875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.605, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6075, |
|
"grad_norm": 10.647380828857422, |
|
"learning_rate": 1.2394736842105264e-06, |
|
"loss": 0.3714, |
|
"step": 243 |
|
}, |
|
{ |
|
"Batch Mean": 0.4714508056640625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6075, |
|
"step": 243 |
|
}, |
|
{ |
|
"Batch Mean": 0.7704868316650391, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6075, |
|
"step": 243 |
|
}, |
|
{ |
|
"Batch Mean": 0.7423057556152344, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6075, |
|
"step": 243 |
|
}, |
|
{ |
|
"Batch Mean": 0.5257568359375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6075, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 9.190378189086914, |
|
"learning_rate": 1.2315789473684212e-06, |
|
"loss": 0.3868, |
|
"step": 244 |
|
}, |
|
{ |
|
"Batch Mean": 0.8801288604736328, |
|
"accuracy": 0.875, |
|
"epoch": 0.61, |
|
"step": 244 |
|
}, |
|
{ |
|
"Batch Mean": 0.33542299270629883, |
|
"accuracy": 0.8125, |
|
"epoch": 0.61, |
|
"step": 244 |
|
}, |
|
{ |
|
"Batch Mean": -0.39012861251831055, |
|
"accuracy": 0.71875, |
|
"epoch": 0.61, |
|
"step": 244 |
|
}, |
|
{ |
|
"Batch Mean": 0.7453155517578125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.61, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6125, |
|
"grad_norm": 9.23464298248291, |
|
"learning_rate": 1.2236842105263157e-06, |
|
"loss": 0.3762, |
|
"step": 245 |
|
}, |
|
{ |
|
"Batch Mean": 0.4720573425292969, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6125, |
|
"step": 245 |
|
}, |
|
{ |
|
"Batch Mean": 0.7902488708496094, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6125, |
|
"step": 245 |
|
}, |
|
{ |
|
"Batch Mean": 0.936099112033844, |
|
"accuracy": 0.875, |
|
"epoch": 0.6125, |
|
"step": 245 |
|
}, |
|
{ |
|
"Batch Mean": 1.1021003723144531, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6125, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.615, |
|
"grad_norm": 8.870701789855957, |
|
"learning_rate": 1.2157894736842105e-06, |
|
"loss": 0.3422, |
|
"step": 246 |
|
}, |
|
{ |
|
"Batch Mean": 0.2618560791015625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.615, |
|
"step": 246 |
|
}, |
|
{ |
|
"Batch Mean": 0.07040119171142578, |
|
"accuracy": 0.875, |
|
"epoch": 0.615, |
|
"step": 246 |
|
}, |
|
{ |
|
"Batch Mean": 0.46091651916503906, |
|
"accuracy": 0.9375, |
|
"epoch": 0.615, |
|
"step": 246 |
|
}, |
|
{ |
|
"Batch Mean": 0.4185791015625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.615, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6175, |
|
"grad_norm": 9.687936782836914, |
|
"learning_rate": 1.2078947368421052e-06, |
|
"loss": 0.3031, |
|
"step": 247 |
|
}, |
|
{ |
|
"Batch Mean": -0.5745697021484375, |
|
"accuracy": 0.71875, |
|
"epoch": 0.6175, |
|
"step": 247 |
|
}, |
|
{ |
|
"Batch Mean": 0.3359527587890625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6175, |
|
"step": 247 |
|
}, |
|
{ |
|
"Batch Mean": 0.40079498291015625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6175, |
|
"step": 247 |
|
}, |
|
{ |
|
"Batch Mean": 1.0915374755859375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6175, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 9.317652702331543, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.3662, |
|
"step": 248 |
|
}, |
|
{ |
|
"Batch Mean": 0.9274635314941406, |
|
"accuracy": 0.875, |
|
"epoch": 0.62, |
|
"step": 248 |
|
}, |
|
{ |
|
"Batch Mean": 1.6848640441894531, |
|
"accuracy": 0.84375, |
|
"epoch": 0.62, |
|
"step": 248 |
|
}, |
|
{ |
|
"Batch Mean": 1.019012451171875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.62, |
|
"step": 248 |
|
}, |
|
{ |
|
"Batch Mean": 1.162078857421875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.62, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6225, |
|
"grad_norm": 8.295259475708008, |
|
"learning_rate": 1.192105263157895e-06, |
|
"loss": 0.3305, |
|
"step": 249 |
|
}, |
|
{ |
|
"Batch Mean": 1.0198884010314941, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6225, |
|
"step": 249 |
|
}, |
|
{ |
|
"Batch Mean": 0.7031314373016357, |
|
"accuracy": 0.875, |
|
"epoch": 0.6225, |
|
"step": 249 |
|
}, |
|
{ |
|
"Batch Mean": 0.3843822479248047, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6225, |
|
"step": 249 |
|
}, |
|
{ |
|
"Batch Mean": 1.5286293029785156, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6225, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 9.459128379821777, |
|
"learning_rate": 1.1842105263157894e-06, |
|
"loss": 0.3292, |
|
"step": 250 |
|
}, |
|
{ |
|
"Batch Mean": 1.4051265716552734, |
|
"accuracy": 0.78125, |
|
"epoch": 0.625, |
|
"step": 250 |
|
}, |
|
{ |
|
"Batch Mean": 0.991455078125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.625, |
|
"step": 250 |
|
}, |
|
{ |
|
"Batch Mean": 0.6639633178710938, |
|
"accuracy": 0.78125, |
|
"epoch": 0.625, |
|
"step": 250 |
|
}, |
|
{ |
|
"Batch Mean": 1.212158203125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.625, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6275, |
|
"grad_norm": 10.484034538269043, |
|
"learning_rate": 1.1763157894736842e-06, |
|
"loss": 0.3449, |
|
"step": 251 |
|
}, |
|
{ |
|
"Batch Mean": 0.2921485900878906, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6275, |
|
"step": 251 |
|
}, |
|
{ |
|
"Batch Mean": 0.31158447265625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6275, |
|
"step": 251 |
|
}, |
|
{ |
|
"Batch Mean": 1.2971420288085938, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6275, |
|
"step": 251 |
|
}, |
|
{ |
|
"Batch Mean": 0.18883895874023438, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6275, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 10.465251922607422, |
|
"learning_rate": 1.168421052631579e-06, |
|
"loss": 0.3998, |
|
"step": 252 |
|
}, |
|
{ |
|
"Batch Mean": 0.7099113464355469, |
|
"accuracy": 0.75, |
|
"epoch": 0.63, |
|
"step": 252 |
|
}, |
|
{ |
|
"Batch Mean": 0.5808029174804688, |
|
"accuracy": 0.75, |
|
"epoch": 0.63, |
|
"step": 252 |
|
}, |
|
{ |
|
"Batch Mean": 1.5232391357421875, |
|
"accuracy": 0.875, |
|
"epoch": 0.63, |
|
"step": 252 |
|
}, |
|
{ |
|
"Batch Mean": -0.030918121337890625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.63, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.6325, |
|
"grad_norm": 13.60582447052002, |
|
"learning_rate": 1.1605263157894737e-06, |
|
"loss": 0.3337, |
|
"step": 253 |
|
}, |
|
{ |
|
"Batch Mean": 1.4740982055664062, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6325, |
|
"step": 253 |
|
}, |
|
{ |
|
"Batch Mean": 0.3468012809753418, |
|
"accuracy": 0.75, |
|
"epoch": 0.6325, |
|
"step": 253 |
|
}, |
|
{ |
|
"Batch Mean": 0.9975442886352539, |
|
"accuracy": 0.9375, |
|
"epoch": 0.6325, |
|
"step": 253 |
|
}, |
|
{ |
|
"Batch Mean": 1.1949005126953125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6325, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.635, |
|
"grad_norm": 10.633439064025879, |
|
"learning_rate": 1.1526315789473684e-06, |
|
"loss": 0.3772, |
|
"step": 254 |
|
}, |
|
{ |
|
"Batch Mean": 1.391693115234375, |
|
"accuracy": 0.75, |
|
"epoch": 0.635, |
|
"step": 254 |
|
}, |
|
{ |
|
"Batch Mean": 1.0715713500976562, |
|
"accuracy": 0.90625, |
|
"epoch": 0.635, |
|
"step": 254 |
|
}, |
|
{ |
|
"Batch Mean": 1.3236198425292969, |
|
"accuracy": 0.9375, |
|
"epoch": 0.635, |
|
"step": 254 |
|
}, |
|
{ |
|
"Batch Mean": 1.13323974609375, |
|
"accuracy": 0.875, |
|
"epoch": 0.635, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.6375, |
|
"grad_norm": 9.179872512817383, |
|
"learning_rate": 1.1447368421052632e-06, |
|
"loss": 0.3094, |
|
"step": 255 |
|
}, |
|
{ |
|
"Batch Mean": 0.4628944396972656, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6375, |
|
"step": 255 |
|
}, |
|
{ |
|
"Batch Mean": 1.6166534423828125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6375, |
|
"step": 255 |
|
}, |
|
{ |
|
"Batch Mean": 0.903778076171875, |
|
"accuracy": 0.9375, |
|
"epoch": 0.6375, |
|
"step": 255 |
|
}, |
|
{ |
|
"Batch Mean": 1.50897216796875, |
|
"accuracy": 0.6875, |
|
"epoch": 0.6375, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 9.531991958618164, |
|
"learning_rate": 1.136842105263158e-06, |
|
"loss": 0.4104, |
|
"step": 256 |
|
}, |
|
{ |
|
"Batch Mean": 0.07395172119140625, |
|
"accuracy": 0.9375, |
|
"epoch": 0.64, |
|
"step": 256 |
|
}, |
|
{ |
|
"Batch Mean": 1.1985054016113281, |
|
"accuracy": 0.84375, |
|
"epoch": 0.64, |
|
"step": 256 |
|
}, |
|
{ |
|
"Batch Mean": 0.29442596435546875, |
|
"accuracy": 0.9375, |
|
"epoch": 0.64, |
|
"step": 256 |
|
}, |
|
{ |
|
"Batch Mean": 0.16774749755859375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.64, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.6425, |
|
"grad_norm": 7.9248480796813965, |
|
"learning_rate": 1.1289473684210527e-06, |
|
"loss": 0.2785, |
|
"step": 257 |
|
}, |
|
{ |
|
"Batch Mean": 0.3604927062988281, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6425, |
|
"step": 257 |
|
}, |
|
{ |
|
"Batch Mean": 0.8195343017578125, |
|
"accuracy": 0.875, |
|
"epoch": 0.6425, |
|
"step": 257 |
|
}, |
|
{ |
|
"Batch Mean": 0.894989013671875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6425, |
|
"step": 257 |
|
}, |
|
{ |
|
"Batch Mean": 0.8864212036132812, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6425, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.645, |
|
"grad_norm": 11.30528736114502, |
|
"learning_rate": 1.1210526315789474e-06, |
|
"loss": 0.3164, |
|
"step": 258 |
|
}, |
|
{ |
|
"Batch Mean": 0.2879199981689453, |
|
"accuracy": 0.875, |
|
"epoch": 0.645, |
|
"step": 258 |
|
}, |
|
{ |
|
"Batch Mean": 0.7450437545776367, |
|
"accuracy": 0.71875, |
|
"epoch": 0.645, |
|
"step": 258 |
|
}, |
|
{ |
|
"Batch Mean": 0.7837753295898438, |
|
"accuracy": 0.875, |
|
"epoch": 0.645, |
|
"step": 258 |
|
}, |
|
{ |
|
"Batch Mean": 1.498239517211914, |
|
"accuracy": 0.84375, |
|
"epoch": 0.645, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.6475, |
|
"grad_norm": 10.275593757629395, |
|
"learning_rate": 1.1131578947368421e-06, |
|
"loss": 0.3365, |
|
"step": 259 |
|
}, |
|
{ |
|
"Batch Mean": -0.20503616333007812, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6475, |
|
"step": 259 |
|
}, |
|
{ |
|
"Batch Mean": 0.6535758972167969, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6475, |
|
"step": 259 |
|
}, |
|
{ |
|
"Batch Mean": 0.3572044372558594, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6475, |
|
"step": 259 |
|
}, |
|
{ |
|
"Batch Mean": 0.2315387725830078, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6475, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 10.48078727722168, |
|
"learning_rate": 1.1052631578947369e-06, |
|
"loss": 0.3889, |
|
"step": 260 |
|
}, |
|
{ |
|
"Batch Mean": 0.3063087463378906, |
|
"accuracy": 0.9375, |
|
"epoch": 0.65, |
|
"step": 260 |
|
}, |
|
{ |
|
"Batch Mean": 0.827545166015625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.65, |
|
"step": 260 |
|
}, |
|
{ |
|
"Batch Mean": 1.0363435745239258, |
|
"accuracy": 0.875, |
|
"epoch": 0.65, |
|
"step": 260 |
|
}, |
|
{ |
|
"Batch Mean": 0.6229095458984375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.65, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6525, |
|
"grad_norm": 9.440726280212402, |
|
"learning_rate": 1.0973684210526316e-06, |
|
"loss": 0.3457, |
|
"step": 261 |
|
}, |
|
{ |
|
"Batch Mean": 1.293971061706543, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6525, |
|
"step": 261 |
|
}, |
|
{ |
|
"Batch Mean": 0.5774269104003906, |
|
"accuracy": 0.875, |
|
"epoch": 0.6525, |
|
"step": 261 |
|
}, |
|
{ |
|
"Batch Mean": 0.8183193206787109, |
|
"accuracy": 0.875, |
|
"epoch": 0.6525, |
|
"step": 261 |
|
}, |
|
{ |
|
"Batch Mean": 1.8413429260253906, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6525, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.655, |
|
"grad_norm": 10.349913597106934, |
|
"learning_rate": 1.0894736842105264e-06, |
|
"loss": 0.3752, |
|
"step": 262 |
|
}, |
|
{ |
|
"Batch Mean": 0.500396728515625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.655, |
|
"step": 262 |
|
}, |
|
{ |
|
"Batch Mean": 0.7432174682617188, |
|
"accuracy": 0.8125, |
|
"epoch": 0.655, |
|
"step": 262 |
|
}, |
|
{ |
|
"Batch Mean": 0.8873291015625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.655, |
|
"step": 262 |
|
}, |
|
{ |
|
"Batch Mean": 0.4037017822265625, |
|
"accuracy": 0.75, |
|
"epoch": 0.655, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.6575, |
|
"grad_norm": 11.169168472290039, |
|
"learning_rate": 1.0815789473684211e-06, |
|
"loss": 0.4241, |
|
"step": 263 |
|
}, |
|
{ |
|
"Batch Mean": 0.12017822265625, |
|
"accuracy": 0.875, |
|
"epoch": 0.6575, |
|
"step": 263 |
|
}, |
|
{ |
|
"Batch Mean": 0.853546142578125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6575, |
|
"step": 263 |
|
}, |
|
{ |
|
"Batch Mean": -0.26004791259765625, |
|
"accuracy": 0.75, |
|
"epoch": 0.6575, |
|
"step": 263 |
|
}, |
|
{ |
|
"Batch Mean": 0.823577880859375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6575, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 10.056443214416504, |
|
"learning_rate": 1.0736842105263159e-06, |
|
"loss": 0.3715, |
|
"step": 264 |
|
}, |
|
{ |
|
"Batch Mean": 0.2717742919921875, |
|
"accuracy": 0.875, |
|
"epoch": 0.66, |
|
"step": 264 |
|
}, |
|
{ |
|
"Batch Mean": 0.24300384521484375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.66, |
|
"step": 264 |
|
}, |
|
{ |
|
"Batch Mean": -0.5204277038574219, |
|
"accuracy": 0.8125, |
|
"epoch": 0.66, |
|
"step": 264 |
|
}, |
|
{ |
|
"Batch Mean": 0.34992408752441406, |
|
"accuracy": 0.90625, |
|
"epoch": 0.66, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.6625, |
|
"grad_norm": 9.29755687713623, |
|
"learning_rate": 1.0657894736842106e-06, |
|
"loss": 0.3861, |
|
"step": 265 |
|
}, |
|
{ |
|
"Batch Mean": -0.2652130126953125, |
|
"accuracy": 0.71875, |
|
"epoch": 0.6625, |
|
"step": 265 |
|
}, |
|
{ |
|
"Batch Mean": 1.3868637084960938, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6625, |
|
"step": 265 |
|
}, |
|
{ |
|
"Batch Mean": 0.24340057373046875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6625, |
|
"step": 265 |
|
}, |
|
{ |
|
"Batch Mean": -0.5577392578125, |
|
"accuracy": 0.75, |
|
"epoch": 0.6625, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.665, |
|
"grad_norm": 10.35000991821289, |
|
"learning_rate": 1.0578947368421052e-06, |
|
"loss": 0.4203, |
|
"step": 266 |
|
}, |
|
{ |
|
"Batch Mean": -0.9594917297363281, |
|
"accuracy": 0.90625, |
|
"epoch": 0.665, |
|
"step": 266 |
|
}, |
|
{ |
|
"Batch Mean": -0.025142669677734375, |
|
"accuracy": 0.9375, |
|
"epoch": 0.665, |
|
"step": 266 |
|
}, |
|
{ |
|
"Batch Mean": 0.22489547729492188, |
|
"accuracy": 0.90625, |
|
"epoch": 0.665, |
|
"step": 266 |
|
}, |
|
{ |
|
"Batch Mean": 0.244171142578125, |
|
"accuracy": 0.875, |
|
"epoch": 0.665, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6675, |
|
"grad_norm": 7.351399898529053, |
|
"learning_rate": 1.05e-06, |
|
"loss": 0.2589, |
|
"step": 267 |
|
}, |
|
{ |
|
"Batch Mean": 0.034183502197265625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6675, |
|
"step": 267 |
|
}, |
|
{ |
|
"Batch Mean": -0.8277130126953125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6675, |
|
"step": 267 |
|
}, |
|
{ |
|
"Batch Mean": -0.5242729187011719, |
|
"accuracy": 0.96875, |
|
"epoch": 0.6675, |
|
"step": 267 |
|
}, |
|
{ |
|
"Batch Mean": -0.7859745025634766, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6675, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 8.214353561401367, |
|
"learning_rate": 1.0421052631578949e-06, |
|
"loss": 0.2886, |
|
"step": 268 |
|
}, |
|
{ |
|
"Batch Mean": 0.5358123779296875, |
|
"accuracy": 0.9375, |
|
"epoch": 0.67, |
|
"step": 268 |
|
}, |
|
{ |
|
"Batch Mean": 0.2813873291015625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.67, |
|
"step": 268 |
|
}, |
|
{ |
|
"Batch Mean": -0.6162033081054688, |
|
"accuracy": 0.875, |
|
"epoch": 0.67, |
|
"step": 268 |
|
}, |
|
{ |
|
"Batch Mean": -0.2956199645996094, |
|
"accuracy": 0.90625, |
|
"epoch": 0.67, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.6725, |
|
"grad_norm": 9.68172836303711, |
|
"learning_rate": 1.0342105263157896e-06, |
|
"loss": 0.3499, |
|
"step": 269 |
|
}, |
|
{ |
|
"Batch Mean": 0.42858123779296875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6725, |
|
"step": 269 |
|
}, |
|
{ |
|
"Batch Mean": 0.08481597900390625, |
|
"accuracy": 0.5625, |
|
"epoch": 0.6725, |
|
"step": 269 |
|
}, |
|
{ |
|
"Batch Mean": 0.0674285888671875, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6725, |
|
"step": 269 |
|
}, |
|
{ |
|
"Batch Mean": -0.5495738983154297, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6725, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.675, |
|
"grad_norm": 10.252084732055664, |
|
"learning_rate": 1.0263157894736843e-06, |
|
"loss": 0.4524, |
|
"step": 270 |
|
}, |
|
{ |
|
"Batch Mean": -0.15476417541503906, |
|
"accuracy": 0.90625, |
|
"epoch": 0.675, |
|
"step": 270 |
|
}, |
|
{ |
|
"Batch Mean": -0.1256561279296875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.675, |
|
"step": 270 |
|
}, |
|
{ |
|
"Batch Mean": -0.7836074829101562, |
|
"accuracy": 0.71875, |
|
"epoch": 0.675, |
|
"step": 270 |
|
}, |
|
{ |
|
"Batch Mean": -0.43794846534729004, |
|
"accuracy": 0.8125, |
|
"epoch": 0.675, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6775, |
|
"grad_norm": 8.471720695495605, |
|
"learning_rate": 1.0184210526315789e-06, |
|
"loss": 0.3632, |
|
"step": 271 |
|
}, |
|
{ |
|
"Batch Mean": -0.39324188232421875, |
|
"accuracy": 0.875, |
|
"epoch": 0.6775, |
|
"step": 271 |
|
}, |
|
{ |
|
"Batch Mean": 0.01934814453125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6775, |
|
"step": 271 |
|
}, |
|
{ |
|
"Batch Mean": -0.6975955963134766, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6775, |
|
"step": 271 |
|
}, |
|
{ |
|
"Batch Mean": 0.15717697143554688, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6775, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 8.42764949798584, |
|
"learning_rate": 1.0105263157894736e-06, |
|
"loss": 0.3476, |
|
"step": 272 |
|
}, |
|
{ |
|
"Batch Mean": -0.37816810607910156, |
|
"accuracy": 0.875, |
|
"epoch": 0.68, |
|
"step": 272 |
|
}, |
|
{ |
|
"Batch Mean": 0.6948776245117188, |
|
"accuracy": 0.875, |
|
"epoch": 0.68, |
|
"step": 272 |
|
}, |
|
{ |
|
"Batch Mean": 0.19351577758789062, |
|
"accuracy": 0.84375, |
|
"epoch": 0.68, |
|
"step": 272 |
|
}, |
|
{ |
|
"Batch Mean": -0.0253753662109375, |
|
"accuracy": 0.71875, |
|
"epoch": 0.68, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.6825, |
|
"grad_norm": 8.725883483886719, |
|
"learning_rate": 1.0026315789473684e-06, |
|
"loss": 0.3565, |
|
"step": 273 |
|
}, |
|
{ |
|
"Batch Mean": -0.7272148132324219, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6825, |
|
"step": 273 |
|
}, |
|
{ |
|
"Batch Mean": -0.6979103088378906, |
|
"accuracy": 0.75, |
|
"epoch": 0.6825, |
|
"step": 273 |
|
}, |
|
{ |
|
"Batch Mean": -0.8193511962890625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6825, |
|
"step": 273 |
|
}, |
|
{ |
|
"Batch Mean": 0.5439682006835938, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6825, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.685, |
|
"grad_norm": 8.82199478149414, |
|
"learning_rate": 9.947368421052631e-07, |
|
"loss": 0.3683, |
|
"step": 274 |
|
}, |
|
{ |
|
"Batch Mean": -0.6800460815429688, |
|
"accuracy": 0.78125, |
|
"epoch": 0.685, |
|
"step": 274 |
|
}, |
|
{ |
|
"Batch Mean": -0.7421398162841797, |
|
"accuracy": 0.8125, |
|
"epoch": 0.685, |
|
"step": 274 |
|
}, |
|
{ |
|
"Batch Mean": -1.1038150787353516, |
|
"accuracy": 0.84375, |
|
"epoch": 0.685, |
|
"step": 274 |
|
}, |
|
{ |
|
"Batch Mean": -0.6650419235229492, |
|
"accuracy": 0.84375, |
|
"epoch": 0.685, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 8.768556594848633, |
|
"learning_rate": 9.86842105263158e-07, |
|
"loss": 0.3605, |
|
"step": 275 |
|
}, |
|
{ |
|
"Batch Mean": -0.1325054168701172, |
|
"accuracy": 0.75, |
|
"epoch": 0.6875, |
|
"step": 275 |
|
}, |
|
{ |
|
"Batch Mean": 0.18872451782226562, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6875, |
|
"step": 275 |
|
}, |
|
{ |
|
"Batch Mean": -0.443756103515625, |
|
"accuracy": 0.875, |
|
"epoch": 0.6875, |
|
"step": 275 |
|
}, |
|
{ |
|
"Batch Mean": -0.5071296691894531, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6875, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 9.871837615966797, |
|
"learning_rate": 9.789473684210526e-07, |
|
"loss": 0.4065, |
|
"step": 276 |
|
}, |
|
{ |
|
"Batch Mean": 0.037105560302734375, |
|
"accuracy": 0.90625, |
|
"epoch": 0.69, |
|
"step": 276 |
|
}, |
|
{ |
|
"Batch Mean": 0.05627763271331787, |
|
"accuracy": 0.78125, |
|
"epoch": 0.69, |
|
"step": 276 |
|
}, |
|
{ |
|
"Batch Mean": 0.0525970458984375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.69, |
|
"step": 276 |
|
}, |
|
{ |
|
"Batch Mean": -0.3279247283935547, |
|
"accuracy": 0.71875, |
|
"epoch": 0.69, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.6925, |
|
"grad_norm": 10.501238822937012, |
|
"learning_rate": 9.710526315789474e-07, |
|
"loss": 0.3826, |
|
"step": 277 |
|
}, |
|
{ |
|
"Batch Mean": 0.18790054321289062, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6925, |
|
"step": 277 |
|
}, |
|
{ |
|
"Batch Mean": -0.4830589294433594, |
|
"accuracy": 0.8125, |
|
"epoch": 0.6925, |
|
"step": 277 |
|
}, |
|
{ |
|
"Batch Mean": 0.24172592163085938, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6925, |
|
"step": 277 |
|
}, |
|
{ |
|
"Batch Mean": 1.0150909423828125, |
|
"accuracy": 0.875, |
|
"epoch": 0.6925, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.695, |
|
"grad_norm": 9.854218482971191, |
|
"learning_rate": 9.63157894736842e-07, |
|
"loss": 0.4258, |
|
"step": 278 |
|
}, |
|
{ |
|
"Batch Mean": -0.2906951904296875, |
|
"accuracy": 0.90625, |
|
"epoch": 0.695, |
|
"step": 278 |
|
}, |
|
{ |
|
"Batch Mean": 0.6845970153808594, |
|
"accuracy": 0.84375, |
|
"epoch": 0.695, |
|
"step": 278 |
|
}, |
|
{ |
|
"Batch Mean": 0.239648699760437, |
|
"accuracy": 0.75, |
|
"epoch": 0.695, |
|
"step": 278 |
|
}, |
|
{ |
|
"Batch Mean": -0.20991897583007812, |
|
"accuracy": 0.90625, |
|
"epoch": 0.695, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.6975, |
|
"grad_norm": 9.013479232788086, |
|
"learning_rate": 9.552631578947368e-07, |
|
"loss": 0.3103, |
|
"step": 279 |
|
}, |
|
{ |
|
"Batch Mean": 0.18093395233154297, |
|
"accuracy": 0.78125, |
|
"epoch": 0.6975, |
|
"step": 279 |
|
}, |
|
{ |
|
"Batch Mean": -0.1142578125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6975, |
|
"step": 279 |
|
}, |
|
{ |
|
"Batch Mean": -0.07617950439453125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.6975, |
|
"step": 279 |
|
}, |
|
{ |
|
"Batch Mean": -0.1187744140625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.6975, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 9.930963516235352, |
|
"learning_rate": 9.473684210526316e-07, |
|
"loss": 0.3725, |
|
"step": 280 |
|
}, |
|
{ |
|
"Batch Mean": 0.5311622619628906, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7, |
|
"step": 280 |
|
}, |
|
{ |
|
"Batch Mean": -0.624298095703125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7, |
|
"step": 280 |
|
}, |
|
{ |
|
"Batch Mean": 0.5956707000732422, |
|
"accuracy": 0.90625, |
|
"epoch": 0.7, |
|
"step": 280 |
|
}, |
|
{ |
|
"Batch Mean": -0.102508544921875, |
|
"accuracy": 0.75, |
|
"epoch": 0.7, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7025, |
|
"grad_norm": 10.052390098571777, |
|
"learning_rate": 9.394736842105262e-07, |
|
"loss": 0.3588, |
|
"step": 281 |
|
}, |
|
{ |
|
"Batch Mean": -0.5856146812438965, |
|
"accuracy": 0.71875, |
|
"epoch": 0.7025, |
|
"step": 281 |
|
}, |
|
{ |
|
"Batch Mean": 0.062744140625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7025, |
|
"step": 281 |
|
}, |
|
{ |
|
"Batch Mean": -0.6748523712158203, |
|
"accuracy": 0.875, |
|
"epoch": 0.7025, |
|
"step": 281 |
|
}, |
|
{ |
|
"Batch Mean": 0.475738525390625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.7025, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.705, |
|
"grad_norm": 9.577348709106445, |
|
"learning_rate": 9.315789473684212e-07, |
|
"loss": 0.4109, |
|
"step": 282 |
|
}, |
|
{ |
|
"Batch Mean": 0.5323333740234375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.705, |
|
"step": 282 |
|
}, |
|
{ |
|
"Batch Mean": -1.1327362060546875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.705, |
|
"step": 282 |
|
}, |
|
{ |
|
"Batch Mean": -0.2037353515625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.705, |
|
"step": 282 |
|
}, |
|
{ |
|
"Batch Mean": 0.39438629150390625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.705, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.7075, |
|
"grad_norm": 9.311172485351562, |
|
"learning_rate": 9.236842105263158e-07, |
|
"loss": 0.3508, |
|
"step": 283 |
|
}, |
|
{ |
|
"Batch Mean": -0.8892917633056641, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7075, |
|
"step": 283 |
|
}, |
|
{ |
|
"Batch Mean": -0.035157203674316406, |
|
"accuracy": 0.875, |
|
"epoch": 0.7075, |
|
"step": 283 |
|
}, |
|
{ |
|
"Batch Mean": 0.039051055908203125, |
|
"accuracy": 0.75, |
|
"epoch": 0.7075, |
|
"step": 283 |
|
}, |
|
{ |
|
"Batch Mean": 0.43335962295532227, |
|
"accuracy": 0.90625, |
|
"epoch": 0.7075, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 10.052939414978027, |
|
"learning_rate": 9.157894736842106e-07, |
|
"loss": 0.3374, |
|
"step": 284 |
|
}, |
|
{ |
|
"Batch Mean": 0.1412334442138672, |
|
"accuracy": 0.84375, |
|
"epoch": 0.71, |
|
"step": 284 |
|
}, |
|
{ |
|
"Batch Mean": 0.0943450927734375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.71, |
|
"step": 284 |
|
}, |
|
{ |
|
"Batch Mean": -0.5173187255859375, |
|
"accuracy": 0.875, |
|
"epoch": 0.71, |
|
"step": 284 |
|
}, |
|
{ |
|
"Batch Mean": 0.1943206787109375, |
|
"accuracy": 0.9375, |
|
"epoch": 0.71, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7125, |
|
"grad_norm": 9.635650634765625, |
|
"learning_rate": 9.078947368421053e-07, |
|
"loss": 0.3296, |
|
"step": 285 |
|
}, |
|
{ |
|
"Batch Mean": 0.5180206298828125, |
|
"accuracy": 0.875, |
|
"epoch": 0.7125, |
|
"step": 285 |
|
}, |
|
{ |
|
"Batch Mean": -0.7832927703857422, |
|
"accuracy": 0.6875, |
|
"epoch": 0.7125, |
|
"step": 285 |
|
}, |
|
{ |
|
"Batch Mean": 0.2614097595214844, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7125, |
|
"step": 285 |
|
}, |
|
{ |
|
"Batch Mean": -0.2687416076660156, |
|
"accuracy": 0.9375, |
|
"epoch": 0.7125, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.715, |
|
"grad_norm": 9.55765151977539, |
|
"learning_rate": 9e-07, |
|
"loss": 0.3793, |
|
"step": 286 |
|
}, |
|
{ |
|
"Batch Mean": -0.6387786865234375, |
|
"accuracy": 0.875, |
|
"epoch": 0.715, |
|
"step": 286 |
|
}, |
|
{ |
|
"Batch Mean": 0.1472339630126953, |
|
"accuracy": 0.84375, |
|
"epoch": 0.715, |
|
"step": 286 |
|
}, |
|
{ |
|
"Batch Mean": -0.25982666015625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.715, |
|
"step": 286 |
|
}, |
|
{ |
|
"Batch Mean": -0.5125141143798828, |
|
"accuracy": 0.875, |
|
"epoch": 0.715, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.7175, |
|
"grad_norm": 8.80599308013916, |
|
"learning_rate": 8.921052631578947e-07, |
|
"loss": 0.3445, |
|
"step": 287 |
|
}, |
|
{ |
|
"Batch Mean": -0.220855712890625, |
|
"accuracy": 0.875, |
|
"epoch": 0.7175, |
|
"step": 287 |
|
}, |
|
{ |
|
"Batch Mean": -0.15911865234375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7175, |
|
"step": 287 |
|
}, |
|
{ |
|
"Batch Mean": -0.525360107421875, |
|
"accuracy": 0.9375, |
|
"epoch": 0.7175, |
|
"step": 287 |
|
}, |
|
{ |
|
"Batch Mean": 0.1629009246826172, |
|
"accuracy": 0.90625, |
|
"epoch": 0.7175, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 7.637984275817871, |
|
"learning_rate": 8.842105263157895e-07, |
|
"loss": 0.2692, |
|
"step": 288 |
|
}, |
|
{ |
|
"Batch Mean": -0.882720947265625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.72, |
|
"step": 288 |
|
}, |
|
{ |
|
"Batch Mean": -0.592742919921875, |
|
"accuracy": 0.875, |
|
"epoch": 0.72, |
|
"step": 288 |
|
}, |
|
{ |
|
"Batch Mean": 0.02447509765625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.72, |
|
"step": 288 |
|
}, |
|
{ |
|
"Batch Mean": -0.37477874755859375, |
|
"accuracy": 0.875, |
|
"epoch": 0.72, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7225, |
|
"grad_norm": 11.262386322021484, |
|
"learning_rate": 8.763157894736843e-07, |
|
"loss": 0.3622, |
|
"step": 289 |
|
}, |
|
{ |
|
"Batch Mean": -1.2448196411132812, |
|
"accuracy": 0.90625, |
|
"epoch": 0.7225, |
|
"step": 289 |
|
}, |
|
{ |
|
"Batch Mean": 0.02728271484375, |
|
"accuracy": 0.875, |
|
"epoch": 0.7225, |
|
"step": 289 |
|
}, |
|
{ |
|
"Batch Mean": -0.29929351806640625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.7225, |
|
"step": 289 |
|
}, |
|
{ |
|
"Batch Mean": -1.1397781372070312, |
|
"accuracy": 0.625, |
|
"epoch": 0.7225, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.725, |
|
"grad_norm": 8.555329322814941, |
|
"learning_rate": 8.68421052631579e-07, |
|
"loss": 0.328, |
|
"step": 290 |
|
}, |
|
{ |
|
"Batch Mean": -0.44579315185546875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.725, |
|
"step": 290 |
|
}, |
|
{ |
|
"Batch Mean": -0.4141693115234375, |
|
"accuracy": 0.875, |
|
"epoch": 0.725, |
|
"step": 290 |
|
}, |
|
{ |
|
"Batch Mean": -0.665985107421875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.725, |
|
"step": 290 |
|
}, |
|
{ |
|
"Batch Mean": -0.4548025131225586, |
|
"accuracy": 0.75, |
|
"epoch": 0.725, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7275, |
|
"grad_norm": 9.342544555664062, |
|
"learning_rate": 8.605263157894737e-07, |
|
"loss": 0.366, |
|
"step": 291 |
|
}, |
|
{ |
|
"Batch Mean": -0.8993301391601562, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7275, |
|
"step": 291 |
|
}, |
|
{ |
|
"Batch Mean": 0.19589996337890625, |
|
"accuracy": 0.9375, |
|
"epoch": 0.7275, |
|
"step": 291 |
|
}, |
|
{ |
|
"Batch Mean": 0.6077609062194824, |
|
"accuracy": 0.875, |
|
"epoch": 0.7275, |
|
"step": 291 |
|
}, |
|
{ |
|
"Batch Mean": -0.5311317443847656, |
|
"accuracy": 0.78125, |
|
"epoch": 0.7275, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 9.269038200378418, |
|
"learning_rate": 8.526315789473684e-07, |
|
"loss": 0.3235, |
|
"step": 292 |
|
}, |
|
{ |
|
"Batch Mean": -0.4767875671386719, |
|
"accuracy": 0.9375, |
|
"epoch": 0.73, |
|
"step": 292 |
|
}, |
|
{ |
|
"Batch Mean": -0.0320281982421875, |
|
"accuracy": 0.875, |
|
"epoch": 0.73, |
|
"step": 292 |
|
}, |
|
{ |
|
"Batch Mean": 0.34002208709716797, |
|
"accuracy": 0.78125, |
|
"epoch": 0.73, |
|
"step": 292 |
|
}, |
|
{ |
|
"Batch Mean": -1.6079635620117188, |
|
"accuracy": 0.84375, |
|
"epoch": 0.73, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.7325, |
|
"grad_norm": 9.221135139465332, |
|
"learning_rate": 8.447368421052632e-07, |
|
"loss": 0.3575, |
|
"step": 293 |
|
}, |
|
{ |
|
"Batch Mean": 0.22001266479492188, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7325, |
|
"step": 293 |
|
}, |
|
{ |
|
"Batch Mean": 0.10904312133789062, |
|
"accuracy": 0.71875, |
|
"epoch": 0.7325, |
|
"step": 293 |
|
}, |
|
{ |
|
"Batch Mean": -0.912567138671875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7325, |
|
"step": 293 |
|
}, |
|
{ |
|
"Batch Mean": -0.6306076049804688, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7325, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.735, |
|
"grad_norm": 11.674117088317871, |
|
"learning_rate": 8.368421052631578e-07, |
|
"loss": 0.4057, |
|
"step": 294 |
|
}, |
|
{ |
|
"Batch Mean": -1.0194950103759766, |
|
"accuracy": 0.8125, |
|
"epoch": 0.735, |
|
"step": 294 |
|
}, |
|
{ |
|
"Batch Mean": 0.14434814453125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.735, |
|
"step": 294 |
|
}, |
|
{ |
|
"Batch Mean": -0.29044342041015625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.735, |
|
"step": 294 |
|
}, |
|
{ |
|
"Batch Mean": 0.13979148864746094, |
|
"accuracy": 0.84375, |
|
"epoch": 0.735, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.7375, |
|
"grad_norm": 9.911567687988281, |
|
"learning_rate": 8.289473684210528e-07, |
|
"loss": 0.3553, |
|
"step": 295 |
|
}, |
|
{ |
|
"Batch Mean": -0.03961944580078125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.7375, |
|
"step": 295 |
|
}, |
|
{ |
|
"Batch Mean": -0.2553253173828125, |
|
"accuracy": 0.71875, |
|
"epoch": 0.7375, |
|
"step": 295 |
|
}, |
|
{ |
|
"Batch Mean": -0.6845512390136719, |
|
"accuracy": 0.78125, |
|
"epoch": 0.7375, |
|
"step": 295 |
|
}, |
|
{ |
|
"Batch Mean": 0.6528472900390625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7375, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 11.676740646362305, |
|
"learning_rate": 8.210526315789474e-07, |
|
"loss": 0.4071, |
|
"step": 296 |
|
}, |
|
{ |
|
"Batch Mean": -0.355865478515625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.74, |
|
"step": 296 |
|
}, |
|
{ |
|
"Batch Mean": -0.0177001953125, |
|
"accuracy": 0.875, |
|
"epoch": 0.74, |
|
"step": 296 |
|
}, |
|
{ |
|
"Batch Mean": -0.4150969982147217, |
|
"accuracy": 0.75, |
|
"epoch": 0.74, |
|
"step": 296 |
|
}, |
|
{ |
|
"Batch Mean": -0.3167438507080078, |
|
"accuracy": 0.84375, |
|
"epoch": 0.74, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.7425, |
|
"grad_norm": 9.465566635131836, |
|
"learning_rate": 8.131578947368422e-07, |
|
"loss": 0.3269, |
|
"step": 297 |
|
}, |
|
{ |
|
"Batch Mean": 0.6043167114257812, |
|
"accuracy": 0.71875, |
|
"epoch": 0.7425, |
|
"step": 297 |
|
}, |
|
{ |
|
"Batch Mean": 0.49607324600219727, |
|
"accuracy": 0.75, |
|
"epoch": 0.7425, |
|
"step": 297 |
|
}, |
|
{ |
|
"Batch Mean": -0.2490692138671875, |
|
"accuracy": 0.875, |
|
"epoch": 0.7425, |
|
"step": 297 |
|
}, |
|
{ |
|
"Batch Mean": -0.280670166015625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.7425, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.745, |
|
"grad_norm": 11.616758346557617, |
|
"learning_rate": 8.052631578947369e-07, |
|
"loss": 0.42, |
|
"step": 298 |
|
}, |
|
{ |
|
"Batch Mean": 0.7779655456542969, |
|
"accuracy": 0.8125, |
|
"epoch": 0.745, |
|
"step": 298 |
|
}, |
|
{ |
|
"Batch Mean": -0.6361045837402344, |
|
"accuracy": 0.875, |
|
"epoch": 0.745, |
|
"step": 298 |
|
}, |
|
{ |
|
"Batch Mean": -0.3686501383781433, |
|
"accuracy": 0.8125, |
|
"epoch": 0.745, |
|
"step": 298 |
|
}, |
|
{ |
|
"Batch Mean": 0.463165283203125, |
|
"accuracy": 0.875, |
|
"epoch": 0.745, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.7475, |
|
"grad_norm": 11.344236373901367, |
|
"learning_rate": 7.973684210526315e-07, |
|
"loss": 0.3875, |
|
"step": 299 |
|
}, |
|
{ |
|
"Batch Mean": 0.6528396606445312, |
|
"accuracy": 0.78125, |
|
"epoch": 0.7475, |
|
"step": 299 |
|
}, |
|
{ |
|
"Batch Mean": 0.44107818603515625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.7475, |
|
"step": 299 |
|
}, |
|
{ |
|
"Batch Mean": 0.2606048583984375, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7475, |
|
"step": 299 |
|
}, |
|
{ |
|
"Batch Mean": -0.16002655029296875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7475, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 12.416658401489258, |
|
"learning_rate": 7.894736842105263e-07, |
|
"loss": 0.403, |
|
"step": 300 |
|
}, |
|
{ |
|
"Batch Mean": -1.0142822265625, |
|
"accuracy": 0.875, |
|
"epoch": 0.75, |
|
"step": 300 |
|
}, |
|
{ |
|
"Batch Mean": 0.3593902587890625, |
|
"accuracy": 0.9375, |
|
"epoch": 0.75, |
|
"step": 300 |
|
}, |
|
{ |
|
"Batch Mean": -0.3219757080078125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.75, |
|
"step": 300 |
|
}, |
|
{ |
|
"Batch Mean": -0.3983287811279297, |
|
"accuracy": 0.84375, |
|
"epoch": 0.75, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7525, |
|
"grad_norm": 9.682062149047852, |
|
"learning_rate": 7.81578947368421e-07, |
|
"loss": 0.311, |
|
"step": 301 |
|
}, |
|
{ |
|
"Batch Mean": -0.663543701171875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7525, |
|
"step": 301 |
|
}, |
|
{ |
|
"Batch Mean": 0.281829833984375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7525, |
|
"step": 301 |
|
}, |
|
{ |
|
"Batch Mean": 1.0360565185546875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7525, |
|
"step": 301 |
|
}, |
|
{ |
|
"Batch Mean": -0.3531379699707031, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7525, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.755, |
|
"grad_norm": 12.624238967895508, |
|
"learning_rate": 7.736842105263159e-07, |
|
"loss": 0.4274, |
|
"step": 302 |
|
}, |
|
{ |
|
"Batch Mean": -0.30055999755859375, |
|
"accuracy": 0.90625, |
|
"epoch": 0.755, |
|
"step": 302 |
|
}, |
|
{ |
|
"Batch Mean": -0.3213653564453125, |
|
"accuracy": 0.875, |
|
"epoch": 0.755, |
|
"step": 302 |
|
}, |
|
{ |
|
"Batch Mean": -0.178985595703125, |
|
"accuracy": 0.90625, |
|
"epoch": 0.755, |
|
"step": 302 |
|
}, |
|
{ |
|
"Batch Mean": 0.06946945190429688, |
|
"accuracy": 0.78125, |
|
"epoch": 0.755, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.7575, |
|
"grad_norm": 9.274081230163574, |
|
"learning_rate": 7.657894736842106e-07, |
|
"loss": 0.3211, |
|
"step": 303 |
|
}, |
|
{ |
|
"Batch Mean": -0.36700439453125, |
|
"accuracy": 0.9375, |
|
"epoch": 0.7575, |
|
"step": 303 |
|
}, |
|
{ |
|
"Batch Mean": -0.030065536499023438, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7575, |
|
"step": 303 |
|
}, |
|
{ |
|
"Batch Mean": -0.24184799194335938, |
|
"accuracy": 0.875, |
|
"epoch": 0.7575, |
|
"step": 303 |
|
}, |
|
{ |
|
"Batch Mean": 0.25110673904418945, |
|
"accuracy": 0.75, |
|
"epoch": 0.7575, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 9.465882301330566, |
|
"learning_rate": 7.578947368421053e-07, |
|
"loss": 0.3342, |
|
"step": 304 |
|
}, |
|
{ |
|
"Batch Mean": -0.25606608390808105, |
|
"accuracy": 0.90625, |
|
"epoch": 0.76, |
|
"step": 304 |
|
}, |
|
{ |
|
"Batch Mean": 0.24300003051757812, |
|
"accuracy": 0.8125, |
|
"epoch": 0.76, |
|
"step": 304 |
|
}, |
|
{ |
|
"Batch Mean": -0.2869873046875, |
|
"accuracy": 0.8125, |
|
"epoch": 0.76, |
|
"step": 304 |
|
}, |
|
{ |
|
"Batch Mean": 0.7027158737182617, |
|
"accuracy": 0.875, |
|
"epoch": 0.76, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.7625, |
|
"grad_norm": 10.51352596282959, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.3529, |
|
"step": 305 |
|
}, |
|
{ |
|
"Batch Mean": -0.39292144775390625, |
|
"accuracy": 0.96875, |
|
"epoch": 0.7625, |
|
"step": 305 |
|
}, |
|
{ |
|
"Batch Mean": -0.16762542724609375, |
|
"accuracy": 1.0, |
|
"epoch": 0.7625, |
|
"step": 305 |
|
}, |
|
{ |
|
"Batch Mean": 0.5650863647460938, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7625, |
|
"step": 305 |
|
}, |
|
{ |
|
"Batch Mean": 0.4365959167480469, |
|
"accuracy": 0.9375, |
|
"epoch": 0.7625, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.765, |
|
"grad_norm": 8.560236930847168, |
|
"learning_rate": 7.421052631578948e-07, |
|
"loss": 0.2595, |
|
"step": 306 |
|
}, |
|
{ |
|
"Batch Mean": -0.9248428344726562, |
|
"accuracy": 0.75, |
|
"epoch": 0.765, |
|
"step": 306 |
|
}, |
|
{ |
|
"Batch Mean": 0.199981689453125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.765, |
|
"step": 306 |
|
}, |
|
{ |
|
"Batch Mean": 0.20327186584472656, |
|
"accuracy": 0.8125, |
|
"epoch": 0.765, |
|
"step": 306 |
|
}, |
|
{ |
|
"Batch Mean": 0.016353607177734375, |
|
"accuracy": 0.9375, |
|
"epoch": 0.765, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.7675, |
|
"grad_norm": 11.130337715148926, |
|
"learning_rate": 7.342105263157895e-07, |
|
"loss": 0.3823, |
|
"step": 307 |
|
}, |
|
{ |
|
"Batch Mean": -0.706817626953125, |
|
"accuracy": 0.875, |
|
"epoch": 0.7675, |
|
"step": 307 |
|
}, |
|
{ |
|
"Batch Mean": -0.53656005859375, |
|
"accuracy": 0.6875, |
|
"epoch": 0.7675, |
|
"step": 307 |
|
}, |
|
{ |
|
"Batch Mean": 0.19480514526367188, |
|
"accuracy": 0.875, |
|
"epoch": 0.7675, |
|
"step": 307 |
|
}, |
|
{ |
|
"Batch Mean": -0.339019775390625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7675, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 8.770188331604004, |
|
"learning_rate": 7.263157894736843e-07, |
|
"loss": 0.3215, |
|
"step": 308 |
|
}, |
|
{ |
|
"Batch Mean": -0.4981985092163086, |
|
"accuracy": 0.84375, |
|
"epoch": 0.77, |
|
"step": 308 |
|
}, |
|
{ |
|
"Batch Mean": 0.4097747802734375, |
|
"accuracy": 0.96875, |
|
"epoch": 0.77, |
|
"step": 308 |
|
}, |
|
{ |
|
"Batch Mean": -0.8528766632080078, |
|
"accuracy": 0.875, |
|
"epoch": 0.77, |
|
"step": 308 |
|
}, |
|
{ |
|
"Batch Mean": 0.3489990234375, |
|
"accuracy": 0.875, |
|
"epoch": 0.77, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.7725, |
|
"grad_norm": 9.400025367736816, |
|
"learning_rate": 7.18421052631579e-07, |
|
"loss": 0.2833, |
|
"step": 309 |
|
}, |
|
{ |
|
"Batch Mean": 0.6253204345703125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7725, |
|
"step": 309 |
|
}, |
|
{ |
|
"Batch Mean": 0.5490493774414062, |
|
"accuracy": 0.9375, |
|
"epoch": 0.7725, |
|
"step": 309 |
|
}, |
|
{ |
|
"Batch Mean": 0.3172607421875, |
|
"accuracy": 0.65625, |
|
"epoch": 0.7725, |
|
"step": 309 |
|
}, |
|
{ |
|
"Batch Mean": 0.30156707763671875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.7725, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.775, |
|
"grad_norm": 9.653395652770996, |
|
"learning_rate": 7.105263157894736e-07, |
|
"loss": 0.3767, |
|
"step": 310 |
|
}, |
|
{ |
|
"Batch Mean": 0.2414093017578125, |
|
"accuracy": 0.78125, |
|
"epoch": 0.775, |
|
"step": 310 |
|
}, |
|
{ |
|
"Batch Mean": -0.6696853637695312, |
|
"accuracy": 0.84375, |
|
"epoch": 0.775, |
|
"step": 310 |
|
}, |
|
{ |
|
"Batch Mean": -0.2911529541015625, |
|
"accuracy": 0.90625, |
|
"epoch": 0.775, |
|
"step": 310 |
|
}, |
|
{ |
|
"Batch Mean": 0.1298370361328125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.775, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7775, |
|
"grad_norm": 8.652348518371582, |
|
"learning_rate": 7.026315789473685e-07, |
|
"loss": 0.3161, |
|
"step": 311 |
|
}, |
|
{ |
|
"Batch Mean": 0.009725570678710938, |
|
"accuracy": 0.90625, |
|
"epoch": 0.7775, |
|
"step": 311 |
|
}, |
|
{ |
|
"Batch Mean": -0.7600326538085938, |
|
"accuracy": 0.78125, |
|
"epoch": 0.7775, |
|
"step": 311 |
|
}, |
|
{ |
|
"Batch Mean": -0.3205299377441406, |
|
"accuracy": 0.90625, |
|
"epoch": 0.7775, |
|
"step": 311 |
|
}, |
|
{ |
|
"Batch Mean": -0.5525283813476562, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7775, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 8.444558143615723, |
|
"learning_rate": 6.947368421052631e-07, |
|
"loss": 0.3203, |
|
"step": 312 |
|
}, |
|
{ |
|
"Batch Mean": -0.6497936248779297, |
|
"accuracy": 0.75, |
|
"epoch": 0.78, |
|
"step": 312 |
|
}, |
|
{ |
|
"Batch Mean": 1.0596923828125, |
|
"accuracy": 0.875, |
|
"epoch": 0.78, |
|
"step": 312 |
|
}, |
|
{ |
|
"Batch Mean": 0.3611183166503906, |
|
"accuracy": 0.71875, |
|
"epoch": 0.78, |
|
"step": 312 |
|
}, |
|
{ |
|
"Batch Mean": 0.0991964340209961, |
|
"accuracy": 0.8125, |
|
"epoch": 0.78, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.7825, |
|
"grad_norm": 14.062588691711426, |
|
"learning_rate": 6.868421052631579e-07, |
|
"loss": 0.4345, |
|
"step": 313 |
|
}, |
|
{ |
|
"Batch Mean": 0.509796142578125, |
|
"accuracy": 0.875, |
|
"epoch": 0.7825, |
|
"step": 313 |
|
}, |
|
{ |
|
"Batch Mean": -0.25455474853515625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7825, |
|
"step": 313 |
|
}, |
|
{ |
|
"Batch Mean": -0.8733139038085938, |
|
"accuracy": 0.65625, |
|
"epoch": 0.7825, |
|
"step": 313 |
|
}, |
|
{ |
|
"Batch Mean": -0.2877159118652344, |
|
"accuracy": 0.9375, |
|
"epoch": 0.7825, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.785, |
|
"grad_norm": 10.200967788696289, |
|
"learning_rate": 6.789473684210527e-07, |
|
"loss": 0.356, |
|
"step": 314 |
|
}, |
|
{ |
|
"Batch Mean": -0.753840446472168, |
|
"accuracy": 0.84375, |
|
"epoch": 0.785, |
|
"step": 314 |
|
}, |
|
{ |
|
"Batch Mean": 0.43662261962890625, |
|
"accuracy": 0.9375, |
|
"epoch": 0.785, |
|
"step": 314 |
|
}, |
|
{ |
|
"Batch Mean": -0.9569015502929688, |
|
"accuracy": 0.78125, |
|
"epoch": 0.785, |
|
"step": 314 |
|
}, |
|
{ |
|
"Batch Mean": 0.0350494384765625, |
|
"accuracy": 0.71875, |
|
"epoch": 0.785, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.7875, |
|
"grad_norm": 11.170928955078125, |
|
"learning_rate": 6.710526315789474e-07, |
|
"loss": 0.4035, |
|
"step": 315 |
|
}, |
|
{ |
|
"Batch Mean": -1.0777130126953125, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7875, |
|
"step": 315 |
|
}, |
|
{ |
|
"Batch Mean": 0.650787353515625, |
|
"accuracy": 0.78125, |
|
"epoch": 0.7875, |
|
"step": 315 |
|
}, |
|
{ |
|
"Batch Mean": -0.5789690017700195, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7875, |
|
"step": 315 |
|
}, |
|
{ |
|
"Batch Mean": 0.0339202880859375, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7875, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.819647789001465, |
|
"learning_rate": 6.631578947368421e-07, |
|
"loss": 0.3456, |
|
"step": 316 |
|
}, |
|
{ |
|
"Batch Mean": -0.7309417724609375, |
|
"accuracy": 0.75, |
|
"epoch": 0.79, |
|
"step": 316 |
|
}, |
|
{ |
|
"Batch Mean": 0.302001953125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.79, |
|
"step": 316 |
|
}, |
|
{ |
|
"Batch Mean": -0.494842529296875, |
|
"accuracy": 0.84375, |
|
"epoch": 0.79, |
|
"step": 316 |
|
}, |
|
{ |
|
"Batch Mean": -0.5057373046875, |
|
"accuracy": 0.78125, |
|
"epoch": 0.79, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.7925, |
|
"grad_norm": 11.263664245605469, |
|
"learning_rate": 6.552631578947369e-07, |
|
"loss": 0.4126, |
|
"step": 317 |
|
}, |
|
{ |
|
"Batch Mean": 0.3507499694824219, |
|
"accuracy": 0.71875, |
|
"epoch": 0.7925, |
|
"step": 317 |
|
}, |
|
{ |
|
"Batch Mean": -0.23482513427734375, |
|
"accuracy": 0.6875, |
|
"epoch": 0.7925, |
|
"step": 317 |
|
}, |
|
{ |
|
"Batch Mean": -0.19596099853515625, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7925, |
|
"step": 317 |
|
}, |
|
{ |
|
"Batch Mean": -0.0801849365234375, |
|
"accuracy": 0.78125, |
|
"epoch": 0.7925, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.795, |
|
"grad_norm": 11.502315521240234, |
|
"learning_rate": 6.473684210526316e-07, |
|
"loss": 0.4278, |
|
"step": 318 |
|
}, |
|
{ |
|
"Batch Mean": 0.5050125122070312, |
|
"accuracy": 0.8125, |
|
"epoch": 0.795, |
|
"step": 318 |
|
}, |
|
{ |
|
"Batch Mean": -1.025543212890625, |
|
"accuracy": 0.75, |
|
"epoch": 0.795, |
|
"step": 318 |
|
}, |
|
{ |
|
"Batch Mean": 0.04312896728515625, |
|
"accuracy": 0.875, |
|
"epoch": 0.795, |
|
"step": 318 |
|
}, |
|
{ |
|
"Batch Mean": -0.8218994140625, |
|
"accuracy": 0.8125, |
|
"epoch": 0.795, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.7975, |
|
"grad_norm": 9.564278602600098, |
|
"learning_rate": 6.394736842105264e-07, |
|
"loss": 0.3704, |
|
"step": 319 |
|
}, |
|
{ |
|
"Batch Mean": -0.7440528869628906, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7975, |
|
"step": 319 |
|
}, |
|
{ |
|
"Batch Mean": -1.0272712707519531, |
|
"accuracy": 0.8125, |
|
"epoch": 0.7975, |
|
"step": 319 |
|
}, |
|
{ |
|
"Batch Mean": -0.21501922607421875, |
|
"accuracy": 0.65625, |
|
"epoch": 0.7975, |
|
"step": 319 |
|
}, |
|
{ |
|
"Batch Mean": -0.5200958251953125, |
|
"accuracy": 0.84375, |
|
"epoch": 0.7975, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 10.255894660949707, |
|
"learning_rate": 6.31578947368421e-07, |
|
"loss": 0.421, |
|
"step": 320 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 80, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|