{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.00020234565307059528,
"eval_steps": 500,
"global_step": 260,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 2.555440902709961,
"learning_rate": 3.3333333333333333e-06,
"loss": 3.6397,
"step": 20
},
{
"epoch": 0.0,
"grad_norm": 2.218903064727783,
"learning_rate": 6.666666666666667e-06,
"loss": 3.6917,
"step": 40
},
{
"epoch": 0.0,
"grad_norm": 0.9262466430664062,
"learning_rate": 1e-05,
"loss": 3.5828,
"step": 60
},
{
"epoch": 0.0,
"grad_norm": 2.782036542892456,
"learning_rate": 1.3166666666666665e-05,
"loss": 3.5865,
"step": 80
},
{
"epoch": 0.0,
"grad_norm": 1.9482054710388184,
"learning_rate": 1.65e-05,
"loss": 3.3337,
"step": 100
},
{
"epoch": 0.0,
"grad_norm": 4.047863006591797,
"learning_rate": 1.9833333333333335e-05,
"loss": 3.1903,
"step": 120
},
{
"epoch": 0.0,
"grad_norm": 3.08722186088562,
"learning_rate": 2.3166666666666666e-05,
"loss": 3.5379,
"step": 140
},
{
"epoch": 0.0,
"grad_norm": 3.540940046310425,
"learning_rate": 2.6500000000000004e-05,
"loss": 3.16,
"step": 160
},
{
"epoch": 0.0,
"grad_norm": 5.391817092895508,
"learning_rate": 2.9833333333333335e-05,
"loss": 3.2489,
"step": 180
},
{
"epoch": 0.0,
"grad_norm": 5.890682220458984,
"learning_rate": 3.316666666666667e-05,
"loss": 3.0499,
"step": 200
},
{
"epoch": 0.0,
"grad_norm": 6.314597129821777,
"learning_rate": 3.65e-05,
"loss": 2.8568,
"step": 220
},
{
"epoch": 0.0,
"grad_norm": 1.0859078168869019,
"learning_rate": 3.983333333333333e-05,
"loss": 2.8566,
"step": 240
},
{
"epoch": 0.0,
"grad_norm": 4.688353538513184,
"learning_rate": 4.316666666666667e-05,
"loss": 3.0079,
"step": 260
}
],
"logging_steps": 20,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 20,
"total_flos": 1238496472055808.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
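A minimal sketch of how these records can be inspected, assuming the JSON above is saved as "trainer_state.json" (the filename the Hugging Face Trainer writes into each checkpoint directory); the printed summary is illustrative, not part of the checkpoint itself:

# Load the trainer state and summarize each logged step.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Each entry in log_history is one logging event
# (here every 20 steps, per "logging_steps": 20).
for entry in state["log_history"]:
    print(
        f'step {entry["step"]:>5}  '
        f'loss {entry["loss"]:.4f}  '
        f'lr {entry["learning_rate"]:.2e}  '
        f'grad_norm {entry["grad_norm"]:.3f}'
    )

# Overall progress: global_step out of max_steps.
print(f'{state["global_step"]} / {state["max_steps"]} steps completed')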