{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.0002957359544877931,
"eval_steps": 500,
"global_step": 380,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 2.555440902709961,
"learning_rate": 3.3333333333333333e-06,
"loss": 3.6397,
"step": 20
},
{
"epoch": 0.0,
"grad_norm": 2.218903064727783,
"learning_rate": 6.666666666666667e-06,
"loss": 3.6917,
"step": 40
},
{
"epoch": 0.0,
"grad_norm": 0.9262466430664062,
"learning_rate": 1e-05,
"loss": 3.5828,
"step": 60
},
{
"epoch": 0.0,
"grad_norm": 2.782036542892456,
"learning_rate": 1.3166666666666665e-05,
"loss": 3.5865,
"step": 80
},
{
"epoch": 0.0,
"grad_norm": 1.9482054710388184,
"learning_rate": 1.65e-05,
"loss": 3.3337,
"step": 100
},
{
"epoch": 0.0,
"grad_norm": 4.047863006591797,
"learning_rate": 1.9833333333333335e-05,
"loss": 3.1903,
"step": 120
},
{
"epoch": 0.0,
"grad_norm": 3.08722186088562,
"learning_rate": 2.3166666666666666e-05,
"loss": 3.5379,
"step": 140
},
{
"epoch": 0.0,
"grad_norm": 3.540940046310425,
"learning_rate": 2.6500000000000004e-05,
"loss": 3.16,
"step": 160
},
{
"epoch": 0.0,
"grad_norm": 5.391817092895508,
"learning_rate": 2.9833333333333335e-05,
"loss": 3.2489,
"step": 180
},
{
"epoch": 0.0,
"grad_norm": 5.890682220458984,
"learning_rate": 3.316666666666667e-05,
"loss": 3.0499,
"step": 200
},
{
"epoch": 0.0,
"grad_norm": 6.314597129821777,
"learning_rate": 3.65e-05,
"loss": 2.8568,
"step": 220
},
{
"epoch": 0.0,
"grad_norm": 1.0859078168869019,
"learning_rate": 3.983333333333333e-05,
"loss": 2.8566,
"step": 240
},
{
"epoch": 0.0,
"grad_norm": 4.688353538513184,
"learning_rate": 4.316666666666667e-05,
"loss": 3.0079,
"step": 260
},
{
"epoch": 0.0,
"grad_norm": 4.502331256866455,
"learning_rate": 4.6500000000000005e-05,
"loss": 2.6839,
"step": 280
},
{
"epoch": 0.0,
"grad_norm": 8.951983451843262,
"learning_rate": 4.9833333333333336e-05,
"loss": 2.7932,
"step": 300
},
{
"epoch": 0.0,
"grad_norm": 4.788575172424316,
"learning_rate": 4.9999526661182696e-05,
"loss": 2.9341,
"step": 320
},
{
"epoch": 0.0,
"grad_norm": 7.716049671173096,
"learning_rate": 4.999800570348766e-05,
"loss": 2.5987,
"step": 340
},
{
"epoch": 0.0,
"grad_norm": 4.9223952293396,
"learning_rate": 4.9995435879539254e-05,
"loss": 2.7863,
"step": 360
},
{
"epoch": 0.0,
"grad_norm": 7.647037506103516,
"learning_rate": 4.999181729716214e-05,
"loss": 2.6197,
"step": 380
}
],
"logging_steps": 20,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 20,
"total_flos": 1740689046355968.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}