{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.968,
  "eval_steps": 62,
  "global_step": 248,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24,
      "grad_norm": 1.5051484779217006,
      "learning_rate": 1.2096774193548388e-05,
      "loss": 0.0697,
      "step": 15
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.6557213411275504,
      "learning_rate": 2.4193548387096777e-05,
      "loss": 0.0453,
      "step": 30
    },
    {
      "epoch": 0.72,
      "grad_norm": 5.8190678296309155,
      "learning_rate": 3.6290322580645165e-05,
      "loss": 0.1379,
      "step": 45
    },
    {
      "epoch": 0.96,
      "grad_norm": 5.082131701290993,
      "learning_rate": 4.8387096774193554e-05,
      "loss": 0.3262,
      "step": 60
    },
    {
      "epoch": 1.2,
      "grad_norm": 4.533397429277955,
      "learning_rate": 4.74e-05,
      "loss": 0.4413,
      "step": 75
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.9912559897316915,
      "learning_rate": 4.44e-05,
      "loss": 0.4631,
      "step": 90
    },
    {
      "epoch": 1.6800000000000002,
      "grad_norm": 3.605033658976913,
      "learning_rate": 4.14e-05,
      "loss": 0.3972,
      "step": 105
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1324872605719025,
      "learning_rate": 3.8400000000000005e-05,
      "loss": 0.3926,
      "step": 120
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7954439766075647,
      "learning_rate": 3.54e-05,
      "loss": 0.3268,
      "step": 135
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4279604222176263,
      "learning_rate": 3.24e-05,
      "loss": 0.2419,
      "step": 150
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.2595799598751447,
      "learning_rate": 2.94e-05,
      "loss": 0.2212,
      "step": 165
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.573622729528534,
      "learning_rate": 2.64e-05,
      "loss": 0.1586,
      "step": 180
    },
    {
      "epoch": 3.12,
      "grad_norm": 2.2604733828967016,
      "learning_rate": 2.3400000000000003e-05,
      "loss": 0.1431,
      "step": 195
    },
    {
      "epoch": 3.36,
      "grad_norm": 1.4987841874421017,
      "learning_rate": 2.04e-05,
      "loss": 0.1047,
      "step": 210
    },
    {
      "epoch": 3.6,
      "grad_norm": 0.6724445238800598,
      "learning_rate": 1.74e-05,
      "loss": 0.0896,
      "step": 225
    },
    {
      "epoch": 3.84,
      "grad_norm": 1.3552689605969652,
      "learning_rate": 1.44e-05,
      "loss": 0.0871,
      "step": 240
    }
  ],
  "logging_steps": 15,
  "max_steps": 312,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 62,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}