Training in progress, step 260000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66c0b735c70ca1ca003eab9d686d7e3ea552331bf016385c8fad2dfe60c740ac
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8e8c193ef48de967d7501e0b15a06f4b379c56e8efdf426bfb091824f5c716b
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89edb9f9ff26027cc98aa82c2133f2e1b82d5b92ed39de235eb4cb2271e68c3d
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54ee6f005be263ffc1028eeae009e64fa85afa0eb3360fe9f44a8e6025237de1
|
3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3500ada53b759cb5c8307ed7d4c05bb6055c04f57f48c3b210c40f88bacd59f
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9805eeb18ee0853d0258572c1ad542ff4bc6ff2393ff8df1da096af07bacd6d
|
3 |
+
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b00333ce97a5a96a0369400120eb38a7bfd7ee5df94e91c7d6993e0e7f5524ba
|
3 |
+
size 14439
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4802de77ca51e4efac2063b9b4a2ac08f4acde74a027da9b43cf90af44cf0108
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14439
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f2a04b63259c1a14b093267b1fd26d6b21fab4af2fc765329473d9fca239907
|
3 |
size 14439
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f74dc2cbdc738c6fbf513addd5269b878530a18e75ada83dcaec9120da52354f
|
3 |
+
size 14439
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b7fe86b6bf62db9f7989d6e264b9b70447a29a8d4bbea419af77ab1989ca356
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5006,11 +5006,211 @@
|
|
5006 |
"eval_samples_per_second": 1847.755,
|
5007 |
"eval_steps_per_second": 29.564,
|
5008 |
"step": 250000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5009 |
}
|
5010 |
],
|
5011 |
"max_steps": 500000,
|
5012 |
"num_train_epochs": 16,
|
5013 |
-
"total_flos":
|
5014 |
"trial_name": null,
|
5015 |
"trial_params": null
|
5016 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.965930328747817,
|
5 |
+
"global_step": 260000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5006 |
"eval_samples_per_second": 1847.755,
|
5007 |
"eval_steps_per_second": 29.564,
|
5008 |
"step": 250000
|
5009 |
+
},
|
5010 |
+
{
|
5011 |
+
"epoch": 7.67,
|
5012 |
+
"learning_rate": 0.00016649607200138356,
|
5013 |
+
"loss": 0.3383,
|
5014 |
+
"step": 250500
|
5015 |
+
},
|
5016 |
+
{
|
5017 |
+
"epoch": 7.69,
|
5018 |
+
"learning_rate": 0.00016601801319007743,
|
5019 |
+
"loss": 0.3382,
|
5020 |
+
"step": 251000
|
5021 |
+
},
|
5022 |
+
{
|
5023 |
+
"epoch": 7.69,
|
5024 |
+
"eval_loss": 0.782693088054657,
|
5025 |
+
"eval_runtime": 0.5248,
|
5026 |
+
"eval_samples_per_second": 1905.624,
|
5027 |
+
"eval_steps_per_second": 30.49,
|
5028 |
+
"step": 251000
|
5029 |
+
},
|
5030 |
+
{
|
5031 |
+
"epoch": 7.71,
|
5032 |
+
"learning_rate": 0.00016553983388754428,
|
5033 |
+
"loss": 0.3381,
|
5034 |
+
"step": 251500
|
5035 |
+
},
|
5036 |
+
{
|
5037 |
+
"epoch": 7.72,
|
5038 |
+
"learning_rate": 0.00016506153932307636,
|
5039 |
+
"loss": 0.3377,
|
5040 |
+
"step": 252000
|
5041 |
+
},
|
5042 |
+
{
|
5043 |
+
"epoch": 7.72,
|
5044 |
+
"eval_loss": 0.7844077944755554,
|
5045 |
+
"eval_runtime": 0.5281,
|
5046 |
+
"eval_samples_per_second": 1893.751,
|
5047 |
+
"eval_steps_per_second": 30.3,
|
5048 |
+
"step": 252000
|
5049 |
+
},
|
5050 |
+
{
|
5051 |
+
"epoch": 7.74,
|
5052 |
+
"learning_rate": 0.00016458313472722638,
|
5053 |
+
"loss": 0.3376,
|
5054 |
+
"step": 252500
|
5055 |
+
},
|
5056 |
+
{
|
5057 |
+
"epoch": 7.75,
|
5058 |
+
"learning_rate": 0.00016410462533175045,
|
5059 |
+
"loss": 0.3377,
|
5060 |
+
"step": 253000
|
5061 |
+
},
|
5062 |
+
{
|
5063 |
+
"epoch": 7.75,
|
5064 |
+
"eval_loss": 0.7837400436401367,
|
5065 |
+
"eval_runtime": 0.5195,
|
5066 |
+
"eval_samples_per_second": 1924.894,
|
5067 |
+
"eval_steps_per_second": 30.798,
|
5068 |
+
"step": 253000
|
5069 |
+
},
|
5070 |
+
{
|
5071 |
+
"epoch": 7.77,
|
5072 |
+
"learning_rate": 0.00016362601636955049,
|
5073 |
+
"loss": 0.3378,
|
5074 |
+
"step": 253500
|
5075 |
+
},
|
5076 |
+
{
|
5077 |
+
"epoch": 7.78,
|
5078 |
+
"learning_rate": 0.00016314731307461754,
|
5079 |
+
"loss": 0.3377,
|
5080 |
+
"step": 254000
|
5081 |
+
},
|
5082 |
+
{
|
5083 |
+
"epoch": 7.78,
|
5084 |
+
"eval_loss": 0.7803494930267334,
|
5085 |
+
"eval_runtime": 0.5126,
|
5086 |
+
"eval_samples_per_second": 1950.683,
|
5087 |
+
"eval_steps_per_second": 31.211,
|
5088 |
+
"step": 254000
|
5089 |
+
},
|
5090 |
+
{
|
5091 |
+
"epoch": 7.8,
|
5092 |
+
"learning_rate": 0.0001626685206819742,
|
5093 |
+
"loss": 0.3376,
|
5094 |
+
"step": 254500
|
5095 |
+
},
|
5096 |
+
{
|
5097 |
+
"epoch": 7.81,
|
5098 |
+
"learning_rate": 0.0001621896444276172,
|
5099 |
+
"loss": 0.337,
|
5100 |
+
"step": 255000
|
5101 |
+
},
|
5102 |
+
{
|
5103 |
+
"epoch": 7.81,
|
5104 |
+
"eval_loss": 0.7785842418670654,
|
5105 |
+
"eval_runtime": 0.5059,
|
5106 |
+
"eval_samples_per_second": 1976.634,
|
5107 |
+
"eval_steps_per_second": 31.626,
|
5108 |
+
"step": 255000
|
5109 |
+
},
|
5110 |
+
{
|
5111 |
+
"epoch": 7.83,
|
5112 |
+
"learning_rate": 0.00016171068954846067,
|
5113 |
+
"loss": 0.3373,
|
5114 |
+
"step": 255500
|
5115 |
+
},
|
5116 |
+
{
|
5117 |
+
"epoch": 7.84,
|
5118 |
+
"learning_rate": 0.00016123166128227835,
|
5119 |
+
"loss": 0.3372,
|
5120 |
+
"step": 256000
|
5121 |
+
},
|
5122 |
+
{
|
5123 |
+
"epoch": 7.84,
|
5124 |
+
"eval_loss": 0.7810379266738892,
|
5125 |
+
"eval_runtime": 0.5109,
|
5126 |
+
"eval_samples_per_second": 1957.233,
|
5127 |
+
"eval_steps_per_second": 31.316,
|
5128 |
+
"step": 256000
|
5129 |
+
},
|
5130 |
+
{
|
5131 |
+
"epoch": 7.86,
|
5132 |
+
"learning_rate": 0.0001607525648676467,
|
5133 |
+
"loss": 0.3368,
|
5134 |
+
"step": 256500
|
5135 |
+
},
|
5136 |
+
{
|
5137 |
+
"epoch": 7.87,
|
5138 |
+
"learning_rate": 0.0001602734055438873,
|
5139 |
+
"loss": 0.337,
|
5140 |
+
"step": 257000
|
5141 |
+
},
|
5142 |
+
{
|
5143 |
+
"epoch": 7.87,
|
5144 |
+
"eval_loss": 0.7789760828018188,
|
5145 |
+
"eval_runtime": 0.541,
|
5146 |
+
"eval_samples_per_second": 1848.491,
|
5147 |
+
"eval_steps_per_second": 29.576,
|
5148 |
+
"step": 257000
|
5149 |
+
},
|
5150 |
+
{
|
5151 |
+
"epoch": 7.89,
|
5152 |
+
"learning_rate": 0.00015979418855100963,
|
5153 |
+
"loss": 0.3367,
|
5154 |
+
"step": 257500
|
5155 |
+
},
|
5156 |
+
{
|
5157 |
+
"epoch": 7.9,
|
5158 |
+
"learning_rate": 0.00015931491912965417,
|
5159 |
+
"loss": 0.3366,
|
5160 |
+
"step": 258000
|
5161 |
+
},
|
5162 |
+
{
|
5163 |
+
"epoch": 7.9,
|
5164 |
+
"eval_loss": 0.7777426838874817,
|
5165 |
+
"eval_runtime": 0.5219,
|
5166 |
+
"eval_samples_per_second": 1916.204,
|
5167 |
+
"eval_steps_per_second": 30.659,
|
5168 |
+
"step": 258000
|
5169 |
+
},
|
5170 |
+
{
|
5171 |
+
"epoch": 7.92,
|
5172 |
+
"learning_rate": 0.0001588356025210344,
|
5173 |
+
"loss": 0.3369,
|
5174 |
+
"step": 258500
|
5175 |
+
},
|
5176 |
+
{
|
5177 |
+
"epoch": 7.94,
|
5178 |
+
"learning_rate": 0.00015835624396688,
|
5179 |
+
"loss": 0.3364,
|
5180 |
+
"step": 259000
|
5181 |
+
},
|
5182 |
+
{
|
5183 |
+
"epoch": 7.94,
|
5184 |
+
"eval_loss": 0.7759175300598145,
|
5185 |
+
"eval_runtime": 0.5164,
|
5186 |
+
"eval_samples_per_second": 1936.553,
|
5187 |
+
"eval_steps_per_second": 30.985,
|
5188 |
+
"step": 259000
|
5189 |
+
},
|
5190 |
+
{
|
5191 |
+
"epoch": 7.95,
|
5192 |
+
"learning_rate": 0.00015787684870937924,
|
5193 |
+
"loss": 0.3364,
|
5194 |
+
"step": 259500
|
5195 |
+
},
|
5196 |
+
{
|
5197 |
+
"epoch": 7.97,
|
5198 |
+
"learning_rate": 0.00015739742199112196,
|
5199 |
+
"loss": 0.3364,
|
5200 |
+
"step": 260000
|
5201 |
+
},
|
5202 |
+
{
|
5203 |
+
"epoch": 7.97,
|
5204 |
+
"eval_loss": 0.7805649638175964,
|
5205 |
+
"eval_runtime": 0.5211,
|
5206 |
+
"eval_samples_per_second": 1919.019,
|
5207 |
+
"eval_steps_per_second": 30.704,
|
5208 |
+
"step": 260000
|
5209 |
}
|
5210 |
],
|
5211 |
"max_steps": 500000,
|
5212 |
"num_train_epochs": 16,
|
5213 |
+
"total_flos": 8.306609692473169e+21,
|
5214 |
"trial_name": null,
|
5215 |
"trial_params": null
|
5216 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8e8c193ef48de967d7501e0b15a06f4b379c56e8efdf426bfb091824f5c716b
|
3 |
size 102501541
|