Training in progress, step 270000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:185444010f2414af8f6b292f79fa769076772990bf1219a1dafd09b6faae29a4
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94365aa60c35fc7ec52e5e8ba19311622aaab34d18bf7ce3dd77401881509519
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6f6ae6faade50eb043968b5667df29128b1e4a2530013bd32474d5a36afc850
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cc3f7b88227092a0043ff66b55085eb9bd377bb70cc891cc293b48f870db21f
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f3a5a051f0b0618eb7cb1692de034b0ac1fd365c0c181b09a598798b6235801
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9c6b8fbeaadc53fb4ee209c1d104938ceaf8c8a8cbc2fa87ebcabcc6284da17
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14439
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d753925d97c5520e660dcdd16394471ced5c4bc24193ed0a377ef70d8717a46
|
3 |
size 14439
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdee02ae130781f905df4745f98c06bad459194317ec411b25af7a96f282fee2
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:823a83d9a98cde647a58d74cc1bea63c670933d602a5a07e7fc2bfa68d3b9e32
|
3 |
+
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4ef1c7732ec4132391d0d6a205bc292fbd5fe79d85d00447b48ac3c30b01e18
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cb61348726887e329b19406ea4e3e39ac391edeec6dfd8508b3cb524aa33e28
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5206,11 +5206,211 @@
|
|
5206 |
"eval_samples_per_second": 1919.019,
|
5207 |
"eval_steps_per_second": 30.704,
|
5208 |
"step": 260000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5209 |
}
|
5210 |
],
|
5211 |
"max_steps": 500000,
|
5212 |
"num_train_epochs": 16,
|
5213 |
-
"total_flos": 8.
|
5214 |
"trial_name": null,
|
5215 |
"trial_params": null
|
5216 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.272312264468887,
|
5 |
+
"global_step": 270000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5206 |
"eval_samples_per_second": 1919.019,
|
5207 |
"eval_steps_per_second": 30.704,
|
5208 |
"step": 260000
|
5209 |
+
},
|
5210 |
+
{
|
5211 |
+
"epoch": 7.98,
|
5212 |
+
"learning_rate": 0.00015691796905504187,
|
5213 |
+
"loss": 0.3366,
|
5214 |
+
"step": 260500
|
5215 |
+
},
|
5216 |
+
{
|
5217 |
+
"epoch": 8.0,
|
5218 |
+
"learning_rate": 0.00015643849514435944,
|
5219 |
+
"loss": 0.3364,
|
5220 |
+
"step": 261000
|
5221 |
+
},
|
5222 |
+
{
|
5223 |
+
"epoch": 8.0,
|
5224 |
+
"eval_loss": 0.776075005531311,
|
5225 |
+
"eval_runtime": 0.5349,
|
5226 |
+
"eval_samples_per_second": 1869.618,
|
5227 |
+
"eval_steps_per_second": 29.914,
|
5228 |
+
"step": 261000
|
5229 |
+
},
|
5230 |
+
{
|
5231 |
+
"epoch": 8.01,
|
5232 |
+
"learning_rate": 0.00015595900550252463,
|
5233 |
+
"loss": 0.3362,
|
5234 |
+
"step": 261500
|
5235 |
+
},
|
5236 |
+
{
|
5237 |
+
"epoch": 8.03,
|
5238 |
+
"learning_rate": 0.00015547950537315926,
|
5239 |
+
"loss": 0.3363,
|
5240 |
+
"step": 262000
|
5241 |
+
},
|
5242 |
+
{
|
5243 |
+
"epoch": 8.03,
|
5244 |
+
"eval_loss": 0.7857484221458435,
|
5245 |
+
"eval_runtime": 0.538,
|
5246 |
+
"eval_samples_per_second": 1858.594,
|
5247 |
+
"eval_steps_per_second": 29.738,
|
5248 |
+
"step": 262000
|
5249 |
+
},
|
5250 |
+
{
|
5251 |
+
"epoch": 8.04,
|
5252 |
+
"learning_rate": 0.00015499999999999997,
|
5253 |
+
"loss": 0.336,
|
5254 |
+
"step": 262500
|
5255 |
+
},
|
5256 |
+
{
|
5257 |
+
"epoch": 8.06,
|
5258 |
+
"learning_rate": 0.00015452049462684068,
|
5259 |
+
"loss": 0.3359,
|
5260 |
+
"step": 263000
|
5261 |
+
},
|
5262 |
+
{
|
5263 |
+
"epoch": 8.06,
|
5264 |
+
"eval_loss": 0.7803733348846436,
|
5265 |
+
"eval_runtime": 0.5241,
|
5266 |
+
"eval_samples_per_second": 1908.102,
|
5267 |
+
"eval_steps_per_second": 30.53,
|
5268 |
+
"step": 263000
|
5269 |
+
},
|
5270 |
+
{
|
5271 |
+
"epoch": 8.07,
|
5272 |
+
"learning_rate": 0.00015404099449747535,
|
5273 |
+
"loss": 0.3356,
|
5274 |
+
"step": 263500
|
5275 |
+
},
|
5276 |
+
{
|
5277 |
+
"epoch": 8.09,
|
5278 |
+
"learning_rate": 0.0001535615048556405,
|
5279 |
+
"loss": 0.3357,
|
5280 |
+
"step": 264000
|
5281 |
+
},
|
5282 |
+
{
|
5283 |
+
"epoch": 8.09,
|
5284 |
+
"eval_loss": 0.7824040651321411,
|
5285 |
+
"eval_runtime": 0.5311,
|
5286 |
+
"eval_samples_per_second": 1882.719,
|
5287 |
+
"eval_steps_per_second": 30.124,
|
5288 |
+
"step": 264000
|
5289 |
+
},
|
5290 |
+
{
|
5291 |
+
"epoch": 8.1,
|
5292 |
+
"learning_rate": 0.0001530820309449581,
|
5293 |
+
"loss": 0.3355,
|
5294 |
+
"step": 264500
|
5295 |
+
},
|
5296 |
+
{
|
5297 |
+
"epoch": 8.12,
|
5298 |
+
"learning_rate": 0.00015260257800887798,
|
5299 |
+
"loss": 0.3354,
|
5300 |
+
"step": 265000
|
5301 |
+
},
|
5302 |
+
{
|
5303 |
+
"epoch": 8.12,
|
5304 |
+
"eval_loss": 0.776350200176239,
|
5305 |
+
"eval_runtime": 0.5238,
|
5306 |
+
"eval_samples_per_second": 1908.966,
|
5307 |
+
"eval_steps_per_second": 30.543,
|
5308 |
+
"step": 265000
|
5309 |
+
},
|
5310 |
+
{
|
5311 |
+
"epoch": 8.13,
|
5312 |
+
"learning_rate": 0.0001521231512906207,
|
5313 |
+
"loss": 0.3359,
|
5314 |
+
"step": 265500
|
5315 |
+
},
|
5316 |
+
{
|
5317 |
+
"epoch": 8.15,
|
5318 |
+
"learning_rate": 0.00015164375603311998,
|
5319 |
+
"loss": 0.3355,
|
5320 |
+
"step": 266000
|
5321 |
+
},
|
5322 |
+
{
|
5323 |
+
"epoch": 8.15,
|
5324 |
+
"eval_loss": 0.7818763256072998,
|
5325 |
+
"eval_runtime": 0.5354,
|
5326 |
+
"eval_samples_per_second": 1867.774,
|
5327 |
+
"eval_steps_per_second": 29.884,
|
5328 |
+
"step": 266000
|
5329 |
+
},
|
5330 |
+
{
|
5331 |
+
"epoch": 8.17,
|
5332 |
+
"learning_rate": 0.00015116439747896553,
|
5333 |
+
"loss": 0.3364,
|
5334 |
+
"step": 266500
|
5335 |
+
},
|
5336 |
+
{
|
5337 |
+
"epoch": 8.18,
|
5338 |
+
"learning_rate": 0.00015068508087034578,
|
5339 |
+
"loss": 0.3352,
|
5340 |
+
"step": 267000
|
5341 |
+
},
|
5342 |
+
{
|
5343 |
+
"epoch": 8.18,
|
5344 |
+
"eval_loss": 0.7747774720191956,
|
5345 |
+
"eval_runtime": 0.5186,
|
5346 |
+
"eval_samples_per_second": 1928.442,
|
5347 |
+
"eval_steps_per_second": 30.855,
|
5348 |
+
"step": 267000
|
5349 |
+
},
|
5350 |
+
{
|
5351 |
+
"epoch": 8.2,
|
5352 |
+
"learning_rate": 0.00015020581144899027,
|
5353 |
+
"loss": 0.335,
|
5354 |
+
"step": 267500
|
5355 |
+
},
|
5356 |
+
{
|
5357 |
+
"epoch": 8.21,
|
5358 |
+
"learning_rate": 0.0001497265944561127,
|
5359 |
+
"loss": 0.3347,
|
5360 |
+
"step": 268000
|
5361 |
+
},
|
5362 |
+
{
|
5363 |
+
"epoch": 8.21,
|
5364 |
+
"eval_loss": 0.7758739590644836,
|
5365 |
+
"eval_runtime": 0.5149,
|
5366 |
+
"eval_samples_per_second": 1942.219,
|
5367 |
+
"eval_steps_per_second": 31.076,
|
5368 |
+
"step": 268000
|
5369 |
+
},
|
5370 |
+
{
|
5371 |
+
"epoch": 8.23,
|
5372 |
+
"learning_rate": 0.00014924743513235327,
|
5373 |
+
"loss": 0.3347,
|
5374 |
+
"step": 268500
|
5375 |
+
},
|
5376 |
+
{
|
5377 |
+
"epoch": 8.24,
|
5378 |
+
"learning_rate": 0.0001487683387177216,
|
5379 |
+
"loss": 0.3347,
|
5380 |
+
"step": 269000
|
5381 |
+
},
|
5382 |
+
{
|
5383 |
+
"epoch": 8.24,
|
5384 |
+
"eval_loss": 0.777352511882782,
|
5385 |
+
"eval_runtime": 0.511,
|
5386 |
+
"eval_samples_per_second": 1956.914,
|
5387 |
+
"eval_steps_per_second": 31.311,
|
5388 |
+
"step": 269000
|
5389 |
+
},
|
5390 |
+
{
|
5391 |
+
"epoch": 8.26,
|
5392 |
+
"learning_rate": 0.00014828931045153928,
|
5393 |
+
"loss": 0.6491,
|
5394 |
+
"step": 269500
|
5395 |
+
},
|
5396 |
+
{
|
5397 |
+
"epoch": 8.27,
|
5398 |
+
"learning_rate": 0.00014781035557238272,
|
5399 |
+
"loss": 0.737,
|
5400 |
+
"step": 270000
|
5401 |
+
},
|
5402 |
+
{
|
5403 |
+
"epoch": 8.27,
|
5404 |
+
"eval_loss": 0.9018945097923279,
|
5405 |
+
"eval_runtime": 0.5226,
|
5406 |
+
"eval_samples_per_second": 1913.356,
|
5407 |
+
"eval_steps_per_second": 30.614,
|
5408 |
+
"step": 270000
|
5409 |
}
|
5410 |
],
|
5411 |
"max_steps": 500000,
|
5412 |
"num_train_epochs": 16,
|
5413 |
+
"total_flos": 8.626088843295693e+21,
|
5414 |
"trial_name": null,
|
5415 |
"trial_params": null
|
5416 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94365aa60c35fc7ec52e5e8ba19311622aaab34d18bf7ce3dd77401881509519
|
3 |
size 102501541
|