Training in progress, step 270000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0f45e9ea7aeadf9f10b1643bf00c0b8ef58ef51944d08e1b0ad72f902bd82cd
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1752842aa50d3948c8a46f98f668fd33584b25521cc864747236962908637e8
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cb61348726887e329b19406ea4e3e39ac391edeec6dfd8508b3cb524aa33e28
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5206,11 +5206,211 @@
|
|
5206 |
"eval_samples_per_second": 748.473,
|
5207 |
"eval_steps_per_second": 11.976,
|
5208 |
"step": 260000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5209 |
}
|
5210 |
],
|
5211 |
"max_steps": 500000,
|
5212 |
"num_train_epochs": 13,
|
5213 |
-
"total_flos": 8.
|
5214 |
"trial_name": null,
|
5215 |
"trial_params": null
|
5216 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.881084662826852,
|
5 |
+
"global_step": 270000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5206 |
"eval_samples_per_second": 748.473,
|
5207 |
"eval_steps_per_second": 11.976,
|
5208 |
"step": 260000
|
5209 |
+
},
|
5210 |
+
{
|
5211 |
+
"epoch": 6.64,
|
5212 |
+
"learning_rate": 0.00015691796905504187,
|
5213 |
+
"loss": 0.2862,
|
5214 |
+
"step": 260500
|
5215 |
+
},
|
5216 |
+
{
|
5217 |
+
"epoch": 6.65,
|
5218 |
+
"learning_rate": 0.00015643849514435944,
|
5219 |
+
"loss": 0.2871,
|
5220 |
+
"step": 261000
|
5221 |
+
},
|
5222 |
+
{
|
5223 |
+
"epoch": 6.65,
|
5224 |
+
"eval_loss": 0.8193889260292053,
|
5225 |
+
"eval_runtime": 1.2087,
|
5226 |
+
"eval_samples_per_second": 827.32,
|
5227 |
+
"eval_steps_per_second": 13.237,
|
5228 |
+
"step": 261000
|
5229 |
+
},
|
5230 |
+
{
|
5231 |
+
"epoch": 6.66,
|
5232 |
+
"learning_rate": 0.00015595900550252463,
|
5233 |
+
"loss": 0.2867,
|
5234 |
+
"step": 261500
|
5235 |
+
},
|
5236 |
+
{
|
5237 |
+
"epoch": 6.68,
|
5238 |
+
"learning_rate": 0.00015547950537315926,
|
5239 |
+
"loss": 0.2863,
|
5240 |
+
"step": 262000
|
5241 |
+
},
|
5242 |
+
{
|
5243 |
+
"epoch": 6.68,
|
5244 |
+
"eval_loss": 0.8058978319168091,
|
5245 |
+
"eval_runtime": 1.2707,
|
5246 |
+
"eval_samples_per_second": 786.99,
|
5247 |
+
"eval_steps_per_second": 12.592,
|
5248 |
+
"step": 262000
|
5249 |
+
},
|
5250 |
+
{
|
5251 |
+
"epoch": 6.69,
|
5252 |
+
"learning_rate": 0.00015499999999999997,
|
5253 |
+
"loss": 0.2862,
|
5254 |
+
"step": 262500
|
5255 |
+
},
|
5256 |
+
{
|
5257 |
+
"epoch": 6.7,
|
5258 |
+
"learning_rate": 0.00015452049462684068,
|
5259 |
+
"loss": 0.2864,
|
5260 |
+
"step": 263000
|
5261 |
+
},
|
5262 |
+
{
|
5263 |
+
"epoch": 6.7,
|
5264 |
+
"eval_loss": 0.8194664120674133,
|
5265 |
+
"eval_runtime": 1.2245,
|
5266 |
+
"eval_samples_per_second": 816.647,
|
5267 |
+
"eval_steps_per_second": 13.066,
|
5268 |
+
"step": 263000
|
5269 |
+
},
|
5270 |
+
{
|
5271 |
+
"epoch": 6.72,
|
5272 |
+
"learning_rate": 0.00015404099449747535,
|
5273 |
+
"loss": 0.2861,
|
5274 |
+
"step": 263500
|
5275 |
+
},
|
5276 |
+
{
|
5277 |
+
"epoch": 6.73,
|
5278 |
+
"learning_rate": 0.0001535615048556405,
|
5279 |
+
"loss": 0.2863,
|
5280 |
+
"step": 264000
|
5281 |
+
},
|
5282 |
+
{
|
5283 |
+
"epoch": 6.73,
|
5284 |
+
"eval_loss": 0.8099100589752197,
|
5285 |
+
"eval_runtime": 1.2591,
|
5286 |
+
"eval_samples_per_second": 794.217,
|
5287 |
+
"eval_steps_per_second": 12.707,
|
5288 |
+
"step": 264000
|
5289 |
+
},
|
5290 |
+
{
|
5291 |
+
"epoch": 6.74,
|
5292 |
+
"learning_rate": 0.0001530820309449581,
|
5293 |
+
"loss": 0.2861,
|
5294 |
+
"step": 264500
|
5295 |
+
},
|
5296 |
+
{
|
5297 |
+
"epoch": 6.75,
|
5298 |
+
"learning_rate": 0.00015260257800887798,
|
5299 |
+
"loss": 0.2868,
|
5300 |
+
"step": 265000
|
5301 |
+
},
|
5302 |
+
{
|
5303 |
+
"epoch": 6.75,
|
5304 |
+
"eval_loss": 0.8127309679985046,
|
5305 |
+
"eval_runtime": 1.3337,
|
5306 |
+
"eval_samples_per_second": 749.803,
|
5307 |
+
"eval_steps_per_second": 11.997,
|
5308 |
+
"step": 265000
|
5309 |
+
},
|
5310 |
+
{
|
5311 |
+
"epoch": 6.77,
|
5312 |
+
"learning_rate": 0.0001521231512906207,
|
5313 |
+
"loss": 0.2868,
|
5314 |
+
"step": 265500
|
5315 |
+
},
|
5316 |
+
{
|
5317 |
+
"epoch": 6.78,
|
5318 |
+
"learning_rate": 0.00015164375603311998,
|
5319 |
+
"loss": 0.2863,
|
5320 |
+
"step": 266000
|
5321 |
+
},
|
5322 |
+
{
|
5323 |
+
"epoch": 6.78,
|
5324 |
+
"eval_loss": 0.806861162185669,
|
5325 |
+
"eval_runtime": 1.2726,
|
5326 |
+
"eval_samples_per_second": 785.768,
|
5327 |
+
"eval_steps_per_second": 12.572,
|
5328 |
+
"step": 266000
|
5329 |
+
},
|
5330 |
+
{
|
5331 |
+
"epoch": 6.79,
|
5332 |
+
"learning_rate": 0.00015116439747896553,
|
5333 |
+
"loss": 0.2856,
|
5334 |
+
"step": 266500
|
5335 |
+
},
|
5336 |
+
{
|
5337 |
+
"epoch": 6.8,
|
5338 |
+
"learning_rate": 0.00015068508087034578,
|
5339 |
+
"loss": 0.2854,
|
5340 |
+
"step": 267000
|
5341 |
+
},
|
5342 |
+
{
|
5343 |
+
"epoch": 6.8,
|
5344 |
+
"eval_loss": 0.8032740354537964,
|
5345 |
+
"eval_runtime": 1.28,
|
5346 |
+
"eval_samples_per_second": 781.242,
|
5347 |
+
"eval_steps_per_second": 12.5,
|
5348 |
+
"step": 267000
|
5349 |
+
},
|
5350 |
+
{
|
5351 |
+
"epoch": 6.82,
|
5352 |
+
"learning_rate": 0.00015020581144899027,
|
5353 |
+
"loss": 0.2857,
|
5354 |
+
"step": 267500
|
5355 |
+
},
|
5356 |
+
{
|
5357 |
+
"epoch": 6.83,
|
5358 |
+
"learning_rate": 0.0001497265944561127,
|
5359 |
+
"loss": 0.2855,
|
5360 |
+
"step": 268000
|
5361 |
+
},
|
5362 |
+
{
|
5363 |
+
"epoch": 6.83,
|
5364 |
+
"eval_loss": 0.8096611499786377,
|
5365 |
+
"eval_runtime": 1.2917,
|
5366 |
+
"eval_samples_per_second": 774.163,
|
5367 |
+
"eval_steps_per_second": 12.387,
|
5368 |
+
"step": 268000
|
5369 |
+
},
|
5370 |
+
{
|
5371 |
+
"epoch": 6.84,
|
5372 |
+
"learning_rate": 0.00014924743513235327,
|
5373 |
+
"loss": 0.2856,
|
5374 |
+
"step": 268500
|
5375 |
+
},
|
5376 |
+
{
|
5377 |
+
"epoch": 6.86,
|
5378 |
+
"learning_rate": 0.0001487683387177216,
|
5379 |
+
"loss": 0.2864,
|
5380 |
+
"step": 269000
|
5381 |
+
},
|
5382 |
+
{
|
5383 |
+
"epoch": 6.86,
|
5384 |
+
"eval_loss": 0.8095938563346863,
|
5385 |
+
"eval_runtime": 1.2896,
|
5386 |
+
"eval_samples_per_second": 775.42,
|
5387 |
+
"eval_steps_per_second": 12.407,
|
5388 |
+
"step": 269000
|
5389 |
+
},
|
5390 |
+
{
|
5391 |
+
"epoch": 6.87,
|
5392 |
+
"learning_rate": 0.00014828931045153928,
|
5393 |
+
"loss": 0.2857,
|
5394 |
+
"step": 269500
|
5395 |
+
},
|
5396 |
+
{
|
5397 |
+
"epoch": 6.88,
|
5398 |
+
"learning_rate": 0.00014781035557238272,
|
5399 |
+
"loss": 0.2865,
|
5400 |
+
"step": 270000
|
5401 |
+
},
|
5402 |
+
{
|
5403 |
+
"epoch": 6.88,
|
5404 |
+
"eval_loss": 0.8193797469139099,
|
5405 |
+
"eval_runtime": 1.2225,
|
5406 |
+
"eval_samples_per_second": 817.974,
|
5407 |
+
"eval_steps_per_second": 13.088,
|
5408 |
+
"step": 270000
|
5409 |
}
|
5410 |
],
|
5411 |
"max_steps": 500000,
|
5412 |
"num_train_epochs": 13,
|
5413 |
+
"total_flos": 8.62607187343323e+21,
|
5414 |
"trial_name": null,
|
5415 |
"trial_params": null
|
5416 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1752842aa50d3948c8a46f98f668fd33584b25521cc864747236962908637e8
|
3 |
size 102501541
|