plip committed on
Commit
80dcf4a
1 Parent(s): 546540b

Training in progress, step 420000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23757c0288979d27bb8e7d987ca91414953ed2fb98096163be33a545f5af5ab4
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac94a60c95ba2596b6d1d05315af6a3ea5bad0a2404c9fbebd7db8a3b7f370f8
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c16dce11ea3eb455970382761036fdc46346bc29673783c86afe6088466197d
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cf499ab54bc6b5b46a8f12dd922823d0795f7339d994c55bfa6c710a91461c
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359855992dc1f001a1fd11d5cfc159df4486cdababa23a98e30e2a0152bffc69
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359855992dc1f001a1fd11d5cfc159df4486cdababa23a98e30e2a0152bffc69
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359855992dc1f001a1fd11d5cfc159df4486cdababa23a98e30e2a0152bffc69
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359855992dc1f001a1fd11d5cfc159df4486cdababa23a98e30e2a0152bffc69
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359855992dc1f001a1fd11d5cfc159df4486cdababa23a98e30e2a0152bffc69
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359855992dc1f001a1fd11d5cfc159df4486cdababa23a98e30e2a0152bffc69
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359855992dc1f001a1fd11d5cfc159df4486cdababa23a98e30e2a0152bffc69
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359855992dc1f001a1fd11d5cfc159df4486cdababa23a98e30e2a0152bffc69
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:498c95fdcab578849c69bf4a10f0532f5bf3ba8048fa37c1b416b3afd0ea5f33
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21fa0c13fc0e3348f6228394f5e318945295debe26ba21ec91b2c06a47593869
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.44905448799633,
5
- "global_step": 410000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8206,11 +8206,211 @@
8206
  "eval_samples_per_second": 742.097,
8207
  "eval_steps_per_second": 11.874,
8208
  "step": 410000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8209
  }
8210
  ],
8211
  "max_steps": 500000,
8212
  "num_train_epochs": 13,
8213
- "total_flos": 1.309883790062405e+22,
8214
  "trial_name": null,
8215
  "trial_params": null
8216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.703909475508436,
5
+ "global_step": 420000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8206
  "eval_samples_per_second": 742.097,
8207
  "eval_steps_per_second": 11.874,
8208
  "step": 410000
8209
+ },
8210
+ {
8211
+ "epoch": 10.46,
8212
+ "learning_rate": 3.4670510086189736e-05,
8213
+ "loss": 0.271,
8214
+ "step": 410500
8215
+ },
8216
+ {
8217
+ "epoch": 10.47,
8218
+ "learning_rate": 3.440361659286563e-05,
8219
+ "loss": 0.2712,
8220
+ "step": 411000
8221
+ },
8222
+ {
8223
+ "epoch": 10.47,
8224
+ "eval_loss": 0.8162291049957275,
8225
+ "eval_runtime": 1.3642,
8226
+ "eval_samples_per_second": 733.056,
8227
+ "eval_steps_per_second": 11.729,
8228
+ "step": 411000
8229
+ },
8230
+ {
8231
+ "epoch": 10.49,
8232
+ "learning_rate": 3.413804192226918e-05,
8233
+ "loss": 0.271,
8234
+ "step": 411500
8235
+ },
8236
+ {
8237
+ "epoch": 10.5,
8238
+ "learning_rate": 3.387378897868246e-05,
8239
+ "loss": 0.2709,
8240
+ "step": 412000
8241
+ },
8242
+ {
8243
+ "epoch": 10.5,
8244
+ "eval_loss": 0.8157584071159363,
8245
+ "eval_runtime": 1.3922,
8246
+ "eval_samples_per_second": 718.295,
8247
+ "eval_steps_per_second": 11.493,
8248
+ "step": 412000
8249
+ },
8250
+ {
8251
+ "epoch": 10.51,
8252
+ "learning_rate": 3.361086065193336e-05,
8253
+ "loss": 0.2705,
8254
+ "step": 412500
8255
+ },
8256
+ {
8257
+ "epoch": 10.53,
8258
+ "learning_rate": 3.334925981736389e-05,
8259
+ "loss": 0.2706,
8260
+ "step": 413000
8261
+ },
8262
+ {
8263
+ "epoch": 10.53,
8264
+ "eval_loss": 0.8102970123291016,
8265
+ "eval_runtime": 1.343,
8266
+ "eval_samples_per_second": 744.614,
8267
+ "eval_steps_per_second": 11.914,
8268
+ "step": 413000
8269
+ },
8270
+ {
8271
+ "epoch": 10.54,
8272
+ "learning_rate": 3.3088989335798925e-05,
8273
+ "loss": 0.2707,
8274
+ "step": 413500
8275
+ },
8276
+ {
8277
+ "epoch": 10.55,
8278
+ "learning_rate": 3.283005205351467e-05,
8279
+ "loss": 0.2709,
8280
+ "step": 414000
8281
+ },
8282
+ {
8283
+ "epoch": 10.55,
8284
+ "eval_loss": 0.8069249987602234,
8285
+ "eval_runtime": 1.3236,
8286
+ "eval_samples_per_second": 755.504,
8287
+ "eval_steps_per_second": 12.088,
8288
+ "step": 414000
8289
+ },
8290
+ {
8291
+ "epoch": 10.56,
8292
+ "learning_rate": 3.2572450802207845e-05,
8293
+ "loss": 0.2705,
8294
+ "step": 414500
8295
+ },
8296
+ {
8297
+ "epoch": 10.58,
8298
+ "learning_rate": 3.2316188398964344e-05,
8299
+ "loss": 0.2706,
8300
+ "step": 415000
8301
+ },
8302
+ {
8303
+ "epoch": 10.58,
8304
+ "eval_loss": 0.812959611415863,
8305
+ "eval_runtime": 1.3895,
8306
+ "eval_samples_per_second": 719.683,
8307
+ "eval_steps_per_second": 11.515,
8308
+ "step": 415000
8309
+ },
8310
+ {
8311
+ "epoch": 10.59,
8312
+ "learning_rate": 3.206126764622888e-05,
8313
+ "loss": 0.2708,
8314
+ "step": 415500
8315
+ },
8316
+ {
8317
+ "epoch": 10.6,
8318
+ "learning_rate": 3.180769133177392e-05,
8319
+ "loss": 0.2706,
8320
+ "step": 416000
8321
+ },
8322
+ {
8323
+ "epoch": 10.6,
8324
+ "eval_loss": 0.8125792145729065,
8325
+ "eval_runtime": 1.3667,
8326
+ "eval_samples_per_second": 731.714,
8327
+ "eval_steps_per_second": 11.707,
8328
+ "step": 416000
8329
+ },
8330
+ {
8331
+ "epoch": 10.61,
8332
+ "learning_rate": 3.155546222866939e-05,
8333
+ "loss": 0.2704,
8334
+ "step": 416500
8335
+ },
8336
+ {
8337
+ "epoch": 10.63,
8338
+ "learning_rate": 3.130458309525239e-05,
8339
+ "loss": 0.2704,
8340
+ "step": 417000
8341
+ },
8342
+ {
8343
+ "epoch": 10.63,
8344
+ "eval_loss": 0.8181024193763733,
8345
+ "eval_runtime": 1.3224,
8346
+ "eval_samples_per_second": 756.173,
8347
+ "eval_steps_per_second": 12.099,
8348
+ "step": 417000
8349
+ },
8350
+ {
8351
+ "epoch": 10.64,
8352
+ "learning_rate": 3.1055056675096826e-05,
8353
+ "loss": 0.2702,
8354
+ "step": 417500
8355
+ },
8356
+ {
8357
+ "epoch": 10.65,
8358
+ "learning_rate": 3.0806885696983816e-05,
8359
+ "loss": 0.2704,
8360
+ "step": 418000
8361
+ },
8362
+ {
8363
+ "epoch": 10.65,
8364
+ "eval_loss": 0.8099979162216187,
8365
+ "eval_runtime": 1.388,
8366
+ "eval_samples_per_second": 720.469,
8367
+ "eval_steps_per_second": 11.528,
8368
+ "step": 418000
8369
+ },
8370
+ {
8371
+ "epoch": 10.67,
8372
+ "learning_rate": 3.056007287487128e-05,
8373
+ "loss": 0.2705,
8374
+ "step": 418500
8375
+ },
8376
+ {
8377
+ "epoch": 10.68,
8378
+ "learning_rate": 3.0314620907864744e-05,
8379
+ "loss": 0.2702,
8380
+ "step": 419000
8381
+ },
8382
+ {
8383
+ "epoch": 10.68,
8384
+ "eval_loss": 0.8089344501495361,
8385
+ "eval_runtime": 1.3483,
8386
+ "eval_samples_per_second": 741.661,
8387
+ "eval_steps_per_second": 11.867,
8388
+ "step": 419000
8389
+ },
8390
+ {
8391
+ "epoch": 10.69,
8392
+ "learning_rate": 3.0070532480187637e-05,
8393
+ "loss": 0.2701,
8394
+ "step": 419500
8395
+ },
8396
+ {
8397
+ "epoch": 10.7,
8398
+ "learning_rate": 2.9827810261151784e-05,
8399
+ "loss": 0.2702,
8400
+ "step": 420000
8401
+ },
8402
+ {
8403
+ "epoch": 10.7,
8404
+ "eval_loss": 0.8132815957069397,
8405
+ "eval_runtime": 1.3269,
8406
+ "eval_samples_per_second": 753.638,
8407
+ "eval_steps_per_second": 12.058,
8408
+ "step": 420000
8409
  }
8410
  ],
8411
  "max_steps": 500000,
8412
  "num_train_epochs": 13,
8413
+ "total_flos": 1.341832503939855e+22,
8414
  "trial_name": null,
8415
  "trial_params": null
8416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c16dce11ea3eb455970382761036fdc46346bc29673783c86afe6088466197d
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cf499ab54bc6b5b46a8f12dd922823d0795f7339d994c55bfa6c710a91461c
3
  size 102501541