plip commited on
Commit
cdf95c9
1 Parent(s): 4118a39

Training in progress, step 420000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07f03ea18fd5d4a8b1e4e3c742e1d56a40794f2a3b257ce44d2264feed3af286
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6c710dd18f96e58101abb4543f93762b3065e883b1aa3865d95449d6dcff2ef
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28c6809c972a6eabd269160656909344105b6d4e4ab486435a7eb7ab6f03cd31
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26a1dec101691ebd7c104c916b6fbf2b961123d72841379924840b574ab6cdb5
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be43869b2d26de64620114c0cd31802e0f535f8b4066fd45c9c93b0dc7fc8193
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:482727048b96ef6295d46f8e1c756249fe05df8f4565790853af683f2f73d3d9
3
+ size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3ef4114c1e842456ef64f4180a6eab84b1c65069eb4d0f673451378faba5e88
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec58e861ecfc9f74cf3526c7f7e07d40e14496fd6bc4013607b9344e9c1e618d
3
+ size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d2f54aa3ce711401cd4ee540e9c92a11b601491f08344941d6924f5e6c29f97
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2aebd434102fdb5bc0174b7c6fb694db378bfa14b7f46447d196181b9cfcb39
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db27b64031de4e8fdf403dbbf855618a1ce97321f71e80ad2ee2fd8a0ecff85b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f00f2e685c708cfd0cbbb80e095c6bf343a9624eb506d7fe315e91e79d2cc8c7
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:111428f29ffec49187b68d16c54a6c29ee4fbb60ae7b0a594f8a3a5492faefcc
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8dd01bc2de28ac7cee0c2566f096951eb4ee6698f2cb4f09a40119c0f514708
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38ed3db5c699900337804672d3763c810ccb3d79001c4a96e25a3be74c283a16
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed4ca2d2c2eb8082634a2e8e07ce3220676e6442b8fc3172e0e2229e0f0ad540
3
+ size 14439
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba203c82676ee0f0f6d805eb229d404fcf81bb695cf23304c62df491ffe623d
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a08068423bd0da8de286f2bfe952fac174abba9ea4b1b0d5c4dc9fed6c69200
3
+ size 14439
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18a98f099a2c4465105355e169c3be433597657bb0fd97136841e01a3bc9cfc2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a39f4a2af63c824a2393a7a188b35765b425e608da84400d9022e1ad2be2efc
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:498c95fdcab578849c69bf4a10f0532f5bf3ba8048fa37c1b416b3afd0ea5f33
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21fa0c13fc0e3348f6228394f5e318945295debe26ba21ec91b2c06a47593869
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.561659364563866,
5
- "global_step": 410000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8206,11 +8206,211 @@
8206
  "eval_samples_per_second": 1952.734,
8207
  "eval_steps_per_second": 31.244,
8208
  "step": 410000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8209
  }
8210
  ],
8211
  "max_steps": 500000,
8212
  "num_train_epochs": 16,
8213
- "total_flos": 1.309887683433079e+22,
8214
  "trial_name": null,
8215
  "trial_params": null
8216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.868041300284935,
5
+ "global_step": 420000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8206
  "eval_samples_per_second": 1952.734,
8207
  "eval_steps_per_second": 31.244,
8208
  "step": 410000
8209
+ },
8210
+ {
8211
+ "epoch": 12.58,
8212
+ "learning_rate": 3.4670510086189736e-05,
8213
+ "loss": 0.3172,
8214
+ "step": 410500
8215
+ },
8216
+ {
8217
+ "epoch": 12.59,
8218
+ "learning_rate": 3.440361659286563e-05,
8219
+ "loss": 0.3168,
8220
+ "step": 411000
8221
+ },
8222
+ {
8223
+ "epoch": 12.59,
8224
+ "eval_loss": 0.7748520374298096,
8225
+ "eval_runtime": 0.5115,
8226
+ "eval_samples_per_second": 1955.169,
8227
+ "eval_steps_per_second": 31.283,
8228
+ "step": 411000
8229
+ },
8230
+ {
8231
+ "epoch": 12.61,
8232
+ "learning_rate": 3.413804192226918e-05,
8233
+ "loss": 0.317,
8234
+ "step": 411500
8235
+ },
8236
+ {
8237
+ "epoch": 12.62,
8238
+ "learning_rate": 3.387378897868246e-05,
8239
+ "loss": 0.317,
8240
+ "step": 412000
8241
+ },
8242
+ {
8243
+ "epoch": 12.62,
8244
+ "eval_loss": 0.7790003418922424,
8245
+ "eval_runtime": 0.5251,
8246
+ "eval_samples_per_second": 1904.548,
8247
+ "eval_steps_per_second": 30.473,
8248
+ "step": 412000
8249
+ },
8250
+ {
8251
+ "epoch": 12.64,
8252
+ "learning_rate": 3.361086065193336e-05,
8253
+ "loss": 0.3169,
8254
+ "step": 412500
8255
+ },
8256
+ {
8257
+ "epoch": 12.65,
8258
+ "learning_rate": 3.334925981736389e-05,
8259
+ "loss": 0.3169,
8260
+ "step": 413000
8261
+ },
8262
+ {
8263
+ "epoch": 12.65,
8264
+ "eval_loss": 0.7779929041862488,
8265
+ "eval_runtime": 0.5032,
8266
+ "eval_samples_per_second": 1987.349,
8267
+ "eval_steps_per_second": 31.798,
8268
+ "step": 413000
8269
+ },
8270
+ {
8271
+ "epoch": 12.67,
8272
+ "learning_rate": 3.3088989335798925e-05,
8273
+ "loss": 0.3169,
8274
+ "step": 413500
8275
+ },
8276
+ {
8277
+ "epoch": 12.68,
8278
+ "learning_rate": 3.283005205351467e-05,
8279
+ "loss": 0.3165,
8280
+ "step": 414000
8281
+ },
8282
+ {
8283
+ "epoch": 12.68,
8284
+ "eval_loss": 0.7767853736877441,
8285
+ "eval_runtime": 0.5028,
8286
+ "eval_samples_per_second": 1988.956,
8287
+ "eval_steps_per_second": 31.823,
8288
+ "step": 414000
8289
+ },
8290
+ {
8291
+ "epoch": 12.7,
8292
+ "learning_rate": 3.2572450802207845e-05,
8293
+ "loss": 0.3169,
8294
+ "step": 414500
8295
+ },
8296
+ {
8297
+ "epoch": 12.71,
8298
+ "learning_rate": 3.2316188398964344e-05,
8299
+ "loss": 0.3165,
8300
+ "step": 415000
8301
+ },
8302
+ {
8303
+ "epoch": 12.71,
8304
+ "eval_loss": 0.7794355750083923,
8305
+ "eval_runtime": 0.5308,
8306
+ "eval_samples_per_second": 1884.074,
8307
+ "eval_steps_per_second": 30.145,
8308
+ "step": 415000
8309
+ },
8310
+ {
8311
+ "epoch": 12.73,
8312
+ "learning_rate": 3.206126764622888e-05,
8313
+ "loss": 0.3166,
8314
+ "step": 415500
8315
+ },
8316
+ {
8317
+ "epoch": 12.75,
8318
+ "learning_rate": 3.180769133177392e-05,
8319
+ "loss": 0.3165,
8320
+ "step": 416000
8321
+ },
8322
+ {
8323
+ "epoch": 12.75,
8324
+ "eval_loss": 0.7837159633636475,
8325
+ "eval_runtime": 0.5042,
8326
+ "eval_samples_per_second": 1983.371,
8327
+ "eval_steps_per_second": 31.734,
8328
+ "step": 416000
8329
+ },
8330
+ {
8331
+ "epoch": 12.76,
8332
+ "learning_rate": 3.155546222866939e-05,
8333
+ "loss": 0.3168,
8334
+ "step": 416500
8335
+ },
8336
+ {
8337
+ "epoch": 12.78,
8338
+ "learning_rate": 3.130458309525239e-05,
8339
+ "loss": 0.3164,
8340
+ "step": 417000
8341
+ },
8342
+ {
8343
+ "epoch": 12.78,
8344
+ "eval_loss": 0.7775956988334656,
8345
+ "eval_runtime": 0.5205,
8346
+ "eval_samples_per_second": 1921.225,
8347
+ "eval_steps_per_second": 30.74,
8348
+ "step": 417000
8349
+ },
8350
+ {
8351
+ "epoch": 12.79,
8352
+ "learning_rate": 3.1055056675096826e-05,
8353
+ "loss": 0.3165,
8354
+ "step": 417500
8355
+ },
8356
+ {
8357
+ "epoch": 12.81,
8358
+ "learning_rate": 3.0806885696983816e-05,
8359
+ "loss": 0.3165,
8360
+ "step": 418000
8361
+ },
8362
+ {
8363
+ "epoch": 12.81,
8364
+ "eval_loss": 0.7772942185401917,
8365
+ "eval_runtime": 0.5181,
8366
+ "eval_samples_per_second": 1929.962,
8367
+ "eval_steps_per_second": 30.879,
8368
+ "step": 418000
8369
+ },
8370
+ {
8371
+ "epoch": 12.82,
8372
+ "learning_rate": 3.056007287487128e-05,
8373
+ "loss": 0.3164,
8374
+ "step": 418500
8375
+ },
8376
+ {
8377
+ "epoch": 12.84,
8378
+ "learning_rate": 3.0314620907864744e-05,
8379
+ "loss": 0.3164,
8380
+ "step": 419000
8381
+ },
8382
+ {
8383
+ "epoch": 12.84,
8384
+ "eval_loss": 0.7730494141578674,
8385
+ "eval_runtime": 0.5018,
8386
+ "eval_samples_per_second": 1992.671,
8387
+ "eval_steps_per_second": 31.883,
8388
+ "step": 419000
8389
+ },
8390
+ {
8391
+ "epoch": 12.85,
8392
+ "learning_rate": 3.0070532480187637e-05,
8393
+ "loss": 0.3164,
8394
+ "step": 419500
8395
+ },
8396
+ {
8397
+ "epoch": 12.87,
8398
+ "learning_rate": 2.9827810261151784e-05,
8399
+ "loss": 0.3161,
8400
+ "step": 420000
8401
+ },
8402
+ {
8403
+ "epoch": 12.87,
8404
+ "eval_loss": 0.7735152840614319,
8405
+ "eval_runtime": 0.5135,
8406
+ "eval_samples_per_second": 1947.554,
8407
+ "eval_steps_per_second": 31.161,
8408
+ "step": 420000
8409
  }
8410
  ],
8411
  "max_steps": 500000,
8412
  "num_train_epochs": 16,
8413
+ "total_flos": 1.341836397310529e+22,
8414
  "trial_name": null,
8415
  "trial_params": null
8416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28c6809c972a6eabd269160656909344105b6d4e4ab486435a7eb7ab6f03cd31
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26a1dec101691ebd7c104c916b6fbf2b961123d72841379924840b574ab6cdb5
3
  size 102501541