plip committed on
Commit
cc88fea
1 Parent(s): 2573838

Training in progress, step 470000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3851379e290e595ff406c21b8b10ddb1e73359dd0f6752ee66fd50b92159710
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bc4138b4d57f8e4a675a838e7f7c445fa2f2850d452063b02b0b163a34a7798
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51cc55a03d0db3d8e5de6630971dddba20e9587291496d77623230dc60cf541c
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:399d2fd5053b20d89296de77093797a9b20b303b26b5c54e0ac988e918c3a2b8
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca511f19607ffe13077a404fa6aa93a3f99da5d803f11fc8cc4ffd982f7eaa96
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39649affd710e5288e8103a7ce71440cea00f53d7c4b1b77287e55e4fba903c9
3
+ size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4c5e63367d8a48da7291ac4cad9dbf22e45f23c04ad6dc36fb819eee5567b30
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24fdf31146f9e2573e4fd97d3ef660e7e36be173064143bba073688856627f33
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5ef310867dca4f3b22b044f838f0a52fca0a2ae9be44b0e97a33f54ca14b37c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5340a3b2a3128c15a2e26d8d92bda85367d97c29b7ab61268eaf585f02d028cc
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07690c863ee7f95b67ca0330c6c558692a4c5296256a3c0c1e6edf6ee11f4a99
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac2d9083cc062d05a152912781673b09c3efbb96bd37d28e564815b34b3ab7b4
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd623ae3bafd7bee294fb51f9ab76259bcc1a1e3cb80be656ec13fa7aea6663f
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a59c37be606f12c13a42a1df1734ee1dfacdb5c6d84f45ffa367935178d29c37
3
+ size 14439
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18f7a345bdac720f75c3e4ebf8a9d6d8895e230a0405992893aeb1c567a9dd75
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b5928441e789260c503b832ccd0e73fec0b6dedeb24fbfc4a89078adb4680e9
3
  size 14439
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:099663ce4dbefceb2b014dd475ab003207ed8b9ddbfafed2491187b9c10f927a
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cfa650386285df001bb485d9e06712c20b2d44c391fb3f87f5e92d68fae75c4
3
  size 14439
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4435aedaa43e7ef631652a5bc79634e81959ca100d2c5dc8b85db021834925b4
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e02ab0e747b127b3be6acd4dea7efa17b59fc5315b526da3be35482035d021d
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d15e11a6de6abd55121a73bc214cc950fb971f927ae2b1d5067145da50de5d0
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.093569043169214,
5
- "global_step": 460000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9206,11 +9206,211 @@
9206
  "eval_samples_per_second": 1998.696,
9207
  "eval_steps_per_second": 31.979,
9208
  "step": 460000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9209
  }
9210
  ],
9211
  "max_steps": 500000,
9212
  "num_train_epochs": 16,
9213
- "total_flos": 1.4696296552299338e+22,
9214
  "trial_name": null,
9215
  "trial_params": null
9216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.399950978890285,
5
+ "global_step": 470000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9206
  "eval_samples_per_second": 1998.696,
9207
  "eval_steps_per_second": 31.979,
9208
  "step": 460000
9209
+ },
9210
+ {
9211
+ "epoch": 14.11,
9212
+ "learning_rate": 1.4920092237448903e-05,
9213
+ "loss": 0.3138,
9214
+ "step": 460500
9215
+ },
9216
+ {
9217
+ "epoch": 14.12,
9218
+ "learning_rate": 1.4797008356541874e-05,
9219
+ "loss": 0.3134,
9220
+ "step": 461000
9221
+ },
9222
+ {
9223
+ "epoch": 14.12,
9224
+ "eval_loss": 0.7742542028427124,
9225
+ "eval_runtime": 0.4997,
9226
+ "eval_samples_per_second": 2001.196,
9227
+ "eval_steps_per_second": 32.019,
9228
+ "step": 461000
9229
+ },
9230
+ {
9231
+ "epoch": 14.14,
9232
+ "learning_rate": 1.4675457713088947e-05,
9233
+ "loss": 0.3134,
9234
+ "step": 461500
9235
+ },
9236
+ {
9237
+ "epoch": 14.15,
9238
+ "learning_rate": 1.4555441636348494e-05,
9239
+ "loss": 0.3136,
9240
+ "step": 462000
9241
+ },
9242
+ {
9243
+ "epoch": 14.15,
9244
+ "eval_loss": 0.7772946357727051,
9245
+ "eval_runtime": 0.5121,
9246
+ "eval_samples_per_second": 1952.647,
9247
+ "eval_steps_per_second": 31.242,
9248
+ "step": 462000
9249
+ },
9250
+ {
9251
+ "epoch": 14.17,
9252
+ "learning_rate": 1.4436961438797095e-05,
9253
+ "loss": 0.3136,
9254
+ "step": 462500
9255
+ },
9256
+ {
9257
+ "epoch": 14.19,
9258
+ "learning_rate": 1.4320018416115206e-05,
9259
+ "loss": 0.3132,
9260
+ "step": 463000
9261
+ },
9262
+ {
9263
+ "epoch": 14.19,
9264
+ "eval_loss": 0.7742455005645752,
9265
+ "eval_runtime": 0.5004,
9266
+ "eval_samples_per_second": 1998.244,
9267
+ "eval_steps_per_second": 31.972,
9268
+ "step": 463000
9269
+ },
9270
+ {
9271
+ "epoch": 14.2,
9272
+ "learning_rate": 1.4204613847173003e-05,
9273
+ "loss": 0.3134,
9274
+ "step": 463500
9275
+ },
9276
+ {
9277
+ "epoch": 14.22,
9278
+ "learning_rate": 1.4090748994016354e-05,
9279
+ "loss": 0.3135,
9280
+ "step": 464000
9281
+ },
9282
+ {
9283
+ "epoch": 14.22,
9284
+ "eval_loss": 0.7742483019828796,
9285
+ "eval_runtime": 0.5003,
9286
+ "eval_samples_per_second": 1998.899,
9287
+ "eval_steps_per_second": 31.982,
9288
+ "step": 464000
9289
+ },
9290
+ {
9291
+ "epoch": 14.23,
9292
+ "learning_rate": 1.3978425101853049e-05,
9293
+ "loss": 0.3132,
9294
+ "step": 464500
9295
+ },
9296
+ {
9297
+ "epoch": 14.25,
9298
+ "learning_rate": 1.3867643399039165e-05,
9299
+ "loss": 0.3134,
9300
+ "step": 465000
9301
+ },
9302
+ {
9303
+ "epoch": 14.25,
9304
+ "eval_loss": 0.7741165161132812,
9305
+ "eval_runtime": 0.5054,
9306
+ "eval_samples_per_second": 1978.738,
9307
+ "eval_steps_per_second": 31.66,
9308
+ "step": 465000
9309
+ },
9310
+ {
9311
+ "epoch": 14.26,
9312
+ "learning_rate": 1.3758405097065648e-05,
9313
+ "loss": 0.3128,
9314
+ "step": 465500
9315
+ },
9316
+ {
9317
+ "epoch": 14.28,
9318
+ "learning_rate": 1.3650711390545131e-05,
9319
+ "loss": 0.313,
9320
+ "step": 466000
9321
+ },
9322
+ {
9323
+ "epoch": 14.28,
9324
+ "eval_loss": 0.7737331986427307,
9325
+ "eval_runtime": 0.505,
9326
+ "eval_samples_per_second": 1980.27,
9327
+ "eval_steps_per_second": 31.684,
9328
+ "step": 466000
9329
+ },
9330
+ {
9331
+ "epoch": 14.29,
9332
+ "learning_rate": 1.3544563457198657e-05,
9333
+ "loss": 0.3132,
9334
+ "step": 466500
9335
+ },
9336
+ {
9337
+ "epoch": 14.31,
9338
+ "learning_rate": 1.343996245784307e-05,
9339
+ "loss": 0.3131,
9340
+ "step": 467000
9341
+ },
9342
+ {
9343
+ "epoch": 14.31,
9344
+ "eval_loss": 0.7716790437698364,
9345
+ "eval_runtime": 0.504,
9346
+ "eval_samples_per_second": 1983.936,
9347
+ "eval_steps_per_second": 31.743,
9348
+ "step": 467000
9349
+ },
9350
+ {
9351
+ "epoch": 14.32,
9352
+ "learning_rate": 1.3336909536378107e-05,
9353
+ "loss": 0.313,
9354
+ "step": 467500
9355
+ },
9356
+ {
9357
+ "epoch": 14.34,
9358
+ "learning_rate": 1.3235405819774022e-05,
9359
+ "loss": 0.3131,
9360
+ "step": 468000
9361
+ },
9362
+ {
9363
+ "epoch": 14.34,
9364
+ "eval_loss": 0.7737191319465637,
9365
+ "eval_runtime": 0.5136,
9366
+ "eval_samples_per_second": 1947.194,
9367
+ "eval_steps_per_second": 31.155,
9368
+ "step": 468000
9369
+ },
9370
+ {
9371
+ "epoch": 14.35,
9372
+ "learning_rate": 1.3135452418059208e-05,
9373
+ "loss": 0.3132,
9374
+ "step": 468500
9375
+ },
9376
+ {
9377
+ "epoch": 14.37,
9378
+ "learning_rate": 1.3037050424308027e-05,
9379
+ "loss": 0.3126,
9380
+ "step": 469000
9381
+ },
9382
+ {
9383
+ "epoch": 14.37,
9384
+ "eval_loss": 0.7744223475456238,
9385
+ "eval_runtime": 0.5036,
9386
+ "eval_samples_per_second": 1985.727,
9387
+ "eval_steps_per_second": 31.772,
9388
+ "step": 469000
9389
+ },
9390
+ {
9391
+ "epoch": 14.38,
9392
+ "learning_rate": 1.2940200914628945e-05,
9393
+ "loss": 0.3132,
9394
+ "step": 469500
9395
+ },
9396
+ {
9397
+ "epoch": 14.4,
9398
+ "learning_rate": 1.2844904948152644e-05,
9399
+ "loss": 0.3132,
9400
+ "step": 470000
9401
+ },
9402
+ {
9403
+ "epoch": 14.4,
9404
+ "eval_loss": 0.7742005586624146,
9405
+ "eval_runtime": 0.5167,
9406
+ "eval_samples_per_second": 1935.302,
9407
+ "eval_steps_per_second": 30.965,
9408
+ "step": 470000
9409
  }
9410
  ],
9411
  "max_steps": 500000,
9412
  "num_train_epochs": 16,
9413
+ "total_flos": 1.5015783691073838e+22,
9414
  "trial_name": null,
9415
  "trial_params": null
9416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51cc55a03d0db3d8e5de6630971dddba20e9587291496d77623230dc60cf541c
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:399d2fd5053b20d89296de77093797a9b20b303b26b5c54e0ac988e918c3a2b8
3
  size 102501541