plip commited on
Commit
a58efea
1 Parent(s): 7ad594c

Training in progress, step 470000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00828d7c5b2ee9d934868fffc62db5886616a7f03a8761ef8254b89f6589bb74
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31f8b72df9e81d9020de8c0d757b3e594d6732915b9d9225bd73528701124126
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96cedfbffe43fc18644cc6de989fcca94f4a26075ab174ed0f9cb823c12e426d
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:683da2699cf4f6f0cc7ad0bd863d30bc6b3ce3591f2b81f581d00b7a55c36f19
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c841acb13540ac94fe640beda68267e633bf6f66d1bd05ecc4a8b2a3acbdc1
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c841acb13540ac94fe640beda68267e633bf6f66d1bd05ecc4a8b2a3acbdc1
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c841acb13540ac94fe640beda68267e633bf6f66d1bd05ecc4a8b2a3acbdc1
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c841acb13540ac94fe640beda68267e633bf6f66d1bd05ecc4a8b2a3acbdc1
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c841acb13540ac94fe640beda68267e633bf6f66d1bd05ecc4a8b2a3acbdc1
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c841acb13540ac94fe640beda68267e633bf6f66d1bd05ecc4a8b2a3acbdc1
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c841acb13540ac94fe640beda68267e633bf6f66d1bd05ecc4a8b2a3acbdc1
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c841acb13540ac94fe640beda68267e633bf6f66d1bd05ecc4a8b2a3acbdc1
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d15e11a6de6abd55121a73bc214cc950fb971f927ae2b1d5067145da50de5d0
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.723329425556859,
5
- "global_step": 460000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9206,11 +9206,211 @@
9206
  "eval_samples_per_second": 768.777,
9207
  "eval_steps_per_second": 12.3,
9208
  "step": 460000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9209
  }
9210
  ],
9211
  "max_steps": 500000,
9212
  "num_train_epochs": 13,
9213
- "total_flos": 1.4696260115583505e+22,
9214
  "trial_name": null,
9215
  "trial_params": null
9216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.978184413068965,
5
+ "global_step": 470000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9206
  "eval_samples_per_second": 768.777,
9207
  "eval_steps_per_second": 12.3,
9208
  "step": 460000
9209
+ },
9210
+ {
9211
+ "epoch": 11.74,
9212
+ "learning_rate": 1.4920092237448903e-05,
9213
+ "loss": 0.2677,
9214
+ "step": 460500
9215
+ },
9216
+ {
9217
+ "epoch": 11.75,
9218
+ "learning_rate": 1.4797008356541874e-05,
9219
+ "loss": 0.2677,
9220
+ "step": 461000
9221
+ },
9222
+ {
9223
+ "epoch": 11.75,
9224
+ "eval_loss": 0.8057255744934082,
9225
+ "eval_runtime": 1.2455,
9226
+ "eval_samples_per_second": 802.869,
9227
+ "eval_steps_per_second": 12.846,
9228
+ "step": 461000
9229
+ },
9230
+ {
9231
+ "epoch": 11.76,
9232
+ "learning_rate": 1.4675457713088947e-05,
9233
+ "loss": 0.2676,
9234
+ "step": 461500
9235
+ },
9236
+ {
9237
+ "epoch": 11.77,
9238
+ "learning_rate": 1.4555441636348494e-05,
9239
+ "loss": 0.2676,
9240
+ "step": 462000
9241
+ },
9242
+ {
9243
+ "epoch": 11.77,
9244
+ "eval_loss": 0.8142445087432861,
9245
+ "eval_runtime": 1.2424,
9246
+ "eval_samples_per_second": 804.89,
9247
+ "eval_steps_per_second": 12.878,
9248
+ "step": 462000
9249
+ },
9250
+ {
9251
+ "epoch": 11.79,
9252
+ "learning_rate": 1.4436961438797095e-05,
9253
+ "loss": 0.2679,
9254
+ "step": 462500
9255
+ },
9256
+ {
9257
+ "epoch": 11.8,
9258
+ "learning_rate": 1.4320018416115206e-05,
9259
+ "loss": 0.2679,
9260
+ "step": 463000
9261
+ },
9262
+ {
9263
+ "epoch": 11.8,
9264
+ "eval_loss": 0.8076353073120117,
9265
+ "eval_runtime": 1.3419,
9266
+ "eval_samples_per_second": 745.235,
9267
+ "eval_steps_per_second": 11.924,
9268
+ "step": 463000
9269
+ },
9270
+ {
9271
+ "epoch": 11.81,
9272
+ "learning_rate": 1.4204613847173003e-05,
9273
+ "loss": 0.2674,
9274
+ "step": 463500
9275
+ },
9276
+ {
9277
+ "epoch": 11.83,
9278
+ "learning_rate": 1.4090748994016354e-05,
9279
+ "loss": 0.2676,
9280
+ "step": 464000
9281
+ },
9282
+ {
9283
+ "epoch": 11.83,
9284
+ "eval_loss": 0.8087449669837952,
9285
+ "eval_runtime": 1.2685,
9286
+ "eval_samples_per_second": 788.361,
9287
+ "eval_steps_per_second": 12.614,
9288
+ "step": 464000
9289
+ },
9290
+ {
9291
+ "epoch": 11.84,
9292
+ "learning_rate": 1.3978425101853049e-05,
9293
+ "loss": 0.2681,
9294
+ "step": 464500
9295
+ },
9296
+ {
9297
+ "epoch": 11.85,
9298
+ "learning_rate": 1.3867643399039165e-05,
9299
+ "loss": 0.2677,
9300
+ "step": 465000
9301
+ },
9302
+ {
9303
+ "epoch": 11.85,
9304
+ "eval_loss": 0.8065741062164307,
9305
+ "eval_runtime": 1.2179,
9306
+ "eval_samples_per_second": 821.068,
9307
+ "eval_steps_per_second": 13.137,
9308
+ "step": 465000
9309
+ },
9310
+ {
9311
+ "epoch": 11.86,
9312
+ "learning_rate": 1.3758405097065648e-05,
9313
+ "loss": 0.2676,
9314
+ "step": 465500
9315
+ },
9316
+ {
9317
+ "epoch": 11.88,
9318
+ "learning_rate": 1.3650711390545131e-05,
9319
+ "loss": 0.2673,
9320
+ "step": 466000
9321
+ },
9322
+ {
9323
+ "epoch": 11.88,
9324
+ "eval_loss": 0.8059167861938477,
9325
+ "eval_runtime": 1.3496,
9326
+ "eval_samples_per_second": 740.983,
9327
+ "eval_steps_per_second": 11.856,
9328
+ "step": 466000
9329
+ },
9330
+ {
9331
+ "epoch": 11.89,
9332
+ "learning_rate": 1.3544563457198657e-05,
9333
+ "loss": 0.2673,
9334
+ "step": 466500
9335
+ },
9336
+ {
9337
+ "epoch": 11.9,
9338
+ "learning_rate": 1.343996245784307e-05,
9339
+ "loss": 0.2676,
9340
+ "step": 467000
9341
+ },
9342
+ {
9343
+ "epoch": 11.9,
9344
+ "eval_loss": 0.8066999316215515,
9345
+ "eval_runtime": 1.2927,
9346
+ "eval_samples_per_second": 773.582,
9347
+ "eval_steps_per_second": 12.377,
9348
+ "step": 467000
9349
+ },
9350
+ {
9351
+ "epoch": 11.91,
9352
+ "learning_rate": 1.3336909536378107e-05,
9353
+ "loss": 0.2678,
9354
+ "step": 467500
9355
+ },
9356
+ {
9357
+ "epoch": 11.93,
9358
+ "learning_rate": 1.3235405819774022e-05,
9359
+ "loss": 0.2675,
9360
+ "step": 468000
9361
+ },
9362
+ {
9363
+ "epoch": 11.93,
9364
+ "eval_loss": 0.8042522072792053,
9365
+ "eval_runtime": 1.2835,
9366
+ "eval_samples_per_second": 779.135,
9367
+ "eval_steps_per_second": 12.466,
9368
+ "step": 468000
9369
+ },
9370
+ {
9371
+ "epoch": 11.94,
9372
+ "learning_rate": 1.3135452418059208e-05,
9373
+ "loss": 0.2675,
9374
+ "step": 468500
9375
+ },
9376
+ {
9377
+ "epoch": 11.95,
9378
+ "learning_rate": 1.3037050424308027e-05,
9379
+ "loss": 0.2675,
9380
+ "step": 469000
9381
+ },
9382
+ {
9383
+ "epoch": 11.95,
9384
+ "eval_loss": 0.8102657198905945,
9385
+ "eval_runtime": 1.2622,
9386
+ "eval_samples_per_second": 792.261,
9387
+ "eval_steps_per_second": 12.676,
9388
+ "step": 469000
9389
+ },
9390
+ {
9391
+ "epoch": 11.97,
9392
+ "learning_rate": 1.2940200914628945e-05,
9393
+ "loss": 0.2676,
9394
+ "step": 469500
9395
+ },
9396
+ {
9397
+ "epoch": 11.98,
9398
+ "learning_rate": 1.2844904948152644e-05,
9399
+ "loss": 0.2673,
9400
+ "step": 470000
9401
+ },
9402
+ {
9403
+ "epoch": 11.98,
9404
+ "eval_loss": 0.8092105984687805,
9405
+ "eval_runtime": 1.2931,
9406
+ "eval_samples_per_second": 773.322,
9407
+ "eval_steps_per_second": 12.373,
9408
+ "step": 470000
9409
  }
9410
  ],
9411
  "max_steps": 500000,
9412
  "num_train_epochs": 13,
9413
+ "total_flos": 1.5015747254358005e+22,
9414
  "trial_name": null,
9415
  "trial_params": null
9416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96cedfbffe43fc18644cc6de989fcca94f4a26075ab174ed0f9cb823c12e426d
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:683da2699cf4f6f0cc7ad0bd863d30bc6b3ce3591f2b81f581d00b7a55c36f19
3
  size 102501541