plip commited on
Commit
95db17d
1 Parent(s): 15b0c39

Training in progress, step 270000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a6cf8e52153476c0d24566862211ae73b151847958bccb312e482641f34fe2f
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0f45e9ea7aeadf9f10b1643bf00c0b8ef58ef51944d08e1b0ad72f902bd82cd
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20d31269cc8100e5fc218adcef1522ff7b0dd07e50b44819d9df2a65286c8129
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1752842aa50d3948c8a46f98f668fd33584b25521cc864747236962908637e8
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20a1051f008a83adbc4c881b4baac5cd60ff30165ed01dcd9fea7e631712e7d
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b7fe86b6bf62db9f7989d6e264b9b70447a29a8d4bbea419af77ab1989ca356
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb61348726887e329b19406ea4e3e39ac391edeec6dfd8508b3cb524aa33e28
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.626229675314746,
5
- "global_step": 260000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5206,11 +5206,211 @@
5206
  "eval_samples_per_second": 748.473,
5207
  "eval_steps_per_second": 11.976,
5208
  "step": 260000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5209
  }
5210
  ],
5211
  "max_steps": 500000,
5212
  "num_train_epochs": 13,
5213
- "total_flos": 8.30658473465873e+21,
5214
  "trial_name": null,
5215
  "trial_params": null
5216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.881084662826852,
5
+ "global_step": 270000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5206
  "eval_samples_per_second": 748.473,
5207
  "eval_steps_per_second": 11.976,
5208
  "step": 260000
5209
+ },
5210
+ {
5211
+ "epoch": 6.64,
5212
+ "learning_rate": 0.00015691796905504187,
5213
+ "loss": 0.2862,
5214
+ "step": 260500
5215
+ },
5216
+ {
5217
+ "epoch": 6.65,
5218
+ "learning_rate": 0.00015643849514435944,
5219
+ "loss": 0.2871,
5220
+ "step": 261000
5221
+ },
5222
+ {
5223
+ "epoch": 6.65,
5224
+ "eval_loss": 0.8193889260292053,
5225
+ "eval_runtime": 1.2087,
5226
+ "eval_samples_per_second": 827.32,
5227
+ "eval_steps_per_second": 13.237,
5228
+ "step": 261000
5229
+ },
5230
+ {
5231
+ "epoch": 6.66,
5232
+ "learning_rate": 0.00015595900550252463,
5233
+ "loss": 0.2867,
5234
+ "step": 261500
5235
+ },
5236
+ {
5237
+ "epoch": 6.68,
5238
+ "learning_rate": 0.00015547950537315926,
5239
+ "loss": 0.2863,
5240
+ "step": 262000
5241
+ },
5242
+ {
5243
+ "epoch": 6.68,
5244
+ "eval_loss": 0.8058978319168091,
5245
+ "eval_runtime": 1.2707,
5246
+ "eval_samples_per_second": 786.99,
5247
+ "eval_steps_per_second": 12.592,
5248
+ "step": 262000
5249
+ },
5250
+ {
5251
+ "epoch": 6.69,
5252
+ "learning_rate": 0.00015499999999999997,
5253
+ "loss": 0.2862,
5254
+ "step": 262500
5255
+ },
5256
+ {
5257
+ "epoch": 6.7,
5258
+ "learning_rate": 0.00015452049462684068,
5259
+ "loss": 0.2864,
5260
+ "step": 263000
5261
+ },
5262
+ {
5263
+ "epoch": 6.7,
5264
+ "eval_loss": 0.8194664120674133,
5265
+ "eval_runtime": 1.2245,
5266
+ "eval_samples_per_second": 816.647,
5267
+ "eval_steps_per_second": 13.066,
5268
+ "step": 263000
5269
+ },
5270
+ {
5271
+ "epoch": 6.72,
5272
+ "learning_rate": 0.00015404099449747535,
5273
+ "loss": 0.2861,
5274
+ "step": 263500
5275
+ },
5276
+ {
5277
+ "epoch": 6.73,
5278
+ "learning_rate": 0.0001535615048556405,
5279
+ "loss": 0.2863,
5280
+ "step": 264000
5281
+ },
5282
+ {
5283
+ "epoch": 6.73,
5284
+ "eval_loss": 0.8099100589752197,
5285
+ "eval_runtime": 1.2591,
5286
+ "eval_samples_per_second": 794.217,
5287
+ "eval_steps_per_second": 12.707,
5288
+ "step": 264000
5289
+ },
5290
+ {
5291
+ "epoch": 6.74,
5292
+ "learning_rate": 0.0001530820309449581,
5293
+ "loss": 0.2861,
5294
+ "step": 264500
5295
+ },
5296
+ {
5297
+ "epoch": 6.75,
5298
+ "learning_rate": 0.00015260257800887798,
5299
+ "loss": 0.2868,
5300
+ "step": 265000
5301
+ },
5302
+ {
5303
+ "epoch": 6.75,
5304
+ "eval_loss": 0.8127309679985046,
5305
+ "eval_runtime": 1.3337,
5306
+ "eval_samples_per_second": 749.803,
5307
+ "eval_steps_per_second": 11.997,
5308
+ "step": 265000
5309
+ },
5310
+ {
5311
+ "epoch": 6.77,
5312
+ "learning_rate": 0.0001521231512906207,
5313
+ "loss": 0.2868,
5314
+ "step": 265500
5315
+ },
5316
+ {
5317
+ "epoch": 6.78,
5318
+ "learning_rate": 0.00015164375603311998,
5319
+ "loss": 0.2863,
5320
+ "step": 266000
5321
+ },
5322
+ {
5323
+ "epoch": 6.78,
5324
+ "eval_loss": 0.806861162185669,
5325
+ "eval_runtime": 1.2726,
5326
+ "eval_samples_per_second": 785.768,
5327
+ "eval_steps_per_second": 12.572,
5328
+ "step": 266000
5329
+ },
5330
+ {
5331
+ "epoch": 6.79,
5332
+ "learning_rate": 0.00015116439747896553,
5333
+ "loss": 0.2856,
5334
+ "step": 266500
5335
+ },
5336
+ {
5337
+ "epoch": 6.8,
5338
+ "learning_rate": 0.00015068508087034578,
5339
+ "loss": 0.2854,
5340
+ "step": 267000
5341
+ },
5342
+ {
5343
+ "epoch": 6.8,
5344
+ "eval_loss": 0.8032740354537964,
5345
+ "eval_runtime": 1.28,
5346
+ "eval_samples_per_second": 781.242,
5347
+ "eval_steps_per_second": 12.5,
5348
+ "step": 267000
5349
+ },
5350
+ {
5351
+ "epoch": 6.82,
5352
+ "learning_rate": 0.00015020581144899027,
5353
+ "loss": 0.2857,
5354
+ "step": 267500
5355
+ },
5356
+ {
5357
+ "epoch": 6.83,
5358
+ "learning_rate": 0.0001497265944561127,
5359
+ "loss": 0.2855,
5360
+ "step": 268000
5361
+ },
5362
+ {
5363
+ "epoch": 6.83,
5364
+ "eval_loss": 0.8096611499786377,
5365
+ "eval_runtime": 1.2917,
5366
+ "eval_samples_per_second": 774.163,
5367
+ "eval_steps_per_second": 12.387,
5368
+ "step": 268000
5369
+ },
5370
+ {
5371
+ "epoch": 6.84,
5372
+ "learning_rate": 0.00014924743513235327,
5373
+ "loss": 0.2856,
5374
+ "step": 268500
5375
+ },
5376
+ {
5377
+ "epoch": 6.86,
5378
+ "learning_rate": 0.0001487683387177216,
5379
+ "loss": 0.2864,
5380
+ "step": 269000
5381
+ },
5382
+ {
5383
+ "epoch": 6.86,
5384
+ "eval_loss": 0.8095938563346863,
5385
+ "eval_runtime": 1.2896,
5386
+ "eval_samples_per_second": 775.42,
5387
+ "eval_steps_per_second": 12.407,
5388
+ "step": 269000
5389
+ },
5390
+ {
5391
+ "epoch": 6.87,
5392
+ "learning_rate": 0.00014828931045153928,
5393
+ "loss": 0.2857,
5394
+ "step": 269500
5395
+ },
5396
+ {
5397
+ "epoch": 6.88,
5398
+ "learning_rate": 0.00014781035557238272,
5399
+ "loss": 0.2865,
5400
+ "step": 270000
5401
+ },
5402
+ {
5403
+ "epoch": 6.88,
5404
+ "eval_loss": 0.8193797469139099,
5405
+ "eval_runtime": 1.2225,
5406
+ "eval_samples_per_second": 817.974,
5407
+ "eval_steps_per_second": 13.088,
5408
+ "step": 270000
5409
  }
5410
  ],
5411
  "max_steps": 500000,
5412
  "num_train_epochs": 13,
5413
+ "total_flos": 8.62607187343323e+21,
5414
  "trial_name": null,
5415
  "trial_params": null
5416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20d31269cc8100e5fc218adcef1522ff7b0dd07e50b44819d9df2a65286c8129
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1752842aa50d3948c8a46f98f668fd33584b25521cc864747236962908637e8
3
  size 102501541