plip commited on
Commit
17d96ba
1 Parent(s): fa3956e

Training in progress, step 270000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66c0b735c70ca1ca003eab9d686d7e3ea552331bf016385c8fad2dfe60c740ac
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:185444010f2414af8f6b292f79fa769076772990bf1219a1dafd09b6faae29a4
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8e8c193ef48de967d7501e0b15a06f4b379c56e8efdf426bfb091824f5c716b
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94365aa60c35fc7ec52e5e8ba19311622aaab34d18bf7ce3dd77401881509519
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89edb9f9ff26027cc98aa82c2133f2e1b82d5b92ed39de235eb4cb2271e68c3d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6f6ae6faade50eb043968b5667df29128b1e4a2530013bd32474d5a36afc850
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54ee6f005be263ffc1028eeae009e64fa85afa0eb3360fe9f44a8e6025237de1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cc3f7b88227092a0043ff66b55085eb9bd377bb70cc891cc293b48f870db21f
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3500ada53b759cb5c8307ed7d4c05bb6055c04f57f48c3b210c40f88bacd59f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3a5a051f0b0618eb7cb1692de034b0ac1fd365c0c181b09a598798b6235801
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9805eeb18ee0853d0258572c1ad542ff4bc6ff2393ff8df1da096af07bacd6d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9c6b8fbeaadc53fb4ee209c1d104938ceaf8c8a8cbc2fa87ebcabcc6284da17
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b00333ce97a5a96a0369400120eb38a7bfd7ee5df94e91c7d6993e0e7f5524ba
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d753925d97c5520e660dcdd16394471ced5c4bc24193ed0a377ef70d8717a46
3
  size 14439
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4802de77ca51e4efac2063b9b4a2ac08f4acde74a027da9b43cf90af44cf0108
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdee02ae130781f905df4745f98c06bad459194317ec411b25af7a96f282fee2
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f2a04b63259c1a14b093267b1fd26d6b21fab4af2fc765329473d9fca239907
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:823a83d9a98cde647a58d74cc1bea63c670933d602a5a07e7fc2bfa68d3b9e32
3
+ size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f74dc2cbdc738c6fbf513addd5269b878530a18e75ada83dcaec9120da52354f
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4ef1c7732ec4132391d0d6a205bc292fbd5fe79d85d00447b48ac3c30b01e18
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b7fe86b6bf62db9f7989d6e264b9b70447a29a8d4bbea419af77ab1989ca356
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb61348726887e329b19406ea4e3e39ac391edeec6dfd8508b3cb524aa33e28
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.965930328747817,
5
- "global_step": 260000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5206,11 +5206,211 @@
5206
  "eval_samples_per_second": 1919.019,
5207
  "eval_steps_per_second": 30.704,
5208
  "step": 260000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5209
  }
5210
  ],
5211
  "max_steps": 500000,
5212
  "num_train_epochs": 16,
5213
- "total_flos": 8.306609692473169e+21,
5214
  "trial_name": null,
5215
  "trial_params": null
5216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.272312264468887,
5
+ "global_step": 270000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5206
  "eval_samples_per_second": 1919.019,
5207
  "eval_steps_per_second": 30.704,
5208
  "step": 260000
5209
+ },
5210
+ {
5211
+ "epoch": 7.98,
5212
+ "learning_rate": 0.00015691796905504187,
5213
+ "loss": 0.3366,
5214
+ "step": 260500
5215
+ },
5216
+ {
5217
+ "epoch": 8.0,
5218
+ "learning_rate": 0.00015643849514435944,
5219
+ "loss": 0.3364,
5220
+ "step": 261000
5221
+ },
5222
+ {
5223
+ "epoch": 8.0,
5224
+ "eval_loss": 0.776075005531311,
5225
+ "eval_runtime": 0.5349,
5226
+ "eval_samples_per_second": 1869.618,
5227
+ "eval_steps_per_second": 29.914,
5228
+ "step": 261000
5229
+ },
5230
+ {
5231
+ "epoch": 8.01,
5232
+ "learning_rate": 0.00015595900550252463,
5233
+ "loss": 0.3362,
5234
+ "step": 261500
5235
+ },
5236
+ {
5237
+ "epoch": 8.03,
5238
+ "learning_rate": 0.00015547950537315926,
5239
+ "loss": 0.3363,
5240
+ "step": 262000
5241
+ },
5242
+ {
5243
+ "epoch": 8.03,
5244
+ "eval_loss": 0.7857484221458435,
5245
+ "eval_runtime": 0.538,
5246
+ "eval_samples_per_second": 1858.594,
5247
+ "eval_steps_per_second": 29.738,
5248
+ "step": 262000
5249
+ },
5250
+ {
5251
+ "epoch": 8.04,
5252
+ "learning_rate": 0.00015499999999999997,
5253
+ "loss": 0.336,
5254
+ "step": 262500
5255
+ },
5256
+ {
5257
+ "epoch": 8.06,
5258
+ "learning_rate": 0.00015452049462684068,
5259
+ "loss": 0.3359,
5260
+ "step": 263000
5261
+ },
5262
+ {
5263
+ "epoch": 8.06,
5264
+ "eval_loss": 0.7803733348846436,
5265
+ "eval_runtime": 0.5241,
5266
+ "eval_samples_per_second": 1908.102,
5267
+ "eval_steps_per_second": 30.53,
5268
+ "step": 263000
5269
+ },
5270
+ {
5271
+ "epoch": 8.07,
5272
+ "learning_rate": 0.00015404099449747535,
5273
+ "loss": 0.3356,
5274
+ "step": 263500
5275
+ },
5276
+ {
5277
+ "epoch": 8.09,
5278
+ "learning_rate": 0.0001535615048556405,
5279
+ "loss": 0.3357,
5280
+ "step": 264000
5281
+ },
5282
+ {
5283
+ "epoch": 8.09,
5284
+ "eval_loss": 0.7824040651321411,
5285
+ "eval_runtime": 0.5311,
5286
+ "eval_samples_per_second": 1882.719,
5287
+ "eval_steps_per_second": 30.124,
5288
+ "step": 264000
5289
+ },
5290
+ {
5291
+ "epoch": 8.1,
5292
+ "learning_rate": 0.0001530820309449581,
5293
+ "loss": 0.3355,
5294
+ "step": 264500
5295
+ },
5296
+ {
5297
+ "epoch": 8.12,
5298
+ "learning_rate": 0.00015260257800887798,
5299
+ "loss": 0.3354,
5300
+ "step": 265000
5301
+ },
5302
+ {
5303
+ "epoch": 8.12,
5304
+ "eval_loss": 0.776350200176239,
5305
+ "eval_runtime": 0.5238,
5306
+ "eval_samples_per_second": 1908.966,
5307
+ "eval_steps_per_second": 30.543,
5308
+ "step": 265000
5309
+ },
5310
+ {
5311
+ "epoch": 8.13,
5312
+ "learning_rate": 0.0001521231512906207,
5313
+ "loss": 0.3359,
5314
+ "step": 265500
5315
+ },
5316
+ {
5317
+ "epoch": 8.15,
5318
+ "learning_rate": 0.00015164375603311998,
5319
+ "loss": 0.3355,
5320
+ "step": 266000
5321
+ },
5322
+ {
5323
+ "epoch": 8.15,
5324
+ "eval_loss": 0.7818763256072998,
5325
+ "eval_runtime": 0.5354,
5326
+ "eval_samples_per_second": 1867.774,
5327
+ "eval_steps_per_second": 29.884,
5328
+ "step": 266000
5329
+ },
5330
+ {
5331
+ "epoch": 8.17,
5332
+ "learning_rate": 0.00015116439747896553,
5333
+ "loss": 0.3364,
5334
+ "step": 266500
5335
+ },
5336
+ {
5337
+ "epoch": 8.18,
5338
+ "learning_rate": 0.00015068508087034578,
5339
+ "loss": 0.3352,
5340
+ "step": 267000
5341
+ },
5342
+ {
5343
+ "epoch": 8.18,
5344
+ "eval_loss": 0.7747774720191956,
5345
+ "eval_runtime": 0.5186,
5346
+ "eval_samples_per_second": 1928.442,
5347
+ "eval_steps_per_second": 30.855,
5348
+ "step": 267000
5349
+ },
5350
+ {
5351
+ "epoch": 8.2,
5352
+ "learning_rate": 0.00015020581144899027,
5353
+ "loss": 0.335,
5354
+ "step": 267500
5355
+ },
5356
+ {
5357
+ "epoch": 8.21,
5358
+ "learning_rate": 0.0001497265944561127,
5359
+ "loss": 0.3347,
5360
+ "step": 268000
5361
+ },
5362
+ {
5363
+ "epoch": 8.21,
5364
+ "eval_loss": 0.7758739590644836,
5365
+ "eval_runtime": 0.5149,
5366
+ "eval_samples_per_second": 1942.219,
5367
+ "eval_steps_per_second": 31.076,
5368
+ "step": 268000
5369
+ },
5370
+ {
5371
+ "epoch": 8.23,
5372
+ "learning_rate": 0.00014924743513235327,
5373
+ "loss": 0.3347,
5374
+ "step": 268500
5375
+ },
5376
+ {
5377
+ "epoch": 8.24,
5378
+ "learning_rate": 0.0001487683387177216,
5379
+ "loss": 0.3347,
5380
+ "step": 269000
5381
+ },
5382
+ {
5383
+ "epoch": 8.24,
5384
+ "eval_loss": 0.777352511882782,
5385
+ "eval_runtime": 0.511,
5386
+ "eval_samples_per_second": 1956.914,
5387
+ "eval_steps_per_second": 31.311,
5388
+ "step": 269000
5389
+ },
5390
+ {
5391
+ "epoch": 8.26,
5392
+ "learning_rate": 0.00014828931045153928,
5393
+ "loss": 0.6491,
5394
+ "step": 269500
5395
+ },
5396
+ {
5397
+ "epoch": 8.27,
5398
+ "learning_rate": 0.00014781035557238272,
5399
+ "loss": 0.737,
5400
+ "step": 270000
5401
+ },
5402
+ {
5403
+ "epoch": 8.27,
5404
+ "eval_loss": 0.9018945097923279,
5405
+ "eval_runtime": 0.5226,
5406
+ "eval_samples_per_second": 1913.356,
5407
+ "eval_steps_per_second": 30.614,
5408
+ "step": 270000
5409
  }
5410
  ],
5411
  "max_steps": 500000,
5412
  "num_train_epochs": 16,
5413
+ "total_flos": 8.626088843295693e+21,
5414
  "trial_name": null,
5415
  "trial_params": null
5416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8e8c193ef48de967d7501e0b15a06f4b379c56e8efdf426bfb091824f5c716b
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94365aa60c35fc7ec52e5e8ba19311622aaab34d18bf7ce3dd77401881509519
3
  size 102501541