plip commited on
Commit
02e363a
1 Parent(s): eceb427

Training in progress, step 320000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d30543a63c3f49005c19382ae197f2cf2879229339898c2554a8df1a8a2421f
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229a42ebe682c3ef3fa77824f414f8052ce22269902d2cf833bbceae01b4ee94
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07e69690c144d55eab2611717957556e397812111ed907e27900f2d419339d70
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2234ce12404bc4801fab809254f08127bd71fca09a8dab3e0c720a225f0006ed
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:548110cdd23dffd1b7f96561a08e3cbe6bf67f48bac9ee8d0ebf3314491785e9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b54a8c5749446bd4a65592cc408c92cd1c1a63789b632ec709bae613de880e8
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:992c07370b8db97dbb38b7b82bcfcf2b4063ad427f2a7c1da17dc6b7936247f4
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e12e956d2c1594d69772425e394d5c7340f5558535a744e143a62985c9f6b3a
3
+ size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:309d45549a36b33343b31b19dff62c40cbca438acddc29ad6300e29a9b1364dd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5034bb9708a34c35b3368c1e0fff63513e2cb5f1c0dd56fffa0328312b7e4831
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9ac8da3681828a13f22af7d6fffaed7afbe91d2bf410206cc27808e2ecc0ca
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e06a3dca10a2bccff3cb0c6a7b393b12b0f08503dc63d7b7533eeb15ed495c6
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9df044a6fee5eeed1506b8dbce5bf547aca68cd5a201fa67f209077ea7b2dfb1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a94f09290bc683f43d0869ce2fa5f9751184b5e70371828d250a3714d35fe40
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c04c575cebc96e7d40383dac3dbdc79efa292cede7a20d6c65225e6c903061e2
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4915e195da15bfd64d34239234d248cd0ab1ad7df671f2845974753597da8bc3
3
+ size 14439
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:472356308bd424b33e07d9161a75bf7fa03c160c8593923578d6c5cdc5036926
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:057d8e4139ad7708d7871dd8361365fbd9951b2ae3daf5aded867e56c2fe457c
3
+ size 14439
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74c5657fa915531128d8604d48f9f541ac95d7782dff879847610bd62a46c3d1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7115cc7cbcc32a343bb9b4e7b15f1fa12bd3bb61d63d5248eaa0a65935d6e80c
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b8c10dab6d3ee824fc8fe4628d3bf3ceea806ce0d2fbe513f32af4d508ab89e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5673377a057c7734bd1a0ee14d972f6f3bfc67bb8208ac49ae618347d18d616b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.497840007353167,
5
- "global_step": 310000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6206,11 +6206,211 @@
6206
  "eval_samples_per_second": 1914.938,
6207
  "eval_steps_per_second": 30.639,
6208
  "step": 310000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6209
  }
6210
  ],
6211
  "max_steps": 500000,
6212
  "num_train_epochs": 16,
6213
- "total_flos": 9.904029410441717e+21,
6214
  "trial_name": null,
6215
  "trial_params": null
6216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.804221943074236,
5
+ "global_step": 320000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6206
  "eval_samples_per_second": 1914.938,
6207
  "eval_steps_per_second": 30.639,
6208
  "step": 310000
6209
+ },
6210
+ {
6211
+ "epoch": 9.51,
6212
+ "learning_rate": 0.00010973674410951567,
6213
+ "loss": 0.3293,
6214
+ "step": 310500
6215
+ },
6216
+ {
6217
+ "epoch": 9.53,
6218
+ "learning_rate": 0.00010928144739511337,
6219
+ "loss": 0.329,
6220
+ "step": 311000
6221
+ },
6222
+ {
6223
+ "epoch": 9.53,
6224
+ "eval_loss": 0.776207685470581,
6225
+ "eval_runtime": 0.5118,
6226
+ "eval_samples_per_second": 1953.912,
6227
+ "eval_steps_per_second": 31.263,
6228
+ "step": 311000
6229
+ },
6230
+ {
6231
+ "epoch": 9.54,
6232
+ "learning_rate": 0.00010882665065147757,
6233
+ "loss": 0.3287,
6234
+ "step": 311500
6235
+ },
6236
+ {
6237
+ "epoch": 9.56,
6238
+ "learning_rate": 0.00010837235885219267,
6239
+ "loss": 0.3286,
6240
+ "step": 312000
6241
+ },
6242
+ {
6243
+ "epoch": 9.56,
6244
+ "eval_loss": 0.7779992818832397,
6245
+ "eval_runtime": 0.5097,
6246
+ "eval_samples_per_second": 1962.124,
6247
+ "eval_steps_per_second": 31.394,
6248
+ "step": 312000
6249
+ },
6250
+ {
6251
+ "epoch": 9.57,
6252
+ "learning_rate": 0.00010791857696532089,
6253
+ "loss": 0.3287,
6254
+ "step": 312500
6255
+ },
6256
+ {
6257
+ "epoch": 9.59,
6258
+ "learning_rate": 0.00010746530995334832,
6259
+ "loss": 0.3285,
6260
+ "step": 313000
6261
+ },
6262
+ {
6263
+ "epoch": 9.59,
6264
+ "eval_loss": 0.7776817679405212,
6265
+ "eval_runtime": 0.5012,
6266
+ "eval_samples_per_second": 1995.205,
6267
+ "eval_steps_per_second": 31.923,
6268
+ "step": 313000
6269
+ },
6270
+ {
6271
+ "epoch": 9.61,
6272
+ "learning_rate": 0.0001070125627731304,
6273
+ "loss": 0.3285,
6274
+ "step": 313500
6275
+ },
6276
+ {
6277
+ "epoch": 9.62,
6278
+ "learning_rate": 0.0001065603403758377,
6279
+ "loss": 0.3288,
6280
+ "step": 314000
6281
+ },
6282
+ {
6283
+ "epoch": 9.62,
6284
+ "eval_loss": 0.7795534133911133,
6285
+ "eval_runtime": 0.523,
6286
+ "eval_samples_per_second": 1912.141,
6287
+ "eval_steps_per_second": 30.594,
6288
+ "step": 314000
6289
+ },
6290
+ {
6291
+ "epoch": 9.64,
6292
+ "learning_rate": 0.00010610864770690196,
6293
+ "loss": 0.3285,
6294
+ "step": 314500
6295
+ },
6296
+ {
6297
+ "epoch": 9.65,
6298
+ "learning_rate": 0.00010565748970596172,
6299
+ "loss": 0.3281,
6300
+ "step": 315000
6301
+ },
6302
+ {
6303
+ "epoch": 9.65,
6304
+ "eval_loss": 0.7744332551956177,
6305
+ "eval_runtime": 0.5161,
6306
+ "eval_samples_per_second": 1937.422,
6307
+ "eval_steps_per_second": 30.999,
6308
+ "step": 315000
6309
+ },
6310
+ {
6311
+ "epoch": 9.67,
6312
+ "learning_rate": 0.00010520687130680884,
6313
+ "loss": 0.3279,
6314
+ "step": 315500
6315
+ },
6316
+ {
6317
+ "epoch": 9.68,
6318
+ "learning_rate": 0.00010475679743733364,
6319
+ "loss": 0.3284,
6320
+ "step": 316000
6321
+ },
6322
+ {
6323
+ "epoch": 9.68,
6324
+ "eval_loss": 0.7782894968986511,
6325
+ "eval_runtime": 0.5278,
6326
+ "eval_samples_per_second": 1894.746,
6327
+ "eval_steps_per_second": 30.316,
6328
+ "step": 316000
6329
+ },
6330
+ {
6331
+ "epoch": 9.7,
6332
+ "learning_rate": 0.00010430727301947202,
6333
+ "loss": 0.3282,
6334
+ "step": 316500
6335
+ },
6336
+ {
6337
+ "epoch": 9.71,
6338
+ "learning_rate": 0.00010385830296915104,
6339
+ "loss": 0.328,
6340
+ "step": 317000
6341
+ },
6342
+ {
6343
+ "epoch": 9.71,
6344
+ "eval_loss": 0.7791895866394043,
6345
+ "eval_runtime": 0.4908,
6346
+ "eval_samples_per_second": 2037.475,
6347
+ "eval_steps_per_second": 32.6,
6348
+ "step": 317000
6349
+ },
6350
+ {
6351
+ "epoch": 9.73,
6352
+ "learning_rate": 0.00010340989219623508,
6353
+ "loss": 0.328,
6354
+ "step": 317500
6355
+ },
6356
+ {
6357
+ "epoch": 9.74,
6358
+ "learning_rate": 0.0001029620456044727,
6359
+ "loss": 0.3278,
6360
+ "step": 318000
6361
+ },
6362
+ {
6363
+ "epoch": 9.74,
6364
+ "eval_loss": 0.7816545367240906,
6365
+ "eval_runtime": 0.5081,
6366
+ "eval_samples_per_second": 1968.272,
6367
+ "eval_steps_per_second": 31.492,
6368
+ "step": 318000
6369
+ },
6370
+ {
6371
+ "epoch": 9.76,
6372
+ "learning_rate": 0.00010251476809144226,
6373
+ "loss": 0.3279,
6374
+ "step": 318500
6375
+ },
6376
+ {
6377
+ "epoch": 9.77,
6378
+ "learning_rate": 0.00010206806454849917,
6379
+ "loss": 0.3276,
6380
+ "step": 319000
6381
+ },
6382
+ {
6383
+ "epoch": 9.77,
6384
+ "eval_loss": 0.7802248597145081,
6385
+ "eval_runtime": 0.4946,
6386
+ "eval_samples_per_second": 2021.73,
6387
+ "eval_steps_per_second": 32.348,
6388
+ "step": 319000
6389
+ },
6390
+ {
6391
+ "epoch": 9.79,
6392
+ "learning_rate": 0.00010162193986072167,
6393
+ "loss": 0.3272,
6394
+ "step": 319500
6395
+ },
6396
+ {
6397
+ "epoch": 9.8,
6398
+ "learning_rate": 0.00010117639890685795,
6399
+ "loss": 0.3273,
6400
+ "step": 320000
6401
+ },
6402
+ {
6403
+ "epoch": 9.8,
6404
+ "eval_loss": 0.7808557152748108,
6405
+ "eval_runtime": 0.499,
6406
+ "eval_samples_per_second": 2004.192,
6407
+ "eval_steps_per_second": 32.067,
6408
+ "step": 320000
6409
  }
6410
  ],
6411
  "max_steps": 500000,
6412
  "num_train_epochs": 16,
6413
+ "total_flos": 1.0223516549216217e+22,
6414
  "trial_name": null,
6415
  "trial_params": null
6416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07e69690c144d55eab2611717957556e397812111ed907e27900f2d419339d70
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2234ce12404bc4801fab809254f08127bd71fca09a8dab3e0c720a225f0006ed
3
  size 102501541