plip commited on
Commit
8b1df80
1 Parent(s): f7dec81

Training in progress, step 370000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e16c64f372a1e3d1a1bc418a8e836e1cdfb665e54de868c2b11e7665a8124bd
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9c8cce7fc16d49d0f7371660f21497251dd70fb6a0863ced538e8e9c69a0ca9
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc2fe2e5c738c8106278780fab9f7f87cb208c3a8d2ade89df93aa0343c89bdb
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb573bc42125377ae261521a84adbf685b2347ffb43eb62b536c886544fe2d46
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:005b2e75d9554e018ed97e7633dc38306c4b6264c09e33e752be796ffbb52bec
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ee12284053cf83e8530cb65257ff01350f611fa2a8f8b92c00004c4644ba728
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f90d85a554f4240712d12f4bc1ca42d7122c1ea252e7027b0c918e99e4e1de85
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c588aab0631e1cbe9b9bc91bd8fdb267f40c2efed21da72dbc11089c341db1a
3
+ size 14567
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:795aa2456aa093dc231a0ad24cc817827fd817979cb85490c41b919c7e2ac93d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d81b56af08f24fc8d507e3068f145577049df6c261f921cf212def6001138c25
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3a87c23715b7e025f85b63ac84fed0695e2db4c59ea40d605d467271ee7eaff
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3d36fad09b43ee50c94cb8bbf636c2e82070e751a5d530c5d483d7fea96111
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2870379f6b63b47a9122657f592b0d575a5671da1a1fb7b494f10475da4ee08
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:738ab6b372718dc5257fa8c5c82fc8480566553405b2c1fe74ac6bb9bd72e750
3
+ size 14567
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccc684958440988665bb83e21073c7e935f44d96d06218e7d486fc15f417721c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a56c60b33853d8e6230ccd6985b78412b5623c0d98e6fcecc17ad9dcdf56630f
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7790e1f5d1d539cc24a5c7f36951077137e9c4173691b395bbb8021eb1098267
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c77a0d4fbb407266eb2a3608ccf0fffdba85f7cd116784ad470dea58425561
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:296d7ff2fcb06c6f8604bd8a18f2173bb33ea1cf17b1b7e10c614ffd53051e05
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0880cbd756484509ced83a375e5248ed2190c14baa2ebe717f828a8c21b981bd
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580594f4768ecd1bd92e87dca92e874365a397305161c1f8781b79f1f0b613ba
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.029749685958516,
5
- "global_step": 360000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7206,11 +7206,211 @@
7206
  "eval_samples_per_second": 1921.599,
7207
  "eval_steps_per_second": 30.746,
7208
  "step": 360000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7209
  }
7210
  ],
7211
  "max_steps": 500000,
7212
  "num_train_epochs": 16,
7213
- "total_flos": 1.1501449128410266e+22,
7214
  "trial_name": null,
7215
  "trial_params": null
7216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.336131621679586,
5
+ "global_step": 370000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7206
  "eval_samples_per_second": 1921.599,
7207
  "eval_steps_per_second": 30.746,
7208
  "step": 360000
7209
+ },
7210
+ {
7211
+ "epoch": 11.05,
7212
+ "learning_rate": 6.746049527150238e-05,
7213
+ "loss": 0.3226,
7214
+ "step": 360500
7215
+ },
7216
+ {
7217
+ "epoch": 11.06,
7218
+ "learning_rate": 6.707871421704209e-05,
7219
+ "loss": 0.3225,
7220
+ "step": 361000
7221
+ },
7222
+ {
7223
+ "epoch": 11.06,
7224
+ "eval_loss": 0.7777162790298462,
7225
+ "eval_runtime": 0.5171,
7226
+ "eval_samples_per_second": 1933.691,
7227
+ "eval_steps_per_second": 30.939,
7228
+ "step": 361000
7229
+ },
7230
+ {
7231
+ "epoch": 11.08,
7232
+ "learning_rate": 6.669789465567683e-05,
7233
+ "loss": 0.3226,
7234
+ "step": 361500
7235
+ },
7236
+ {
7237
+ "epoch": 11.09,
7238
+ "learning_rate": 6.631804075198838e-05,
7239
+ "loss": 0.3223,
7240
+ "step": 362000
7241
+ },
7242
+ {
7243
+ "epoch": 11.09,
7244
+ "eval_loss": 0.7790626287460327,
7245
+ "eval_runtime": 0.512,
7246
+ "eval_samples_per_second": 1953.298,
7247
+ "eval_steps_per_second": 31.253,
7248
+ "step": 362000
7249
+ },
7250
+ {
7251
+ "epoch": 11.11,
7252
+ "learning_rate": 6.593915665999816e-05,
7253
+ "loss": 0.3221,
7254
+ "step": 362500
7255
+ },
7256
+ {
7257
+ "epoch": 11.12,
7258
+ "learning_rate": 6.55612465231219e-05,
7259
+ "loss": 0.3222,
7260
+ "step": 363000
7261
+ },
7262
+ {
7263
+ "epoch": 11.12,
7264
+ "eval_loss": 0.7765858173370361,
7265
+ "eval_runtime": 0.5305,
7266
+ "eval_samples_per_second": 1884.91,
7267
+ "eval_steps_per_second": 30.159,
7268
+ "step": 363000
7269
+ },
7270
+ {
7271
+ "epoch": 11.14,
7272
+ "learning_rate": 6.518431447412434e-05,
7273
+ "loss": 0.3219,
7274
+ "step": 363500
7275
+ },
7276
+ {
7277
+ "epoch": 11.15,
7278
+ "learning_rate": 6.480836463507392e-05,
7279
+ "loss": 0.322,
7280
+ "step": 364000
7281
+ },
7282
+ {
7283
+ "epoch": 11.15,
7284
+ "eval_loss": 0.7808003425598145,
7285
+ "eval_runtime": 0.5024,
7286
+ "eval_samples_per_second": 1990.262,
7287
+ "eval_steps_per_second": 31.844,
7288
+ "step": 364000
7289
+ },
7290
+ {
7291
+ "epoch": 11.17,
7292
+ "learning_rate": 6.443340111729786e-05,
7293
+ "loss": 0.3219,
7294
+ "step": 364500
7295
+ },
7296
+ {
7297
+ "epoch": 11.18,
7298
+ "learning_rate": 6.405942802133713e-05,
7299
+ "loss": 0.322,
7300
+ "step": 365000
7301
+ },
7302
+ {
7303
+ "epoch": 11.18,
7304
+ "eval_loss": 0.7744137048721313,
7305
+ "eval_runtime": 0.5239,
7306
+ "eval_samples_per_second": 1908.642,
7307
+ "eval_steps_per_second": 30.538,
7308
+ "step": 365000
7309
+ },
7310
+ {
7311
+ "epoch": 11.2,
7312
+ "learning_rate": 6.36864494369016e-05,
7313
+ "loss": 0.3218,
7314
+ "step": 365500
7315
+ },
7316
+ {
7317
+ "epoch": 11.21,
7318
+ "learning_rate": 6.331446944282534e-05,
7319
+ "loss": 0.3217,
7320
+ "step": 366000
7321
+ },
7322
+ {
7323
+ "epoch": 11.21,
7324
+ "eval_loss": 0.7770714163780212,
7325
+ "eval_runtime": 0.5069,
7326
+ "eval_samples_per_second": 1972.691,
7327
+ "eval_steps_per_second": 31.563,
7328
+ "step": 366000
7329
+ },
7330
+ {
7331
+ "epoch": 11.23,
7332
+ "learning_rate": 6.294349210702188e-05,
7333
+ "loss": 0.3219,
7334
+ "step": 366500
7335
+ },
7336
+ {
7337
+ "epoch": 11.24,
7338
+ "learning_rate": 6.257352148643998e-05,
7339
+ "loss": 0.3217,
7340
+ "step": 367000
7341
+ },
7342
+ {
7343
+ "epoch": 11.24,
7344
+ "eval_loss": 0.7741234302520752,
7345
+ "eval_runtime": 0.5107,
7346
+ "eval_samples_per_second": 1958.008,
7347
+ "eval_steps_per_second": 31.328,
7348
+ "step": 367000
7349
+ },
7350
+ {
7351
+ "epoch": 11.26,
7352
+ "learning_rate": 6.220456162701908e-05,
7353
+ "loss": 0.3215,
7354
+ "step": 367500
7355
+ },
7356
+ {
7357
+ "epoch": 11.27,
7358
+ "learning_rate": 6.183661656364515e-05,
7359
+ "loss": 0.3216,
7360
+ "step": 368000
7361
+ },
7362
+ {
7363
+ "epoch": 11.27,
7364
+ "eval_loss": 0.7758086919784546,
7365
+ "eval_runtime": 0.5088,
7366
+ "eval_samples_per_second": 1965.315,
7367
+ "eval_steps_per_second": 31.445,
7368
+ "step": 368000
7369
+ },
7370
+ {
7371
+ "epoch": 11.29,
7372
+ "learning_rate": 6.146969032010631e-05,
7373
+ "loss": 0.3214,
7374
+ "step": 368500
7375
+ },
7376
+ {
7377
+ "epoch": 11.31,
7378
+ "learning_rate": 6.110378690904928e-05,
7379
+ "loss": 0.3216,
7380
+ "step": 369000
7381
+ },
7382
+ {
7383
+ "epoch": 11.31,
7384
+ "eval_loss": 0.7766358256340027,
7385
+ "eval_runtime": 0.5033,
7386
+ "eval_samples_per_second": 1986.831,
7387
+ "eval_steps_per_second": 31.789,
7388
+ "step": 369000
7389
+ },
7390
+ {
7391
+ "epoch": 11.32,
7392
+ "learning_rate": 6.073891033193507e-05,
7393
+ "loss": 0.3214,
7394
+ "step": 369500
7395
+ },
7396
+ {
7397
+ "epoch": 11.34,
7398
+ "learning_rate": 6.037506457899553e-05,
7399
+ "loss": 0.3213,
7400
+ "step": 370000
7401
+ },
7402
+ {
7403
+ "epoch": 11.34,
7404
+ "eval_loss": 0.7805209159851074,
7405
+ "eval_runtime": 0.5089,
7406
+ "eval_samples_per_second": 1964.929,
7407
+ "eval_steps_per_second": 31.439,
7408
+ "step": 370000
7409
  }
7410
  ],
7411
  "max_steps": 500000,
7412
  "num_train_epochs": 16,
7413
+ "total_flos": 1.1820936267184766e+22,
7414
  "trial_name": null,
7415
  "trial_params": null
7416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc2fe2e5c738c8106278780fab9f7f87cb208c3a8d2ade89df93aa0343c89bdb
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb573bc42125377ae261521a84adbf685b2347ffb43eb62b536c886544fe2d46
3
  size 102501541