plip committed on
Commit
367ccde
1 Parent(s): 0fd1843

Training in progress, step 370000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c017dc7aa51a43ed67b54be4391afb3209331a95386c7a0f1faefbfda3688b82
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8884578ecff20bdc143758e28e0d85c34dad8dbfa37132c1e5318c8f80a9a1be
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86d6df4782178506acacd0c83df02e5b041758e0dad6be6ff1fb20dae19c22b2
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:797df8ba5787bd4a5fbf872ca4b3fe3fdb3db1dba64902a8c55adee164838ae0
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580594f4768ecd1bd92e87dca92e874365a397305161c1f8781b79f1f0b613ba
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.174779550435803,
5
- "global_step": 360000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7206,11 +7206,211 @@
7206
  "eval_samples_per_second": 733.817,
7207
  "eval_steps_per_second": 11.741,
7208
  "step": 360000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7209
  }
7210
  ],
7211
  "max_steps": 500000,
7212
  "num_train_epochs": 13,
7213
- "total_flos": 1.1501415685664595e+22,
7214
  "trial_name": null,
7215
  "trial_params": null
7216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.429634537947909,
5
+ "global_step": 370000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7206
  "eval_samples_per_second": 733.817,
7207
  "eval_steps_per_second": 11.741,
7208
  "step": 360000
7209
+ },
7210
+ {
7211
+ "epoch": 9.19,
7212
+ "learning_rate": 6.746049527150238e-05,
7213
+ "loss": 0.276,
7214
+ "step": 360500
7215
+ },
7216
+ {
7217
+ "epoch": 9.2,
7218
+ "learning_rate": 6.707871421704209e-05,
7219
+ "loss": 0.2757,
7220
+ "step": 361000
7221
+ },
7222
+ {
7223
+ "epoch": 9.2,
7224
+ "eval_loss": 0.8061186075210571,
7225
+ "eval_runtime": 1.2926,
7226
+ "eval_samples_per_second": 773.629,
7227
+ "eval_steps_per_second": 12.378,
7228
+ "step": 361000
7229
+ },
7230
+ {
7231
+ "epoch": 9.21,
7232
+ "learning_rate": 6.669789465567683e-05,
7233
+ "loss": 0.2754,
7234
+ "step": 361500
7235
+ },
7236
+ {
7237
+ "epoch": 9.23,
7238
+ "learning_rate": 6.631804075198838e-05,
7239
+ "loss": 0.2754,
7240
+ "step": 362000
7241
+ },
7242
+ {
7243
+ "epoch": 9.23,
7244
+ "eval_loss": 0.8106433153152466,
7245
+ "eval_runtime": 1.3154,
7246
+ "eval_samples_per_second": 760.201,
7247
+ "eval_steps_per_second": 12.163,
7248
+ "step": 362000
7249
+ },
7250
+ {
7251
+ "epoch": 9.24,
7252
+ "learning_rate": 6.593915665999816e-05,
7253
+ "loss": 0.275,
7254
+ "step": 362500
7255
+ },
7256
+ {
7257
+ "epoch": 9.25,
7258
+ "learning_rate": 6.55612465231219e-05,
7259
+ "loss": 0.2755,
7260
+ "step": 363000
7261
+ },
7262
+ {
7263
+ "epoch": 9.25,
7264
+ "eval_loss": 0.8047894835472107,
7265
+ "eval_runtime": 1.3007,
7266
+ "eval_samples_per_second": 768.793,
7267
+ "eval_steps_per_second": 12.301,
7268
+ "step": 363000
7269
+ },
7270
+ {
7271
+ "epoch": 9.26,
7272
+ "learning_rate": 6.518431447412434e-05,
7273
+ "loss": 0.2754,
7274
+ "step": 363500
7275
+ },
7276
+ {
7277
+ "epoch": 9.28,
7278
+ "learning_rate": 6.480836463507392e-05,
7279
+ "loss": 0.2753,
7280
+ "step": 364000
7281
+ },
7282
+ {
7283
+ "epoch": 9.28,
7284
+ "eval_loss": 0.8103929758071899,
7285
+ "eval_runtime": 1.3172,
7286
+ "eval_samples_per_second": 759.199,
7287
+ "eval_steps_per_second": 12.147,
7288
+ "step": 364000
7289
+ },
7290
+ {
7291
+ "epoch": 9.29,
7292
+ "learning_rate": 6.443340111729786e-05,
7293
+ "loss": 0.2753,
7294
+ "step": 364500
7295
+ },
7296
+ {
7297
+ "epoch": 9.3,
7298
+ "learning_rate": 6.405942802133713e-05,
7299
+ "loss": 0.2753,
7300
+ "step": 365000
7301
+ },
7302
+ {
7303
+ "epoch": 9.3,
7304
+ "eval_loss": 0.8095191717147827,
7305
+ "eval_runtime": 1.3369,
7306
+ "eval_samples_per_second": 747.972,
7307
+ "eval_steps_per_second": 11.968,
7308
+ "step": 365000
7309
+ },
7310
+ {
7311
+ "epoch": 9.31,
7312
+ "learning_rate": 6.36864494369016e-05,
7313
+ "loss": 0.2754,
7314
+ "step": 365500
7315
+ },
7316
+ {
7317
+ "epoch": 9.33,
7318
+ "learning_rate": 6.331446944282534e-05,
7319
+ "loss": 0.2753,
7320
+ "step": 366000
7321
+ },
7322
+ {
7323
+ "epoch": 9.33,
7324
+ "eval_loss": 0.8096633553504944,
7325
+ "eval_runtime": 1.2639,
7326
+ "eval_samples_per_second": 791.187,
7327
+ "eval_steps_per_second": 12.659,
7328
+ "step": 366000
7329
+ },
7330
+ {
7331
+ "epoch": 9.34,
7332
+ "learning_rate": 6.294349210702188e-05,
7333
+ "loss": 0.2748,
7334
+ "step": 366500
7335
+ },
7336
+ {
7337
+ "epoch": 9.35,
7338
+ "learning_rate": 6.257352148643998e-05,
7339
+ "loss": 0.2752,
7340
+ "step": 367000
7341
+ },
7342
+ {
7343
+ "epoch": 9.35,
7344
+ "eval_loss": 0.808983564376831,
7345
+ "eval_runtime": 1.349,
7346
+ "eval_samples_per_second": 741.265,
7347
+ "eval_steps_per_second": 11.86,
7348
+ "step": 367000
7349
+ },
7350
+ {
7351
+ "epoch": 9.37,
7352
+ "learning_rate": 6.220456162701908e-05,
7353
+ "loss": 0.2751,
7354
+ "step": 367500
7355
+ },
7356
+ {
7357
+ "epoch": 9.38,
7358
+ "learning_rate": 6.183661656364515e-05,
7359
+ "loss": 0.2749,
7360
+ "step": 368000
7361
+ },
7362
+ {
7363
+ "epoch": 9.38,
7364
+ "eval_loss": 0.8059231638908386,
7365
+ "eval_runtime": 1.3338,
7366
+ "eval_samples_per_second": 749.736,
7367
+ "eval_steps_per_second": 11.996,
7368
+ "step": 368000
7369
+ },
7370
+ {
7371
+ "epoch": 9.39,
7372
+ "learning_rate": 6.146969032010631e-05,
7373
+ "loss": 0.2747,
7374
+ "step": 368500
7375
+ },
7376
+ {
7377
+ "epoch": 9.4,
7378
+ "learning_rate": 6.110378690904928e-05,
7379
+ "loss": 0.2749,
7380
+ "step": 369000
7381
+ },
7382
+ {
7383
+ "epoch": 9.4,
7384
+ "eval_loss": 0.8113678097724915,
7385
+ "eval_runtime": 1.2927,
7386
+ "eval_samples_per_second": 773.561,
7387
+ "eval_steps_per_second": 12.377,
7388
+ "step": 369000
7389
+ },
7390
+ {
7391
+ "epoch": 9.42,
7392
+ "learning_rate": 6.073891033193507e-05,
7393
+ "loss": 0.2749,
7394
+ "step": 369500
7395
+ },
7396
+ {
7397
+ "epoch": 9.43,
7398
+ "learning_rate": 6.037506457899553e-05,
7399
+ "loss": 0.2747,
7400
+ "step": 370000
7401
+ },
7402
+ {
7403
+ "epoch": 9.43,
7404
+ "eval_loss": 0.8089262843132019,
7405
+ "eval_runtime": 1.3047,
7406
+ "eval_samples_per_second": 766.44,
7407
+ "eval_steps_per_second": 12.263,
7408
+ "step": 370000
7409
  }
7410
  ],
7411
  "max_steps": 500000,
7412
  "num_train_epochs": 13,
7413
+ "total_flos": 1.1820902824439095e+22,
7414
  "trial_name": null,
7415
  "trial_params": null
7416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86d6df4782178506acacd0c83df02e5b041758e0dad6be6ff1fb20dae19c22b2
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:797df8ba5787bd4a5fbf872ca4b3fe3fdb3db1dba64902a8c55adee164838ae0
3
  size 102501541