plip committed on
Commit
df3edf5
1 Parent(s): cdf95c9

Training in progress, step 430000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6c710dd18f96e58101abb4543f93762b3065e883b1aa3865d95449d6dcff2ef
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2037a39653e9a86b52eb755df70a2048c15c66b97fc5c5b2e730133643a54847
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26a1dec101691ebd7c104c916b6fbf2b961123d72841379924840b574ab6cdb5
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f68b8d9c04c07b07e7749f58776534bd53918c6e2c126a3b13ccf6e4c8fe0c11
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:482727048b96ef6295d46f8e1c756249fe05df8f4565790853af683f2f73d3d9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2daf51037932e93e0733ea24caef617dc93e09ea15b9fd221dcc7444c15f0e
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec58e861ecfc9f74cf3526c7f7e07d40e14496fd6bc4013607b9344e9c1e618d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3a3ebd3780a0e2b9539a7e9e3280f39aab22ea827f57421c4c22612d438cdfb
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2aebd434102fdb5bc0174b7c6fb694db378bfa14b7f46447d196181b9cfcb39
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86247109bf049f289c15c3221972c6c603aeaaab8c73e9572f0b173c34fca77b
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f00f2e685c708cfd0cbbb80e095c6bf343a9624eb506d7fe315e91e79d2cc8c7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cefc44f7d07864c913b59c01e54c2d25ff69e14a2f34a7e0e54f8b7ef0768fa
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8dd01bc2de28ac7cee0c2566f096951eb4ee6698f2cb4f09a40119c0f514708
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e44801ad618d45c1ec0026a0fb986a5b4b4808493b86436a804a9e6412bb1dd2
3
+ size 14439
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed4ca2d2c2eb8082634a2e8e07ce3220676e6442b8fc3172e0e2229e0f0ad540
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb1f285c2e8ce7b907118ccf23b5cb322a34d0e9eb54ad8436c82bf6abb8cfb2
3
+ size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a08068423bd0da8de286f2bfe952fac174abba9ea4b1b0d5c4dc9fed6c69200
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b32a0be0231f2f19d42d8561635e493d94c812c82d1070981857c9fbb59cf960
3
+ size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a39f4a2af63c824a2393a7a188b35765b425e608da84400d9022e1ad2be2efc
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:847320643082b3f276294a5fa2113e3868a05e1a1e4d298c40cb20665e82af38
3
+ size 14439
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21fa0c13fc0e3348f6228394f5e318945295debe26ba21ec91b2c06a47593869
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f427c751ea4b109969727e0c5f2ef9ef6fd7587de8192ab50fc2201ab4ba3ed9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.868041300284935,
5
- "global_step": 420000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8406,11 +8406,211 @@
8406
  "eval_samples_per_second": 1947.554,
8407
  "eval_steps_per_second": 31.161,
8408
  "step": 420000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8409
  }
8410
  ],
8411
  "max_steps": 500000,
8412
  "num_train_epochs": 16,
8413
- "total_flos": 1.341836397310529e+22,
8414
  "trial_name": null,
8415
  "trial_params": null
8416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.174423236006005,
5
+ "global_step": 430000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8406
  "eval_samples_per_second": 1947.554,
8407
  "eval_steps_per_second": 31.161,
8408
  "step": 420000
8409
+ },
8410
+ {
8411
+ "epoch": 12.88,
8412
+ "learning_rate": 2.9586456905128618e-05,
8413
+ "loss": 0.3162,
8414
+ "step": 420500
8415
+ },
8416
+ {
8417
+ "epoch": 12.9,
8418
+ "learning_rate": 2.9346475051519687e-05,
8419
+ "loss": 0.3163,
8420
+ "step": 421000
8421
+ },
8422
+ {
8423
+ "epoch": 12.9,
8424
+ "eval_loss": 0.7781884670257568,
8425
+ "eval_runtime": 0.5072,
8426
+ "eval_samples_per_second": 1971.754,
8427
+ "eval_steps_per_second": 31.548,
8428
+ "step": 421000
8429
+ },
8430
+ {
8431
+ "epoch": 12.91,
8432
+ "learning_rate": 2.910786732472815e-05,
8433
+ "loss": 0.3162,
8434
+ "step": 421500
8435
+ },
8436
+ {
8437
+ "epoch": 12.93,
8438
+ "learning_rate": 2.887063633412981e-05,
8439
+ "loss": 0.3161,
8440
+ "step": 422000
8441
+ },
8442
+ {
8443
+ "epoch": 12.93,
8444
+ "eval_loss": 0.7753953337669373,
8445
+ "eval_runtime": 0.5016,
8446
+ "eval_samples_per_second": 1993.582,
8447
+ "eval_steps_per_second": 31.897,
8448
+ "step": 422000
8449
+ },
8450
+ {
8451
+ "epoch": 12.94,
8452
+ "learning_rate": 2.863478467404478e-05,
8453
+ "loss": 0.3159,
8454
+ "step": 422500
8455
+ },
8456
+ {
8457
+ "epoch": 12.96,
8458
+ "learning_rate": 2.8400314923709112e-05,
8459
+ "loss": 0.3158,
8460
+ "step": 423000
8461
+ },
8462
+ {
8463
+ "epoch": 12.96,
8464
+ "eval_loss": 0.7729361653327942,
8465
+ "eval_runtime": 0.5075,
8466
+ "eval_samples_per_second": 1970.45,
8467
+ "eval_steps_per_second": 31.527,
8468
+ "step": 423000
8469
+ },
8470
+ {
8471
+ "epoch": 12.98,
8472
+ "learning_rate": 2.816722964724636e-05,
8473
+ "loss": 0.3159,
8474
+ "step": 423500
8475
+ },
8476
+ {
8477
+ "epoch": 12.99,
8478
+ "learning_rate": 2.793553139363981e-05,
8479
+ "loss": 0.3157,
8480
+ "step": 424000
8481
+ },
8482
+ {
8483
+ "epoch": 12.99,
8484
+ "eval_loss": 0.7751319408416748,
8485
+ "eval_runtime": 0.5075,
8486
+ "eval_samples_per_second": 1970.502,
8487
+ "eval_steps_per_second": 31.528,
8488
+ "step": 424000
8489
+ },
8490
+ {
8491
+ "epoch": 13.01,
8492
+ "learning_rate": 2.7705222696704366e-05,
8493
+ "loss": 0.3159,
8494
+ "step": 424500
8495
+ },
8496
+ {
8497
+ "epoch": 13.02,
8498
+ "learning_rate": 2.7476306075059096e-05,
8499
+ "loss": 0.3157,
8500
+ "step": 425000
8501
+ },
8502
+ {
8503
+ "epoch": 13.02,
8504
+ "eval_loss": 0.7777685523033142,
8505
+ "eval_runtime": 0.5223,
8506
+ "eval_samples_per_second": 1914.553,
8507
+ "eval_steps_per_second": 30.633,
8508
+ "step": 425000
8509
+ },
8510
+ {
8511
+ "epoch": 13.04,
8512
+ "learning_rate": 2.7248784032099478e-05,
8513
+ "loss": 0.3157,
8514
+ "step": 425500
8515
+ },
8516
+ {
8517
+ "epoch": 13.05,
8518
+ "learning_rate": 2.7022659055970144e-05,
8519
+ "loss": 0.3156,
8520
+ "step": 426000
8521
+ },
8522
+ {
8523
+ "epoch": 13.05,
8524
+ "eval_loss": 0.7756606340408325,
8525
+ "eval_runtime": 0.511,
8526
+ "eval_samples_per_second": 1957.114,
8527
+ "eval_steps_per_second": 31.314,
8528
+ "step": 426000
8529
+ },
8530
+ {
8531
+ "epoch": 13.07,
8532
+ "learning_rate": 2.6797933619537604e-05,
8533
+ "loss": 0.3155,
8534
+ "step": 426500
8535
+ },
8536
+ {
8537
+ "epoch": 13.08,
8538
+ "learning_rate": 2.6574610180363166e-05,
8539
+ "loss": 0.3155,
8540
+ "step": 427000
8541
+ },
8542
+ {
8543
+ "epoch": 13.08,
8544
+ "eval_loss": 0.7737278938293457,
8545
+ "eval_runtime": 0.5036,
8546
+ "eval_samples_per_second": 1985.649,
8547
+ "eval_steps_per_second": 31.77,
8548
+ "step": 427000
8549
+ },
8550
+ {
8551
+ "epoch": 13.1,
8552
+ "learning_rate": 2.6352691180676286e-05,
8553
+ "loss": 0.3157,
8554
+ "step": 427500
8555
+ },
8556
+ {
8557
+ "epoch": 13.11,
8558
+ "learning_rate": 2.6132179047347505e-05,
8559
+ "loss": 0.3156,
8560
+ "step": 428000
8561
+ },
8562
+ {
8563
+ "epoch": 13.11,
8564
+ "eval_loss": 0.7748771905899048,
8565
+ "eval_runtime": 0.5178,
8566
+ "eval_samples_per_second": 1931.107,
8567
+ "eval_steps_per_second": 30.898,
8568
+ "step": 428000
8569
+ },
8570
+ {
8571
+ "epoch": 13.13,
8572
+ "learning_rate": 2.5913076191862238e-05,
8573
+ "loss": 0.3156,
8574
+ "step": 428500
8575
+ },
8576
+ {
8577
+ "epoch": 13.14,
8578
+ "learning_rate": 2.5695385010294165e-05,
8579
+ "loss": 0.3156,
8580
+ "step": 429000
8581
+ },
8582
+ {
8583
+ "epoch": 13.14,
8584
+ "eval_loss": 0.7763614058494568,
8585
+ "eval_runtime": 0.5128,
8586
+ "eval_samples_per_second": 1949.92,
8587
+ "eval_steps_per_second": 31.199,
8588
+ "step": 429000
8589
+ },
8590
+ {
8591
+ "epoch": 13.16,
8592
+ "learning_rate": 2.5479107883279144e-05,
8593
+ "loss": 0.3155,
8594
+ "step": 429500
8595
+ },
8596
+ {
8597
+ "epoch": 13.17,
8598
+ "learning_rate": 2.5264247175989292e-05,
8599
+ "loss": 0.3151,
8600
+ "step": 430000
8601
+ },
8602
+ {
8603
+ "epoch": 13.17,
8604
+ "eval_loss": 0.7742259502410889,
8605
+ "eval_runtime": 0.5135,
8606
+ "eval_samples_per_second": 1947.536,
8607
+ "eval_steps_per_second": 31.161,
8608
+ "step": 430000
8609
  }
8610
  ],
8611
  "max_steps": 500000,
8612
  "num_train_epochs": 16,
8613
+ "total_flos": 1.3737843123927814e+22,
8614
  "trial_name": null,
8615
  "trial_params": null
8616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26a1dec101691ebd7c104c916b6fbf2b961123d72841379924840b574ab6cdb5
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f68b8d9c04c07b07e7749f58776534bd53918c6e2c126a3b13ccf6e4c8fe0c11
3
  size 102501541