plip commited on
Commit
90879ab
1 Parent(s): 80dcf4a

Training in progress, step 430000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac94a60c95ba2596b6d1d05315af6a3ea5bad0a2404c9fbebd7db8a3b7f370f8
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:485576f1d48c6a4e3bf5c2faeabd28b10bf14e54c686faf5ee4b916aae291248
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09cf499ab54bc6b5b46a8f12dd922823d0795f7339d994c55bfa6c710a91461c
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:402c24544f87635066120a062e0f54ba50aad454021fed3207324dcff5633224
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0132b64d8716021d47991e50e22003dfc788052c2c88a7966cf5395f369a4260
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0132b64d8716021d47991e50e22003dfc788052c2c88a7966cf5395f369a4260
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0132b64d8716021d47991e50e22003dfc788052c2c88a7966cf5395f369a4260
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0132b64d8716021d47991e50e22003dfc788052c2c88a7966cf5395f369a4260
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0132b64d8716021d47991e50e22003dfc788052c2c88a7966cf5395f369a4260
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0132b64d8716021d47991e50e22003dfc788052c2c88a7966cf5395f369a4260
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0132b64d8716021d47991e50e22003dfc788052c2c88a7966cf5395f369a4260
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a10c6d1216669debf3750d55e62e1d7be5eb19a554ae8b1f1f2b6955d97a50b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0132b64d8716021d47991e50e22003dfc788052c2c88a7966cf5395f369a4260
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21fa0c13fc0e3348f6228394f5e318945295debe26ba21ec91b2c06a47593869
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f427c751ea4b109969727e0c5f2ef9ef6fd7587de8192ab50fc2201ab4ba3ed9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.703909475508436,
5
- "global_step": 420000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8406,11 +8406,211 @@
8406
  "eval_samples_per_second": 753.638,
8407
  "eval_steps_per_second": 12.058,
8408
  "step": 420000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8409
  }
8410
  ],
8411
  "max_steps": 500000,
8412
  "num_train_epochs": 13,
8413
- "total_flos": 1.341832503939855e+22,
8414
  "trial_name": null,
8415
  "trial_params": null
8416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.958764463020541,
5
+ "global_step": 430000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8406
  "eval_samples_per_second": 753.638,
8407
  "eval_steps_per_second": 12.058,
8408
  "step": 420000
8409
+ },
8410
+ {
8411
+ "epoch": 10.72,
8412
+ "learning_rate": 2.9586456905128618e-05,
8413
+ "loss": 0.27,
8414
+ "step": 420500
8415
+ },
8416
+ {
8417
+ "epoch": 10.73,
8418
+ "learning_rate": 2.9346475051519687e-05,
8419
+ "loss": 0.2699,
8420
+ "step": 421000
8421
+ },
8422
+ {
8423
+ "epoch": 10.73,
8424
+ "eval_loss": 0.8154944777488708,
8425
+ "eval_runtime": 1.3065,
8426
+ "eval_samples_per_second": 765.411,
8427
+ "eval_steps_per_second": 12.247,
8428
+ "step": 421000
8429
+ },
8430
+ {
8431
+ "epoch": 10.74,
8432
+ "learning_rate": 2.910786732472815e-05,
8433
+ "loss": 0.27,
8434
+ "step": 421500
8435
+ },
8436
+ {
8437
+ "epoch": 10.75,
8438
+ "learning_rate": 2.887063633412981e-05,
8439
+ "loss": 0.2701,
8440
+ "step": 422000
8441
+ },
8442
+ {
8443
+ "epoch": 10.75,
8444
+ "eval_loss": 0.8139403462409973,
8445
+ "eval_runtime": 1.2861,
8446
+ "eval_samples_per_second": 777.555,
8447
+ "eval_steps_per_second": 12.441,
8448
+ "step": 422000
8449
+ },
8450
+ {
8451
+ "epoch": 10.77,
8452
+ "learning_rate": 2.863478467404478e-05,
8453
+ "loss": 0.2699,
8454
+ "step": 422500
8455
+ },
8456
+ {
8457
+ "epoch": 10.78,
8458
+ "learning_rate": 2.8400314923709112e-05,
8459
+ "loss": 0.2701,
8460
+ "step": 423000
8461
+ },
8462
+ {
8463
+ "epoch": 10.78,
8464
+ "eval_loss": 0.8132787942886353,
8465
+ "eval_runtime": 1.3439,
8466
+ "eval_samples_per_second": 744.104,
8467
+ "eval_steps_per_second": 11.906,
8468
+ "step": 423000
8469
+ },
8470
+ {
8471
+ "epoch": 10.79,
8472
+ "learning_rate": 2.816722964724636e-05,
8473
+ "loss": 0.2697,
8474
+ "step": 423500
8475
+ },
8476
+ {
8477
+ "epoch": 10.81,
8478
+ "learning_rate": 2.793553139363981e-05,
8479
+ "loss": 0.2701,
8480
+ "step": 424000
8481
+ },
8482
+ {
8483
+ "epoch": 10.81,
8484
+ "eval_loss": 0.8099727034568787,
8485
+ "eval_runtime": 1.3264,
8486
+ "eval_samples_per_second": 753.909,
8487
+ "eval_steps_per_second": 12.063,
8488
+ "step": 424000
8489
+ },
8490
+ {
8491
+ "epoch": 10.82,
8492
+ "learning_rate": 2.7705222696704366e-05,
8493
+ "loss": 0.27,
8494
+ "step": 424500
8495
+ },
8496
+ {
8497
+ "epoch": 10.83,
8498
+ "learning_rate": 2.7476306075059096e-05,
8499
+ "loss": 0.2696,
8500
+ "step": 425000
8501
+ },
8502
+ {
8503
+ "epoch": 10.83,
8504
+ "eval_loss": 0.8077329993247986,
8505
+ "eval_runtime": 1.3315,
8506
+ "eval_samples_per_second": 751.038,
8507
+ "eval_steps_per_second": 12.017,
8508
+ "step": 425000
8509
+ },
8510
+ {
8511
+ "epoch": 10.84,
8512
+ "learning_rate": 2.7248784032099478e-05,
8513
+ "loss": 0.2696,
8514
+ "step": 425500
8515
+ },
8516
+ {
8517
+ "epoch": 10.86,
8518
+ "learning_rate": 2.7022659055970144e-05,
8519
+ "loss": 0.2696,
8520
+ "step": 426000
8521
+ },
8522
+ {
8523
+ "epoch": 10.86,
8524
+ "eval_loss": 0.8097303509712219,
8525
+ "eval_runtime": 1.3191,
8526
+ "eval_samples_per_second": 758.11,
8527
+ "eval_steps_per_second": 12.13,
8528
+ "step": 426000
8529
+ },
8530
+ {
8531
+ "epoch": 10.87,
8532
+ "learning_rate": 2.6797933619537604e-05,
8533
+ "loss": 0.2694,
8534
+ "step": 426500
8535
+ },
8536
+ {
8537
+ "epoch": 10.88,
8538
+ "learning_rate": 2.6574610180363166e-05,
8539
+ "loss": 0.2698,
8540
+ "step": 427000
8541
+ },
8542
+ {
8543
+ "epoch": 10.88,
8544
+ "eval_loss": 0.8035640120506287,
8545
+ "eval_runtime": 1.3921,
8546
+ "eval_samples_per_second": 718.328,
8547
+ "eval_steps_per_second": 11.493,
8548
+ "step": 427000
8549
+ },
8550
+ {
8551
+ "epoch": 10.9,
8552
+ "learning_rate": 2.6352691180676286e-05,
8553
+ "loss": 0.2696,
8554
+ "step": 427500
8555
+ },
8556
+ {
8557
+ "epoch": 10.91,
8558
+ "learning_rate": 2.6132179047347505e-05,
8559
+ "loss": 0.2698,
8560
+ "step": 428000
8561
+ },
8562
+ {
8563
+ "epoch": 10.91,
8564
+ "eval_loss": 0.8066699504852295,
8565
+ "eval_runtime": 1.5174,
8566
+ "eval_samples_per_second": 659.02,
8567
+ "eval_steps_per_second": 10.544,
8568
+ "step": 428000
8569
+ },
8570
+ {
8571
+ "epoch": 10.92,
8572
+ "learning_rate": 2.5913076191862238e-05,
8573
+ "loss": 0.2696,
8574
+ "step": 428500
8575
+ },
8576
+ {
8577
+ "epoch": 10.93,
8578
+ "learning_rate": 2.5695385010294165e-05,
8579
+ "loss": 0.2699,
8580
+ "step": 429000
8581
+ },
8582
+ {
8583
+ "epoch": 10.93,
8584
+ "eval_loss": 0.813107430934906,
8585
+ "eval_runtime": 1.3419,
8586
+ "eval_samples_per_second": 745.228,
8587
+ "eval_steps_per_second": 11.924,
8588
+ "step": 429000
8589
+ },
8590
+ {
8591
+ "epoch": 10.95,
8592
+ "learning_rate": 2.5479107883279144e-05,
8593
+ "loss": 0.2692,
8594
+ "step": 429500
8595
+ },
8596
+ {
8597
+ "epoch": 10.96,
8598
+ "learning_rate": 2.5264247175989292e-05,
8599
+ "loss": 0.2695,
8600
+ "step": 430000
8601
+ },
8602
+ {
8603
+ "epoch": 10.96,
8604
+ "eval_loss": 0.8058525323867798,
8605
+ "eval_runtime": 1.3197,
8606
+ "eval_samples_per_second": 757.744,
8607
+ "eval_steps_per_second": 12.124,
8608
+ "step": 430000
8609
  }
8610
  ],
8611
  "max_steps": 500000,
8612
  "num_train_epochs": 13,
8613
+ "total_flos": 1.373781217817305e+22,
8614
  "trial_name": null,
8615
  "trial_params": null
8616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09cf499ab54bc6b5b46a8f12dd922823d0795f7339d994c55bfa6c710a91461c
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:402c24544f87635066120a062e0f54ba50aad454021fed3207324dcff5633224
3
  size 102501541