plip commited on
Commit
256bf23
1 Parent(s): 03d2158

Training in progress, step 140000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b61f0fcd50875833fb0e1f9ce51cdf7d430e60b619e76669b19c533bc334c88
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1a663fa8a1428dfd0e1ea579f8f1884a2fe84a94e67275218c60462158fe8aa
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39c23cb20328d6cd580ce88942d789c9a02552a95a268d89326d508421abbe4a
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a40275396ce53417c9c00526c28902e900eee7fe8a1176e3b167b8fa880709f1
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8e29d5733f7ec570f71140fb29faf2306a82ee45c27c9eb9c785e5d73fa8382
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff6473d36a3b0d61e638f6f7adf0e3c4e3a4c90311741bef45eedc8752b4abbb
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dabc5ed16f76e0d1a8c7f2b2c2a3619addb4d0c3eca0a88fa6e7b0ba2beea5dd
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abd20d5dfc80e3bb5cba79ef18d635e6b9d3fba3685d49236ba9bbfcbd245f3d
3
  size 14439
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7531b762a4f77385b5cf4054c1324ef1475bf3e0305baf6834591cd7a2e6b325
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2963be3c0faa28379747d9429ff572f2788db89f6185c9c53832cb9290c1c849
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95971fab031e1bc0e074238eeb9bb97d0dce735f44e80bc1ac009c7c3e11e0ef
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc4f1c2a017f1ba8bd941830cb07d47e6bde89cb5e80ca0663de3f779aa3e16b
3
  size 14439
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b5759d26021494b175082e8aa472aecf989df6cf2e73cc13099dc5f04e8a3a7
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52188af98e11936af8bc69f5366b88b44cd6c73d9b1b37682a5e46e4706f5e3f
3
+ size 14439
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:437af33cac768d06b94a7ff819e2415c2d01a6cb933eea2964edd87e0a74d2a3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:299331297e38ed6c53d0026997de39e06ade131719f6e2f227655bc716b72e7d
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f46bc5befc0dd2669aebf64aa0216b1a5da79235f03660685ed5378a375aacc
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d145aa3067eb9b9e66af4f261f18929e80b865f364d9ea9075acacb8581bccd8
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10d615ff7d950e30ec30974f2cf78479d745d92e39a0b3ba0bec5c1a7fb2cdeb
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8dc2be3757aa011e41ae06df8c828d05d218a32a27ad90767a1b48fa565a909
3
+ size 14439
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1c3de3c2d66025eca4b56b8d5efacb781ba1e9b4daae40acd9c92c0f7cbe98c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9522f343e5b1c37fe7e600f7b39d619e850350d8948dc240940a440582a0eb9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.9829651643739084,
5
- "global_step": 130000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2606,11 +2606,211 @@
2606
  "eval_samples_per_second": 1938.903,
2607
  "eval_steps_per_second": 31.022,
2608
  "step": 130000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2609
  }
2610
  ],
2611
  "max_steps": 500000,
2612
  "num_train_epochs": 16,
2613
- "total_flos": 4.1533088402125724e+21,
2614
  "trial_name": null,
2615
  "trial_params": null
2616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.289347100094979,
5
+ "global_step": 140000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2606
  "eval_samples_per_second": 1938.903,
2607
  "eval_steps_per_second": 31.022,
2608
  "step": 130000
2609
+ },
2610
+ {
2611
+ "epoch": 4.0,
2612
+ "learning_rate": 0.0002661106834948409,
2613
+ "loss": 0.3626,
2614
+ "step": 130500
2615
+ },
2616
+ {
2617
+ "epoch": 4.01,
2618
+ "learning_rate": 0.0002658019908268041,
2619
+ "loss": 0.3625,
2620
+ "step": 131000
2621
+ },
2622
+ {
2623
+ "epoch": 4.01,
2624
+ "eval_loss": 0.7918245792388916,
2625
+ "eval_runtime": 0.521,
2626
+ "eval_samples_per_second": 1919.333,
2627
+ "eval_steps_per_second": 30.709,
2628
+ "step": 131000
2629
+ },
2630
+ {
2631
+ "epoch": 4.03,
2632
+ "learning_rate": 0.00026549208644594766,
2633
+ "loss": 0.3622,
2634
+ "step": 131500
2635
+ },
2636
+ {
2637
+ "epoch": 4.04,
2638
+ "learning_rate": 0.00026518097374133627,
2639
+ "loss": 0.3622,
2640
+ "step": 132000
2641
+ },
2642
+ {
2643
+ "epoch": 4.04,
2644
+ "eval_loss": 0.7886719107627869,
2645
+ "eval_runtime": 0.5166,
2646
+ "eval_samples_per_second": 1935.893,
2647
+ "eval_steps_per_second": 30.974,
2648
+ "step": 132000
2649
+ },
2650
+ {
2651
+ "epoch": 4.06,
2652
+ "learning_rate": 0.00026486865611524853,
2653
+ "loss": 0.3622,
2654
+ "step": 132500
2655
+ },
2656
+ {
2657
+ "epoch": 4.07,
2658
+ "learning_rate": 0.00026455513698314003,
2659
+ "loss": 0.3616,
2660
+ "step": 133000
2661
+ },
2662
+ {
2663
+ "epoch": 4.07,
2664
+ "eval_loss": 0.7906731963157654,
2665
+ "eval_runtime": 0.5202,
2666
+ "eval_samples_per_second": 1922.502,
2667
+ "eval_steps_per_second": 30.76,
2668
+ "step": 133000
2669
+ },
2670
+ {
2671
+ "epoch": 4.09,
2672
+ "learning_rate": 0.0002642404197736058,
2673
+ "loss": 0.3618,
2674
+ "step": 133500
2675
+ },
2676
+ {
2677
+ "epoch": 4.11,
2678
+ "learning_rate": 0.0002639245079283428,
2679
+ "loss": 0.3646,
2680
+ "step": 134000
2681
+ },
2682
+ {
2683
+ "epoch": 4.11,
2684
+ "eval_loss": 0.7901037931442261,
2685
+ "eval_runtime": 0.5304,
2686
+ "eval_samples_per_second": 1885.342,
2687
+ "eval_steps_per_second": 30.165,
2688
+ "step": 134000
2689
+ },
2690
+ {
2691
+ "epoch": 4.12,
2692
+ "learning_rate": 0.00026360740490211234,
2693
+ "loss": 0.3612,
2694
+ "step": 134500
2695
+ },
2696
+ {
2697
+ "epoch": 4.14,
2698
+ "learning_rate": 0.0002632891141627023,
2699
+ "loss": 0.3604,
2700
+ "step": 135000
2701
+ },
2702
+ {
2703
+ "epoch": 4.14,
2704
+ "eval_loss": 0.7916610836982727,
2705
+ "eval_runtime": 0.5131,
2706
+ "eval_samples_per_second": 1948.858,
2707
+ "eval_steps_per_second": 31.182,
2708
+ "step": 135000
2709
+ },
2710
+ {
2711
+ "epoch": 4.15,
2712
+ "learning_rate": 0.00026296963919088923,
2713
+ "loss": 0.3603,
2714
+ "step": 135500
2715
+ },
2716
+ {
2717
+ "epoch": 4.17,
2718
+ "learning_rate": 0.00026264898348040024,
2719
+ "loss": 0.3604,
2720
+ "step": 136000
2721
+ },
2722
+ {
2723
+ "epoch": 4.17,
2724
+ "eval_loss": 0.7712200284004211,
2725
+ "eval_runtime": 0.519,
2726
+ "eval_samples_per_second": 1926.697,
2727
+ "eval_steps_per_second": 30.827,
2728
+ "step": 136000
2729
+ },
2730
+ {
2731
+ "epoch": 4.18,
2732
+ "learning_rate": 0.0002623271505378748,
2733
+ "loss": 0.3608,
2734
+ "step": 136500
2735
+ },
2736
+ {
2737
+ "epoch": 4.2,
2738
+ "learning_rate": 0.00026200414388282637,
2739
+ "loss": 0.361,
2740
+ "step": 137000
2741
+ },
2742
+ {
2743
+ "epoch": 4.2,
2744
+ "eval_loss": 0.7850333452224731,
2745
+ "eval_runtime": 0.5415,
2746
+ "eval_samples_per_second": 1846.815,
2747
+ "eval_steps_per_second": 29.549,
2748
+ "step": 137000
2749
+ },
2750
+ {
2751
+ "epoch": 4.21,
2752
+ "learning_rate": 0.00026167996704760406,
2753
+ "loss": 0.3603,
2754
+ "step": 137500
2755
+ },
2756
+ {
2757
+ "epoch": 4.23,
2758
+ "learning_rate": 0.00026135462357735375,
2759
+ "loss": 0.3604,
2760
+ "step": 138000
2761
+ },
2762
+ {
2763
+ "epoch": 4.23,
2764
+ "eval_loss": 0.7824327349662781,
2765
+ "eval_runtime": 0.5307,
2766
+ "eval_samples_per_second": 1884.218,
2767
+ "eval_steps_per_second": 30.147,
2768
+ "step": 138000
2769
+ },
2770
+ {
2771
+ "epoch": 4.24,
2772
+ "learning_rate": 0.0002610281170299795,
2773
+ "loss": 0.3602,
2774
+ "step": 138500
2775
+ },
2776
+ {
2777
+ "epoch": 4.26,
2778
+ "learning_rate": 0.00026070045097610465,
2779
+ "loss": 0.36,
2780
+ "step": 139000
2781
+ },
2782
+ {
2783
+ "epoch": 4.26,
2784
+ "eval_loss": 0.787578821182251,
2785
+ "eval_runtime": 0.5306,
2786
+ "eval_samples_per_second": 1884.712,
2787
+ "eval_steps_per_second": 30.155,
2788
+ "step": 139000
2789
+ },
2790
+ {
2791
+ "epoch": 4.27,
2792
+ "learning_rate": 0.0002603716289990326,
2793
+ "loss": 0.3601,
2794
+ "step": 139500
2795
+ },
2796
+ {
2797
+ "epoch": 4.29,
2798
+ "learning_rate": 0.00026004165469470787,
2799
+ "loss": 0.3597,
2800
+ "step": 140000
2801
+ },
2802
+ {
2803
+ "epoch": 4.29,
2804
+ "eval_loss": 0.7803857326507568,
2805
+ "eval_runtime": 0.5242,
2806
+ "eval_samples_per_second": 1907.849,
2807
+ "eval_steps_per_second": 30.526,
2808
+ "step": 140000
2809
  }
2810
  ],
2811
  "max_steps": 500000,
2812
  "num_train_epochs": 16,
2813
+ "total_flos": 4.4727879910350966e+21,
2814
  "trial_name": null,
2815
  "trial_params": null
2816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39c23cb20328d6cd580ce88942d789c9a02552a95a268d89326d508421abbe4a
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a40275396ce53417c9c00526c28902e900eee7fe8a1176e3b167b8fa880709f1
3
  size 102501541