plip commited on
Commit
3cb01e9
1 Parent(s): 0e3a5c8

Training in progress, step 140000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16b5536ca8a6e377f81e58375b1a7ad0f81c77249b2b140bbeb9ba4baedae79d
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7f5c98114bb03a0107e9edbb2aa57590411cc0a9aced01e13efc323f9c7511d
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba517ef097f30eafaab51fcd07596ea1dc064b2e4ffa07b172ca6b443b8d5c50
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1151081009ac294a44ef99911473d89538dee75ada188d7d9e2d8f1a9b3231fc
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6905d0008e40eff2d80ae3e67e781129af6c787e6082983c24f27be53737707
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91cc3747c6b314cba53effe7863cb06ebbb2887876cfc49af2ce3563922b45f
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6905d0008e40eff2d80ae3e67e781129af6c787e6082983c24f27be53737707
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91cc3747c6b314cba53effe7863cb06ebbb2887876cfc49af2ce3563922b45f
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6905d0008e40eff2d80ae3e67e781129af6c787e6082983c24f27be53737707
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91cc3747c6b314cba53effe7863cb06ebbb2887876cfc49af2ce3563922b45f
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6905d0008e40eff2d80ae3e67e781129af6c787e6082983c24f27be53737707
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91cc3747c6b314cba53effe7863cb06ebbb2887876cfc49af2ce3563922b45f
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6905d0008e40eff2d80ae3e67e781129af6c787e6082983c24f27be53737707
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91cc3747c6b314cba53effe7863cb06ebbb2887876cfc49af2ce3563922b45f
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6905d0008e40eff2d80ae3e67e781129af6c787e6082983c24f27be53737707
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91cc3747c6b314cba53effe7863cb06ebbb2887876cfc49af2ce3563922b45f
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6905d0008e40eff2d80ae3e67e781129af6c787e6082983c24f27be53737707
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91cc3747c6b314cba53effe7863cb06ebbb2887876cfc49af2ce3563922b45f
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6905d0008e40eff2d80ae3e67e781129af6c787e6082983c24f27be53737707
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91cc3747c6b314cba53effe7863cb06ebbb2887876cfc49af2ce3563922b45f
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1c3de3c2d66025eca4b56b8d5efacb781ba1e9b4daae40acd9c92c0f7cbe98c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9522f343e5b1c37fe7e600f7b39d619e850350d8948dc240940a440582a0eb9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.313114837657373,
5
- "global_step": 130000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2606,11 +2606,211 @@
2606
  "eval_samples_per_second": 766.596,
2607
  "eval_steps_per_second": 12.266,
2608
  "step": 130000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2609
  }
2610
  ],
2611
  "max_steps": 500000,
2612
  "num_train_epochs": 13,
2613
- "total_flos": 4.153292367329365e+21,
2614
  "trial_name": null,
2615
  "trial_params": null
2616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.5679698251694787,
5
+ "global_step": 140000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2606
  "eval_samples_per_second": 766.596,
2607
  "eval_steps_per_second": 12.266,
2608
  "step": 130000
2609
+ },
2610
+ {
2611
+ "epoch": 3.33,
2612
+ "learning_rate": 0.0002661106834948409,
2613
+ "loss": 0.3058,
2614
+ "step": 130500
2615
+ },
2616
+ {
2617
+ "epoch": 3.34,
2618
+ "learning_rate": 0.0002658019908268041,
2619
+ "loss": 0.3048,
2620
+ "step": 131000
2621
+ },
2622
+ {
2623
+ "epoch": 3.34,
2624
+ "eval_loss": 0.8294724822044373,
2625
+ "eval_runtime": 1.2682,
2626
+ "eval_samples_per_second": 788.537,
2627
+ "eval_steps_per_second": 12.617,
2628
+ "step": 131000
2629
+ },
2630
+ {
2631
+ "epoch": 3.35,
2632
+ "learning_rate": 0.00026549208644594766,
2633
+ "loss": 0.305,
2634
+ "step": 131500
2635
+ },
2636
+ {
2637
+ "epoch": 3.36,
2638
+ "learning_rate": 0.00026518097374133627,
2639
+ "loss": 0.3051,
2640
+ "step": 132000
2641
+ },
2642
+ {
2643
+ "epoch": 3.36,
2644
+ "eval_loss": 0.8296982049942017,
2645
+ "eval_runtime": 1.3247,
2646
+ "eval_samples_per_second": 754.912,
2647
+ "eval_steps_per_second": 12.079,
2648
+ "step": 132000
2649
+ },
2650
+ {
2651
+ "epoch": 3.38,
2652
+ "learning_rate": 0.00026486865611524853,
2653
+ "loss": 0.3045,
2654
+ "step": 132500
2655
+ },
2656
+ {
2657
+ "epoch": 3.39,
2658
+ "learning_rate": 0.00026455513698314003,
2659
+ "loss": 0.3045,
2660
+ "step": 133000
2661
+ },
2662
+ {
2663
+ "epoch": 3.39,
2664
+ "eval_loss": 0.8295329213142395,
2665
+ "eval_runtime": 1.3528,
2666
+ "eval_samples_per_second": 739.189,
2667
+ "eval_steps_per_second": 11.827,
2668
+ "step": 133000
2669
+ },
2670
+ {
2671
+ "epoch": 3.4,
2672
+ "learning_rate": 0.0002642404197736058,
2673
+ "loss": 0.3045,
2674
+ "step": 133500
2675
+ },
2676
+ {
2677
+ "epoch": 3.42,
2678
+ "learning_rate": 0.0002639245079283428,
2679
+ "loss": 0.3043,
2680
+ "step": 134000
2681
+ },
2682
+ {
2683
+ "epoch": 3.42,
2684
+ "eval_loss": 0.8244909644126892,
2685
+ "eval_runtime": 1.3196,
2686
+ "eval_samples_per_second": 757.824,
2687
+ "eval_steps_per_second": 12.125,
2688
+ "step": 134000
2689
+ },
2690
+ {
2691
+ "epoch": 3.43,
2692
+ "learning_rate": 0.00026360740490211234,
2693
+ "loss": 0.3038,
2694
+ "step": 134500
2695
+ },
2696
+ {
2697
+ "epoch": 3.44,
2698
+ "learning_rate": 0.0002632891141627023,
2699
+ "loss": 0.3037,
2700
+ "step": 135000
2701
+ },
2702
+ {
2703
+ "epoch": 3.44,
2704
+ "eval_loss": 0.8188607096672058,
2705
+ "eval_runtime": 1.2963,
2706
+ "eval_samples_per_second": 771.439,
2707
+ "eval_steps_per_second": 12.343,
2708
+ "step": 135000
2709
+ },
2710
+ {
2711
+ "epoch": 3.45,
2712
+ "learning_rate": 0.00026296963919088923,
2713
+ "loss": 0.304,
2714
+ "step": 135500
2715
+ },
2716
+ {
2717
+ "epoch": 3.47,
2718
+ "learning_rate": 0.00026264898348040024,
2719
+ "loss": 0.3042,
2720
+ "step": 136000
2721
+ },
2722
+ {
2723
+ "epoch": 3.47,
2724
+ "eval_loss": 0.8286353349685669,
2725
+ "eval_runtime": 1.3187,
2726
+ "eval_samples_per_second": 758.324,
2727
+ "eval_steps_per_second": 12.133,
2728
+ "step": 136000
2729
+ },
2730
+ {
2731
+ "epoch": 3.48,
2732
+ "learning_rate": 0.0002623271505378748,
2733
+ "loss": 0.3038,
2734
+ "step": 136500
2735
+ },
2736
+ {
2737
+ "epoch": 3.49,
2738
+ "learning_rate": 0.00026200414388282637,
2739
+ "loss": 0.3038,
2740
+ "step": 137000
2741
+ },
2742
+ {
2743
+ "epoch": 3.49,
2744
+ "eval_loss": 0.8326404690742493,
2745
+ "eval_runtime": 1.3251,
2746
+ "eval_samples_per_second": 754.645,
2747
+ "eval_steps_per_second": 12.074,
2748
+ "step": 137000
2749
+ },
2750
+ {
2751
+ "epoch": 3.5,
2752
+ "learning_rate": 0.00026167996704760406,
2753
+ "loss": 0.3058,
2754
+ "step": 137500
2755
+ },
2756
+ {
2757
+ "epoch": 3.52,
2758
+ "learning_rate": 0.00026135462357735375,
2759
+ "loss": 0.3033,
2760
+ "step": 138000
2761
+ },
2762
+ {
2763
+ "epoch": 3.52,
2764
+ "eval_loss": 0.8183851838111877,
2765
+ "eval_runtime": 1.3581,
2766
+ "eval_samples_per_second": 736.344,
2767
+ "eval_steps_per_second": 11.782,
2768
+ "step": 138000
2769
+ },
2770
+ {
2771
+ "epoch": 3.53,
2772
+ "learning_rate": 0.0002610281170299795,
2773
+ "loss": 0.3035,
2774
+ "step": 138500
2775
+ },
2776
+ {
2777
+ "epoch": 3.54,
2778
+ "learning_rate": 0.00026070045097610465,
2779
+ "loss": 0.3035,
2780
+ "step": 139000
2781
+ },
2782
+ {
2783
+ "epoch": 3.54,
2784
+ "eval_loss": 0.8135998249053955,
2785
+ "eval_runtime": 1.309,
2786
+ "eval_samples_per_second": 763.92,
2787
+ "eval_steps_per_second": 12.223,
2788
+ "step": 139000
2789
+ },
2790
+ {
2791
+ "epoch": 3.56,
2792
+ "learning_rate": 0.0002603716289990326,
2793
+ "loss": 0.3026,
2794
+ "step": 139500
2795
+ },
2796
+ {
2797
+ "epoch": 3.57,
2798
+ "learning_rate": 0.00026004165469470787,
2799
+ "loss": 0.3027,
2800
+ "step": 140000
2801
+ },
2802
+ {
2803
+ "epoch": 3.57,
2804
+ "eval_loss": 0.8287385106086731,
2805
+ "eval_runtime": 1.2779,
2806
+ "eval_samples_per_second": 782.532,
2807
+ "eval_steps_per_second": 12.521,
2808
+ "step": 140000
2809
  }
2810
  ],
2811
  "max_steps": 500000,
2812
  "num_train_epochs": 13,
2813
+ "total_flos": 4.472779506103865e+21,
2814
  "trial_name": null,
2815
  "trial_params": null
2816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba517ef097f30eafaab51fcd07596ea1dc064b2e4ffa07b172ca6b443b8d5c50
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1151081009ac294a44ef99911473d89538dee75ada188d7d9e2d8f1a9b3231fc
3
  size 102501541