plip committed on
Commit 730b110
Parent(s): 770f520

Training in progress, step 100000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f971e1495b95965474408a0b4cdd855e70f28b6c197542006f027ca89e6a9e5
+oid sha256:b4c86fed892e5d853a0aa10f530918fd27c33343c4e8c930c0a02ed36b9c3f12
 size 202194449
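The block above (and the similar blocks for the other checkpoint files below) are Git LFS pointer diffs: the repository tracks a three-line text pointer, while the actual binary lives in LFS storage. The oid is the SHA-256 digest of the file's contents and size is its length in bytes, so a downloaded object can be checked against its pointer. A minimal sketch in Python, assuming the binary (here last-checkpoint/optimizer.pt) has already been pulled locally; the path and helper name are illustrative, not part of this commit:

import hashlib
import os

def verify_lfs_object(binary_path, expected_oid, expected_size):
    # "size" in the pointer is the byte length of the real file.
    if os.path.getsize(binary_path) != expected_size:
        return False
    # "oid sha256:<hex>" is the SHA-256 digest of the file contents.
    digest = hashlib.sha256()
    with open(binary_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# New optimizer state pushed in this commit:
print(verify_lfs_object(
    "last-checkpoint/optimizer.pt",
    "b4c86fed892e5d853a0aa10f530918fd27c33343c4e8c930c0a02ed36b9c3f12",
    202194449,
))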
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae321af5d431bdb16494979ea9657b4f788a7b917f7196ff1444688e31808017
+oid sha256:c1656fe19fa26589bb005c2ca1f054c8e8bb207866300c7f3b095bee470e6b55
 size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1986c24d7a3a4d6c5a8895904d8a1d8eb2f722e05eca51a88138a5893ec570fd
+oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
 size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1986c24d7a3a4d6c5a8895904d8a1d8eb2f722e05eca51a88138a5893ec570fd
+oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
 size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1986c24d7a3a4d6c5a8895904d8a1d8eb2f722e05eca51a88138a5893ec570fd
+oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
 size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1986c24d7a3a4d6c5a8895904d8a1d8eb2f722e05eca51a88138a5893ec570fd
+oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
 size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1986c24d7a3a4d6c5a8895904d8a1d8eb2f722e05eca51a88138a5893ec570fd
+oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
 size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1986c24d7a3a4d6c5a8895904d8a1d8eb2f722e05eca51a88138a5893ec570fd
+oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
 size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1986c24d7a3a4d6c5a8895904d8a1d8eb2f722e05eca51a88138a5893ec570fd
+oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
 size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1986c24d7a3a4d6c5a8895904d8a1d8eb2f722e05eca51a88138a5893ec570fd
+oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
 size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5810d5d1337eca8d561357c6c9e9920258e5fc2b2f0f70ea4b52e4984949eec8
+oid sha256:7786e0d240c1817a80f936fe537093f6b0f81238abcccea2c0e618f1ac9e9438
 size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.2936948876089507,
-  "global_step": 90000,
+  "epoch": 2.548549875121056,
+  "global_step": 100000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1806,11 +1806,211 @@
       "eval_samples_per_second": 813.129,
       "eval_steps_per_second": 13.01,
       "step": 90000
+    },
+    {
+      "epoch": 2.31,
+      "learning_rate": 0.00028660537773622294,
+      "loss": 0.3158,
+      "step": 90500
+    },
+    {
+      "epoch": 2.32,
+      "learning_rate": 0.00028640336867499143,
+      "loss": 0.3149,
+      "step": 91000
+    },
+    {
+      "epoch": 2.32,
+      "eval_loss": 0.8462135195732117,
+      "eval_runtime": 1.1973,
+      "eval_samples_per_second": 835.246,
+      "eval_steps_per_second": 13.364,
+      "step": 91000
+    },
+    {
+      "epoch": 2.33,
+      "learning_rate": 0.0002861999226075728,
+      "loss": 0.3148,
+      "step": 91500
+    },
+    {
+      "epoch": 2.34,
+      "learning_rate": 0.0002859950417588206,
+      "loss": 0.3144,
+      "step": 92000
+    },
+    {
+      "epoch": 2.34,
+      "eval_loss": 0.8435172438621521,
+      "eval_runtime": 1.2492,
+      "eval_samples_per_second": 800.501,
+      "eval_steps_per_second": 12.808,
+      "step": 92000
+    },
+    {
+      "epoch": 2.36,
+      "learning_rate": 0.00028578872836927904,
+      "loss": 0.3144,
+      "step": 92500
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 0.0002855809846951582,
+      "loss": 0.3141,
+      "step": 93000
+    },
+    {
+      "epoch": 2.37,
+      "eval_loss": 0.8363200426101685,
+      "eval_runtime": 1.2168,
+      "eval_samples_per_second": 821.803,
+      "eval_steps_per_second": 13.149,
+      "step": 93000
+    },
+    {
+      "epoch": 2.38,
+      "learning_rate": 0.00028537181300830963,
+      "loss": 0.3138,
+      "step": 93500
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.0002851612155962014,
+      "loss": 0.3139,
+      "step": 94000
+    },
+    {
+      "epoch": 2.4,
+      "eval_loss": 0.8434939384460449,
+      "eval_runtime": 1.2528,
+      "eval_samples_per_second": 798.238,
+      "eval_steps_per_second": 12.772,
+      "step": 94000
+    },
+    {
+      "epoch": 2.41,
+      "learning_rate": 0.0002849491947618932,
+      "loss": 0.3139,
+      "step": 94500
+    },
+    {
+      "epoch": 2.42,
+      "learning_rate": 0.0002847357528240107,
+      "loss": 0.3139,
+      "step": 95000
+    },
+    {
+      "epoch": 2.42,
+      "eval_loss": 0.8472431898117065,
+      "eval_runtime": 1.2611,
+      "eval_samples_per_second": 792.952,
+      "eval_steps_per_second": 12.687,
+      "step": 95000
+    },
+    {
+      "epoch": 2.43,
+      "learning_rate": 0.0002845208921167208,
+      "loss": 0.3137,
+      "step": 95500
+    },
+    {
+      "epoch": 2.45,
+      "learning_rate": 0.00028430461498970584,
+      "loss": 0.3131,
+      "step": 96000
+    },
+    {
+      "epoch": 2.45,
+      "eval_loss": 0.8395859599113464,
+      "eval_runtime": 1.2301,
+      "eval_samples_per_second": 812.96,
+      "eval_steps_per_second": 13.007,
+      "step": 96000
+    },
+    {
+      "epoch": 2.46,
+      "learning_rate": 0.00028408692380813775,
+      "loss": 0.3125,
+      "step": 96500
+    },
+    {
+      "epoch": 2.47,
+      "learning_rate": 0.00028386782095265247,
+      "loss": 0.3125,
+      "step": 97000
+    },
+    {
+      "epoch": 2.47,
+      "eval_loss": 0.8419870734214783,
+      "eval_runtime": 1.245,
+      "eval_samples_per_second": 803.234,
+      "eval_steps_per_second": 12.852,
+      "step": 97000
+    },
+    {
+      "epoch": 2.48,
+      "learning_rate": 0.0002836473088193237,
+      "loss": 0.313,
+      "step": 97500
+    },
+    {
+      "epoch": 2.5,
+      "learning_rate": 0.00028342538981963677,
+      "loss": 0.3128,
+      "step": 98000
+    },
+    {
+      "epoch": 2.5,
+      "eval_loss": 0.8410093784332275,
+      "eval_runtime": 1.2759,
+      "eval_samples_per_second": 783.735,
+      "eval_steps_per_second": 12.54,
+      "step": 98000
+    },
+    {
+      "epoch": 2.51,
+      "learning_rate": 0.0002832020663804624,
+      "loss": 0.3124,
+      "step": 98500
+    },
+    {
+      "epoch": 2.52,
+      "learning_rate": 0.00028297734094402986,
+      "loss": 0.3121,
+      "step": 99000
+    },
+    {
+      "epoch": 2.52,
+      "eval_loss": 0.8417559862136841,
+      "eval_runtime": 1.2688,
+      "eval_samples_per_second": 788.14,
+      "eval_steps_per_second": 12.61,
+      "step": 99000
+    },
+    {
+      "epoch": 2.54,
+      "learning_rate": 0.0002827512159679005,
+      "loss": 0.3122,
+      "step": 99500
+    },
+    {
+      "epoch": 2.55,
+      "learning_rate": 0.00028252369392494086,
+      "loss": 0.3118,
+      "step": 100000
+    },
+    {
+      "epoch": 2.55,
+      "eval_loss": 0.8400096893310547,
+      "eval_runtime": 1.2486,
+      "eval_samples_per_second": 800.914,
+      "eval_steps_per_second": 12.815,
+      "step": 100000
     }
   ],
   "max_steps": 500000,
   "num_train_epochs": 13,
-  "total_flos": 2.87535729114441e+21,
+  "total_flos": 3.19484442991891e+21,
   "trial_name": null,
   "trial_params": null
 }
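The new log entries extend the training history from step 90000 to step 100000 (epoch 2.29 -> 2.55): training loss edges down from 0.3158 to 0.3118 while eval_loss stays around 0.84. A minimal consistency sketch in Python, using only numbers that appear in this file; the assumption that num_train_epochs is the projected epoch count at max_steps rounded up is mine, not something stated in the checkpoint:

import math

epoch_at_100k = 2.548549875121056    # "epoch" logged at "global_step": 100000
max_steps = 500000                   # "max_steps"
num_train_epochs = 13                # "num_train_epochs"

steps_per_epoch = 100000 / epoch_at_100k            # ~39238 optimizer steps per epoch
epochs_at_max_steps = max_steps / steps_per_epoch   # ~12.74 epochs when max_steps is reached

# Rounding the projection up reproduces the stored epoch count (assumed rule).
print(round(steps_per_epoch), round(epochs_at_max_steps, 2))
print(math.ceil(epochs_at_max_steps) == num_train_epochs)   # True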
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae321af5d431bdb16494979ea9657b4f788a7b917f7196ff1444688e31808017
+oid sha256:c1656fe19fa26589bb005c2ca1f054c8e8bb207866300c7f3b095bee470e6b55
 size 102501541