plip committed on
Commit 465e9c6
1 Parent(s): 82436a4

Training in progress, step 100000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6fafdf3dc4053a478a09fce7cd9cc15053b19a33d6b58c36ca5436ccd97913a6
+ oid sha256:c9a3fc6351d09bd039d306f53b05fa03590011ea2849b43bd52529e0bd8514b9
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a64deb57f36a27064cc4d0b280f90a72b59990f15860b6537f49879bcfb34aa6
+ oid sha256:61dde823420dc7042c5bb2d699d112697bdd3010f540547619caa8b07f20d030
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be2d0bd68e065eb85049372f4abb8ffbf27c8d2f5fa7ea3177bedf20fcf99e26
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:330f3cda58bd3cda7fe4052bcd097fef510e7fce1d130c443ec32e1f5486a104
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f1a5b5ce099930b883dc48f18ec0c4aafea3d1e465572f0a3a7b87ea92fa07c4
+ oid sha256:db9a8646bfa77c67b79c4b30fed86fd905de2821912180ec33d06ceff7f9e882
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:93b838f5687f66973e8201c7e8dadb656fe97b315b3bbf77e5a2d000f34b91f5
- size 14503
+ oid sha256:dcf4e8fe75afcd199d62b4b9a4e3758f2eaf0ffa4730fdb2d98d4bb191801698
+ size 14439
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e876d9529d9370e353ad6f8fd0c6b88167a47f97421642e8d8095904566a3c54
+ oid sha256:80c9489f4e58dc887e83e88882e5e1382e1ee3173dcbda417ffbbdcb858a9d65
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:49e1f1672d845e202794ad0ccc105b119321c0e3d067197efd0a488ecef1d0bf
+ oid sha256:cf0f5ebb927172859f9f7bd6df79a9456d1222e97acfe9ef9113e346912ff663
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cb26a39e3710aced6be33af5040431a47fb2753a31b1d0a91fe360a3e160bbe4
- size 14503
+ oid sha256:1dae2826d240394f23ebead2538e7f8ab9151433f5d76c787e8de37065711b2b
+ size 14567
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ae3dd349d6db4f4551324cbce8200b29561e6b98026e7f91e8180d039a45cc34
- size 14439
+ oid sha256:0d1a3d037a53cd82051029fcc53619c43fd8479867f8522cbd8ad58f9ba5a632
+ size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:57599eaa96b9ac595309802eb671733bad63e8bd6aa65ca65eb41f0a31f5c9bd
- size 14567
+ oid sha256:2c028788cc8743c6a98b2e0f67f4bc6d1434318ea95a0cdb39b9728727ca1342
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5810d5d1337eca8d561357c6c9e9920258e5fc2b2f0f70ea4b52e4984949eec8
+ oid sha256:7786e0d240c1817a80f936fe537093f6b0f81238abcccea2c0e618f1ac9e9438
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 2.757437421489629,
- "global_step": 90000,
+ "epoch": 3.063819357210699,
+ "global_step": 100000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1806,11 +1806,211 @@
  "eval_samples_per_second": 1955.578,
  "eval_steps_per_second": 31.289,
  "step": 90000
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.00028660537773622294,
+ "loss": 0.3803,
+ "step": 90500
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.00028640336867499143,
+ "loss": 0.3796,
+ "step": 91000
+ },
+ {
+ "epoch": 2.79,
+ "eval_loss": 0.7904797196388245,
+ "eval_runtime": 0.5244,
+ "eval_samples_per_second": 1906.796,
+ "eval_steps_per_second": 30.509,
+ "step": 91000
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 0.0002861999226075728,
+ "loss": 0.3798,
+ "step": 91500
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.0002859950417588206,
+ "loss": 0.3792,
+ "step": 92000
+ },
+ {
+ "epoch": 2.82,
+ "eval_loss": 0.7905736565589905,
+ "eval_runtime": 0.5306,
+ "eval_samples_per_second": 1884.737,
+ "eval_steps_per_second": 30.156,
+ "step": 92000
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 0.00028578872836927904,
+ "loss": 0.3788,
+ "step": 92500
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.0002855809846951582,
+ "loss": 0.3789,
+ "step": 93000
+ },
+ {
+ "epoch": 2.85,
+ "eval_loss": 0.7809098958969116,
+ "eval_runtime": 0.5414,
+ "eval_samples_per_second": 1847.16,
+ "eval_steps_per_second": 29.555,
+ "step": 93000
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 0.00028537181300830963,
+ "loss": 0.3782,
+ "step": 93500
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 0.0002851612155962014,
+ "loss": 0.3784,
+ "step": 94000
+ },
+ {
+ "epoch": 2.88,
+ "eval_loss": 0.7892218232154846,
+ "eval_runtime": 0.516,
+ "eval_samples_per_second": 1938.108,
+ "eval_steps_per_second": 31.01,
+ "step": 94000
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.0002849491947618932,
+ "loss": 0.3778,
+ "step": 94500
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 0.0002847357528240107,
+ "loss": 0.3775,
+ "step": 95000
+ },
+ {
+ "epoch": 2.91,
+ "eval_loss": 0.7847021818161011,
+ "eval_runtime": 0.5181,
+ "eval_samples_per_second": 1930.051,
+ "eval_steps_per_second": 30.881,
+ "step": 95000
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 0.0002845208921167208,
+ "loss": 0.3773,
+ "step": 95500
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.00028430461498970584,
+ "loss": 0.3768,
+ "step": 96000
+ },
+ {
+ "epoch": 2.94,
+ "eval_loss": 0.78525710105896,
+ "eval_runtime": 0.5373,
+ "eval_samples_per_second": 1861.031,
+ "eval_steps_per_second": 29.776,
+ "step": 96000
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 0.00028408692380813775,
+ "loss": 0.3768,
+ "step": 96500
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.00028386782095265247,
+ "loss": 0.3769,
+ "step": 97000
+ },
+ {
+ "epoch": 2.97,
+ "eval_loss": 0.7837897539138794,
+ "eval_runtime": 0.5288,
+ "eval_samples_per_second": 1891.025,
+ "eval_steps_per_second": 30.256,
+ "step": 97000
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.0002836473088193237,
+ "loss": 0.3761,
+ "step": 97500
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.00028342538981963677,
+ "loss": 0.3797,
+ "step": 98000
+ },
+ {
+ "epoch": 3.0,
+ "eval_loss": 0.7850324511528015,
+ "eval_runtime": 0.5376,
+ "eval_samples_per_second": 1860.245,
+ "eval_steps_per_second": 29.764,
+ "step": 98000
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.0002832020663804624,
+ "loss": 0.3752,
+ "step": 98500
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00028297734094402986,
+ "loss": 0.3747,
+ "step": 99000
+ },
+ {
+ "epoch": 3.03,
+ "eval_loss": 0.780020534992218,
+ "eval_runtime": 0.5281,
+ "eval_samples_per_second": 1893.464,
+ "eval_steps_per_second": 30.295,
+ "step": 99000
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.0002827512159679005,
+ "loss": 0.375,
+ "step": 99500
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 0.00028252369392494086,
+ "loss": 0.3746,
+ "step": 100000
+ },
+ {
+ "epoch": 3.06,
+ "eval_loss": 0.7855916619300842,
+ "eval_runtime": 0.5271,
+ "eval_samples_per_second": 1897.09,
+ "eval_steps_per_second": 30.353,
+ "step": 100000
  }
  ],
  "max_steps": 500000,
  "num_train_epochs": 16,
- "total_flos": 2.8753682730665483e+21,
+ "total_flos": 3.1948474238890725e+21,
  "trial_name": null,
  "trial_params": null
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a64deb57f36a27064cc4d0b280f90a72b59990f15860b6537f49879bcfb34aa6
+ oid sha256:61dde823420dc7042c5bb2d699d112697bdd3010f540547619caa8b07f20d030
  size 102501541
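
The updated trainer_state.json above records global step 100000 at roughly epoch 3.06, with the latest eval loss logged at that step. A minimal sketch of reading those progress fields after checking out this commit (the local path is illustrative, not part of the commit itself):

```python
import json

# Hypothetical local path to the checkpoint directory updated by this commit.
state_path = "last-checkpoint/trainer_state.json"

with open(state_path) as f:
    state = json.load(f)

# Top-level progress fields written by the Hugging Face Trainer.
print(state["global_step"])  # 100000
print(state["epoch"])        # ~3.063819357210699

# The last log_history entry holds the most recent eval metrics.
last = state["log_history"][-1]
print(last.get("eval_loss"), last.get("step"))  # 0.7855916619300842 100000
```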