plip committed on
Commit
c3f7238
1 Parent(s): 465e9c6

Training in progress, step 110000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9a3fc6351d09bd039d306f53b05fa03590011ea2849b43bd52529e0bd8514b9
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5077b4837e7db854bd139cb7b83c4884833063cbbcc0c135891079eb84eb5023
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61dde823420dc7042c5bb2d699d112697bdd3010f540547619caa8b07f20d030
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b862e1a9d6ed89097b4519d1b3cde5a169841069d056cef214437ef6987452e1
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:330f3cda58bd3cda7fe4052bcd097fef510e7fce1d130c443ec32e1f5486a104
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a28dddede4fc4bc0b2366a2adf3a2a7cc15aa053d48f5784818c30e8f581c8c
3
+ size 14567
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db9a8646bfa77c67b79c4b30fed86fd905de2821912180ec33d06ceff7f9e882
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9027fde6d34c0d3df23bed16cbdb4322526cc2d0a788564dc5bdbc1a2a3cc939
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcf4e8fe75afcd199d62b4b9a4e3758f2eaf0ffa4730fdb2d98d4bb191801698
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ba2bcf5cf6c5910865dc9ad1b993b583122f7246dff018513acde53be4e1cef
3
  size 14439
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80c9489f4e58dc887e83e88882e5e1382e1ee3173dcbda417ffbbdcb858a9d65
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c0ded2329ef53920cb585a63eb47585e2a859a8641b80120f9a203cf7e7c919
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf0f5ebb927172859f9f7bd6df79a9456d1222e97acfe9ef9113e346912ff663
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0b2023ad615bca290ded4b09be5cde4d541c3da29f7726a5a43e59cc6dddea5
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1dae2826d240394f23ebead2538e7f8ab9151433f5d76c787e8de37065711b2b
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41b811cc0631c90302be59bb28064c0b766f48934ca956ae5177890d0b44ff21
3
+ size 14439
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d1a3d037a53cd82051029fcc53619c43fd8479867f8522cbd8ad58f9ba5a632
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ace9c134526b185b1490c715e5c10cccba9ced2c0bbb4bf14ec35ff87f84192
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c028788cc8743c6a98b2e0f67f4bc6d1434318ea95a0cdb39b9728727ca1342
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:094bb976952135775a16c6a0f91bc9d97a98726f15f9c9198b320fb5dc6d70ce
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7786e0d240c1817a80f936fe537093f6b0f81238abcccea2c0e618f1ac9e9438
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a36392859753540b5ff28a6690e0fb35c1157de322529d1ae210898db91ddda7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.063819357210699,
5
- "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2006,11 +2006,211 @@
2006
  "eval_samples_per_second": 1897.09,
2007
  "eval_steps_per_second": 30.353,
2008
  "step": 100000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2009
  }
2010
  ],
2011
  "max_steps": 500000,
2012
  "num_train_epochs": 16,
2013
- "total_flos": 3.1948474238890725e+21,
2014
  "trial_name": null,
2015
  "trial_params": null
2016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.370201292931769,
5
+ "global_step": 110000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2006
  "eval_samples_per_second": 1897.09,
2007
  "eval_steps_per_second": 30.353,
2008
  "step": 100000
2009
+ },
2010
+ {
2011
+ "epoch": 3.08,
2012
+ "learning_rate": 0.0002822947773032956,
2013
+ "loss": 0.3743,
2014
+ "step": 100500
2015
+ },
2016
+ {
2017
+ "epoch": 3.09,
2018
+ "learning_rate": 0.0002820644686063602,
2019
+ "loss": 0.3743,
2020
+ "step": 101000
2021
+ },
2022
+ {
2023
+ "epoch": 3.09,
2024
+ "eval_loss": 0.7854102253913879,
2025
+ "eval_runtime": 0.541,
2026
+ "eval_samples_per_second": 1848.349,
2027
+ "eval_steps_per_second": 29.574,
2028
+ "step": 101000
2029
+ },
2030
+ {
2031
+ "epoch": 3.11,
2032
+ "learning_rate": 0.00028183277035275363,
2033
+ "loss": 0.374,
2034
+ "step": 101500
2035
+ },
2036
+ {
2037
+ "epoch": 3.13,
2038
+ "learning_rate": 0.0002815996850762909,
2039
+ "loss": 0.374,
2040
+ "step": 102000
2041
+ },
2042
+ {
2043
+ "epoch": 3.13,
2044
+ "eval_loss": 0.7880498170852661,
2045
+ "eval_runtime": 0.5283,
2046
+ "eval_samples_per_second": 1892.948,
2047
+ "eval_steps_per_second": 30.287,
2048
+ "step": 102000
2049
+ },
2050
+ {
2051
+ "epoch": 3.14,
2052
+ "learning_rate": 0.00028136521532595515,
2053
+ "loss": 0.3737,
2054
+ "step": 102500
2055
+ },
2056
+ {
2057
+ "epoch": 3.16,
2058
+ "learning_rate": 0.00028112936366587023,
2059
+ "loss": 0.3734,
2060
+ "step": 103000
2061
+ },
2062
+ {
2063
+ "epoch": 3.16,
2064
+ "eval_loss": 0.7864383459091187,
2065
+ "eval_runtime": 0.5228,
2066
+ "eval_samples_per_second": 1912.628,
2067
+ "eval_steps_per_second": 30.602,
2068
+ "step": 103000
2069
+ },
2070
+ {
2071
+ "epoch": 3.17,
2072
+ "learning_rate": 0.00028089213267527184,
2073
+ "loss": 0.3733,
2074
+ "step": 103500
2075
+ },
2076
+ {
2077
+ "epoch": 3.19,
2078
+ "learning_rate": 0.0002806535249484803,
2079
+ "loss": 0.3731,
2080
+ "step": 104000
2081
+ },
2082
+ {
2083
+ "epoch": 3.19,
2084
+ "eval_loss": 0.783866822719574,
2085
+ "eval_runtime": 0.5321,
2086
+ "eval_samples_per_second": 1879.482,
2087
+ "eval_steps_per_second": 30.072,
2088
+ "step": 104000
2089
+ },
2090
+ {
2091
+ "epoch": 3.2,
2092
+ "learning_rate": 0.00028041354309487135,
2093
+ "loss": 0.3728,
2094
+ "step": 104500
2095
+ },
2096
+ {
2097
+ "epoch": 3.22,
2098
+ "learning_rate": 0.0002801721897388482,
2099
+ "loss": 0.3723,
2100
+ "step": 105000
2101
+ },
2102
+ {
2103
+ "epoch": 3.22,
2104
+ "eval_loss": 0.784348726272583,
2105
+ "eval_runtime": 0.5158,
2106
+ "eval_samples_per_second": 1938.703,
2107
+ "eval_steps_per_second": 31.019,
2108
+ "step": 105000
2109
+ },
2110
+ {
2111
+ "epoch": 3.23,
2112
+ "learning_rate": 0.0002799294675198124,
2113
+ "loss": 0.3723,
2114
+ "step": 105500
2115
+ },
2116
+ {
2117
+ "epoch": 3.25,
2118
+ "learning_rate": 0.00027968537909213524,
2119
+ "loss": 0.372,
2120
+ "step": 106000
2121
+ },
2122
+ {
2123
+ "epoch": 3.25,
2124
+ "eval_loss": 0.7889582514762878,
2125
+ "eval_runtime": 0.5199,
2126
+ "eval_samples_per_second": 1923.296,
2127
+ "eval_steps_per_second": 30.773,
2128
+ "step": 106000
2129
+ },
2130
+ {
2131
+ "epoch": 3.26,
2132
+ "learning_rate": 0.0002794399271251287,
2133
+ "loss": 0.3721,
2134
+ "step": 106500
2135
+ },
2136
+ {
2137
+ "epoch": 3.28,
2138
+ "learning_rate": 0.0002791931143030162,
2139
+ "loss": 0.3714,
2140
+ "step": 107000
2141
+ },
2142
+ {
2143
+ "epoch": 3.28,
2144
+ "eval_loss": 0.7859266400337219,
2145
+ "eval_runtime": 0.5189,
2146
+ "eval_samples_per_second": 1926.987,
2147
+ "eval_steps_per_second": 30.832,
2148
+ "step": 107000
2149
+ },
2150
+ {
2151
+ "epoch": 3.29,
2152
+ "learning_rate": 0.00027894494332490315,
2153
+ "loss": 0.3715,
2154
+ "step": 107500
2155
+ },
2156
+ {
2157
+ "epoch": 3.31,
2158
+ "learning_rate": 0.0002786954169047476,
2159
+ "loss": 0.3711,
2160
+ "step": 108000
2161
+ },
2162
+ {
2163
+ "epoch": 3.31,
2164
+ "eval_loss": 0.7798612117767334,
2165
+ "eval_runtime": 0.5242,
2166
+ "eval_samples_per_second": 1907.581,
2167
+ "eval_steps_per_second": 30.521,
2168
+ "step": 108000
2169
+ },
2170
+ {
2171
+ "epoch": 3.32,
2172
+ "learning_rate": 0.0002784445377713306,
2173
+ "loss": 0.3711,
2174
+ "step": 108500
2175
+ },
2176
+ {
2177
+ "epoch": 3.34,
2178
+ "learning_rate": 0.0002781923086682261,
2179
+ "loss": 0.371,
2180
+ "step": 109000
2181
+ },
2182
+ {
2183
+ "epoch": 3.34,
2184
+ "eval_loss": 0.7840728759765625,
2185
+ "eval_runtime": 0.5137,
2186
+ "eval_samples_per_second": 1946.758,
2187
+ "eval_steps_per_second": 31.148,
2188
+ "step": 109000
2189
+ },
2190
+ {
2191
+ "epoch": 3.35,
2192
+ "learning_rate": 0.0002779387323537711,
2193
+ "loss": 0.3704,
2194
+ "step": 109500
2195
+ },
2196
+ {
2197
+ "epoch": 3.37,
2198
+ "learning_rate": 0.0002776838116010356,
2199
+ "loss": 0.3705,
2200
+ "step": 110000
2201
+ },
2202
+ {
2203
+ "epoch": 3.37,
2204
+ "eval_loss": 0.7811622023582458,
2205
+ "eval_runtime": 0.5403,
2206
+ "eval_samples_per_second": 1850.736,
2207
+ "eval_steps_per_second": 29.612,
2208
+ "step": 110000
2209
  }
2210
  ],
2211
  "max_steps": 500000,
2212
  "num_train_epochs": 16,
2213
+ "total_flos": 3.5143345626635724e+21,
2214
  "trial_name": null,
2215
  "trial_params": null
2216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61dde823420dc7042c5bb2d699d112697bdd3010f540547619caa8b07f20d030
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b862e1a9d6ed89097b4519d1b3cde5a169841069d056cef214437ef6987452e1
3
  size 102501541