plip committed on
Commit
84369b0
1 Parent(s): 730b110

Training in progress, step 110000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4c86fed892e5d853a0aa10f530918fd27c33343c4e8c930c0a02ed36b9c3f12
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49758088a73ffa86cfa7391b8520727f0e91c812a8d23680b3ffbf53509abe86
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1656fe19fa26589bb005c2ca1f054c8e8bb207866300c7f3b095bee470e6b55
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d6b8966d1cc83e4388837c4e32ca9b57abfb21fb0d307bbaed74f29719988a7
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed45958fcf12318fa043e135a1ac933625a86df5477ef36cf136e443a8e9059f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a623d5977f2ffdfbfa35a1493256efb0eabb662e5c36d8f15402dd8b77d8717
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7786e0d240c1817a80f936fe537093f6b0f81238abcccea2c0e618f1ac9e9438
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a36392859753540b5ff28a6690e0fb35c1157de322529d1ae210898db91ddda7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.548549875121056,
5
- "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2006,11 +2006,211 @@
2006
  "eval_samples_per_second": 800.914,
2007
  "eval_steps_per_second": 12.815,
2008
  "step": 100000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2009
  }
2010
  ],
2011
  "max_steps": 500000,
2012
  "num_train_epochs": 13,
2013
- "total_flos": 3.19484442991891e+21,
2014
  "trial_name": null,
2015
  "trial_params": null
2016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.803404862633162,
5
+ "global_step": 110000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2006
  "eval_samples_per_second": 800.914,
2007
  "eval_steps_per_second": 12.815,
2008
  "step": 100000
2009
+ },
2010
+ {
2011
+ "epoch": 2.56,
2012
+ "learning_rate": 0.0002822947773032956,
2013
+ "loss": 0.3117,
2014
+ "step": 100500
2015
+ },
2016
+ {
2017
+ "epoch": 2.57,
2018
+ "learning_rate": 0.0002820644686063602,
2019
+ "loss": 0.3112,
2020
+ "step": 101000
2021
+ },
2022
+ {
2023
+ "epoch": 2.57,
2024
+ "eval_loss": 0.8347204327583313,
2025
+ "eval_runtime": 1.2512,
2026
+ "eval_samples_per_second": 799.226,
2027
+ "eval_steps_per_second": 12.788,
2028
+ "step": 101000
2029
+ },
2030
+ {
2031
+ "epoch": 2.59,
2032
+ "learning_rate": 0.00028183277035275363,
2033
+ "loss": 0.3117,
2034
+ "step": 101500
2035
+ },
2036
+ {
2037
+ "epoch": 2.6,
2038
+ "learning_rate": 0.0002815996850762909,
2039
+ "loss": 0.3112,
2040
+ "step": 102000
2041
+ },
2042
+ {
2043
+ "epoch": 2.6,
2044
+ "eval_loss": 0.8288648724555969,
2045
+ "eval_runtime": 1.2077,
2046
+ "eval_samples_per_second": 828.045,
2047
+ "eval_steps_per_second": 13.249,
2048
+ "step": 102000
2049
+ },
2050
+ {
2051
+ "epoch": 2.61,
2052
+ "learning_rate": 0.00028136521532595515,
2053
+ "loss": 0.311,
2054
+ "step": 102500
2055
+ },
2056
+ {
2057
+ "epoch": 2.63,
2058
+ "learning_rate": 0.00028112936366587023,
2059
+ "loss": 0.3112,
2060
+ "step": 103000
2061
+ },
2062
+ {
2063
+ "epoch": 2.63,
2064
+ "eval_loss": 0.8455829620361328,
2065
+ "eval_runtime": 1.2774,
2066
+ "eval_samples_per_second": 782.835,
2067
+ "eval_steps_per_second": 12.525,
2068
+ "step": 103000
2069
+ },
2070
+ {
2071
+ "epoch": 2.64,
2072
+ "learning_rate": 0.00028089213267527184,
2073
+ "loss": 0.311,
2074
+ "step": 103500
2075
+ },
2076
+ {
2077
+ "epoch": 2.65,
2078
+ "learning_rate": 0.0002806535249484803,
2079
+ "loss": 0.3107,
2080
+ "step": 104000
2081
+ },
2082
+ {
2083
+ "epoch": 2.65,
2084
+ "eval_loss": 0.8414435982704163,
2085
+ "eval_runtime": 1.2285,
2086
+ "eval_samples_per_second": 813.99,
2087
+ "eval_steps_per_second": 13.024,
2088
+ "step": 104000
2089
+ },
2090
+ {
2091
+ "epoch": 2.66,
2092
+ "learning_rate": 0.00028041354309487135,
2093
+ "loss": 0.3108,
2094
+ "step": 104500
2095
+ },
2096
+ {
2097
+ "epoch": 2.68,
2098
+ "learning_rate": 0.0002801721897388482,
2099
+ "loss": 0.3101,
2100
+ "step": 105000
2101
+ },
2102
+ {
2103
+ "epoch": 2.68,
2104
+ "eval_loss": 0.8326617479324341,
2105
+ "eval_runtime": 1.214,
2106
+ "eval_samples_per_second": 823.726,
2107
+ "eval_steps_per_second": 13.18,
2108
+ "step": 105000
2109
+ },
2110
+ {
2111
+ "epoch": 2.69,
2112
+ "learning_rate": 0.0002799294675198124,
2113
+ "loss": 0.3102,
2114
+ "step": 105500
2115
+ },
2116
+ {
2117
+ "epoch": 2.7,
2118
+ "learning_rate": 0.00027968537909213524,
2119
+ "loss": 0.3107,
2120
+ "step": 106000
2121
+ },
2122
+ {
2123
+ "epoch": 2.7,
2124
+ "eval_loss": 0.837422251701355,
2125
+ "eval_runtime": 1.2383,
2126
+ "eval_samples_per_second": 807.572,
2127
+ "eval_steps_per_second": 12.921,
2128
+ "step": 106000
2129
+ },
2130
+ {
2131
+ "epoch": 2.71,
2132
+ "learning_rate": 0.0002794399271251287,
2133
+ "loss": 0.3102,
2134
+ "step": 106500
2135
+ },
2136
+ {
2137
+ "epoch": 2.73,
2138
+ "learning_rate": 0.0002791931143030162,
2139
+ "loss": 0.3103,
2140
+ "step": 107000
2141
+ },
2142
+ {
2143
+ "epoch": 2.73,
2144
+ "eval_loss": 0.8471120595932007,
2145
+ "eval_runtime": 1.2736,
2146
+ "eval_samples_per_second": 785.146,
2147
+ "eval_steps_per_second": 12.562,
2148
+ "step": 107000
2149
+ },
2150
+ {
2151
+ "epoch": 2.74,
2152
+ "learning_rate": 0.00027894494332490315,
2153
+ "loss": 0.3104,
2154
+ "step": 107500
2155
+ },
2156
+ {
2157
+ "epoch": 2.75,
2158
+ "learning_rate": 0.0002786954169047476,
2159
+ "loss": 0.3095,
2160
+ "step": 108000
2161
+ },
2162
+ {
2163
+ "epoch": 2.75,
2164
+ "eval_loss": 0.845231831073761,
2165
+ "eval_runtime": 1.222,
2166
+ "eval_samples_per_second": 818.338,
2167
+ "eval_steps_per_second": 13.093,
2168
+ "step": 108000
2169
+ },
2170
+ {
2171
+ "epoch": 2.77,
2172
+ "learning_rate": 0.0002784445377713306,
2173
+ "loss": 0.3093,
2174
+ "step": 108500
2175
+ },
2176
+ {
2177
+ "epoch": 2.78,
2178
+ "learning_rate": 0.0002781923086682261,
2179
+ "loss": 0.3094,
2180
+ "step": 109000
2181
+ },
2182
+ {
2183
+ "epoch": 2.78,
2184
+ "eval_loss": 0.8512564301490784,
2185
+ "eval_runtime": 1.2211,
2186
+ "eval_samples_per_second": 818.908,
2187
+ "eval_steps_per_second": 13.103,
2188
+ "step": 109000
2189
+ },
2190
+ {
2191
+ "epoch": 2.79,
2192
+ "learning_rate": 0.0002779387323537711,
2193
+ "loss": 0.3091,
2194
+ "step": 109500
2195
+ },
2196
+ {
2197
+ "epoch": 2.8,
2198
+ "learning_rate": 0.0002776838116010356,
2199
+ "loss": 0.3094,
2200
+ "step": 110000
2201
+ },
2202
+ {
2203
+ "epoch": 2.8,
2204
+ "eval_loss": 0.8348438739776611,
2205
+ "eval_runtime": 1.2126,
2206
+ "eval_samples_per_second": 824.682,
2207
+ "eval_steps_per_second": 13.195,
2208
+ "step": 110000
2209
  }
2210
  ],
2211
  "max_steps": 500000,
2212
  "num_train_epochs": 13,
2213
+ "total_flos": 3.51433156869341e+21,
2214
  "trial_name": null,
2215
  "trial_params": null
2216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1656fe19fa26589bb005c2ca1f054c8e8bb207866300c7f3b095bee470e6b55
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d6b8966d1cc83e4388837c4e32ca9b57abfb21fb0d307bbaed74f29719988a7
3
  size 102501541