Training in progress, step 320000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:229a42ebe682c3ef3fa77824f414f8052ce22269902d2cf833bbceae01b4ee94
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2234ce12404bc4801fab809254f08127bd71fca09a8dab3e0c720a225f0006ed
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b54a8c5749446bd4a65592cc408c92cd1c1a63789b632ec709bae613de880e8
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e12e956d2c1594d69772425e394d5c7340f5558535a744e143a62985c9f6b3a
|
3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5034bb9708a34c35b3368c1e0fff63513e2cb5f1c0dd56fffa0328312b7e4831
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e06a3dca10a2bccff3cb0c6a7b393b12b0f08503dc63d7b7533eeb15ed495c6
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a94f09290bc683f43d0869ce2fa5f9751184b5e70371828d250a3714d35fe40
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4915e195da15bfd64d34239234d248cd0ab1ad7df671f2845974753597da8bc3
|
3 |
+
size 14439
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:057d8e4139ad7708d7871dd8361365fbd9951b2ae3daf5aded867e56c2fe457c
|
3 |
+
size 14439
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7115cc7cbcc32a343bb9b4e7b15f1fa12bd3bb61d63d5248eaa0a65935d6e80c
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5673377a057c7734bd1a0ee14d972f6f3bfc67bb8208ac49ae618347d18d616b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 9.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -6206,11 +6206,211 @@
|
|
6206 |
"eval_samples_per_second": 1914.938,
|
6207 |
"eval_steps_per_second": 30.639,
|
6208 |
"step": 310000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6209 |
}
|
6210 |
],
|
6211 |
"max_steps": 500000,
|
6212 |
"num_train_epochs": 16,
|
6213 |
-
"total_flos":
|
6214 |
"trial_name": null,
|
6215 |
"trial_params": null
|
6216 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.804221943074236,
|
5 |
+
"global_step": 320000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
6206 |
"eval_samples_per_second": 1914.938,
|
6207 |
"eval_steps_per_second": 30.639,
|
6208 |
"step": 310000
|
6209 |
+
},
|
6210 |
+
{
|
6211 |
+
"epoch": 9.51,
|
6212 |
+
"learning_rate": 0.00010973674410951567,
|
6213 |
+
"loss": 0.3293,
|
6214 |
+
"step": 310500
|
6215 |
+
},
|
6216 |
+
{
|
6217 |
+
"epoch": 9.53,
|
6218 |
+
"learning_rate": 0.00010928144739511337,
|
6219 |
+
"loss": 0.329,
|
6220 |
+
"step": 311000
|
6221 |
+
},
|
6222 |
+
{
|
6223 |
+
"epoch": 9.53,
|
6224 |
+
"eval_loss": 0.776207685470581,
|
6225 |
+
"eval_runtime": 0.5118,
|
6226 |
+
"eval_samples_per_second": 1953.912,
|
6227 |
+
"eval_steps_per_second": 31.263,
|
6228 |
+
"step": 311000
|
6229 |
+
},
|
6230 |
+
{
|
6231 |
+
"epoch": 9.54,
|
6232 |
+
"learning_rate": 0.00010882665065147757,
|
6233 |
+
"loss": 0.3287,
|
6234 |
+
"step": 311500
|
6235 |
+
},
|
6236 |
+
{
|
6237 |
+
"epoch": 9.56,
|
6238 |
+
"learning_rate": 0.00010837235885219267,
|
6239 |
+
"loss": 0.3286,
|
6240 |
+
"step": 312000
|
6241 |
+
},
|
6242 |
+
{
|
6243 |
+
"epoch": 9.56,
|
6244 |
+
"eval_loss": 0.7779992818832397,
|
6245 |
+
"eval_runtime": 0.5097,
|
6246 |
+
"eval_samples_per_second": 1962.124,
|
6247 |
+
"eval_steps_per_second": 31.394,
|
6248 |
+
"step": 312000
|
6249 |
+
},
|
6250 |
+
{
|
6251 |
+
"epoch": 9.57,
|
6252 |
+
"learning_rate": 0.00010791857696532089,
|
6253 |
+
"loss": 0.3287,
|
6254 |
+
"step": 312500
|
6255 |
+
},
|
6256 |
+
{
|
6257 |
+
"epoch": 9.59,
|
6258 |
+
"learning_rate": 0.00010746530995334832,
|
6259 |
+
"loss": 0.3285,
|
6260 |
+
"step": 313000
|
6261 |
+
},
|
6262 |
+
{
|
6263 |
+
"epoch": 9.59,
|
6264 |
+
"eval_loss": 0.7776817679405212,
|
6265 |
+
"eval_runtime": 0.5012,
|
6266 |
+
"eval_samples_per_second": 1995.205,
|
6267 |
+
"eval_steps_per_second": 31.923,
|
6268 |
+
"step": 313000
|
6269 |
+
},
|
6270 |
+
{
|
6271 |
+
"epoch": 9.61,
|
6272 |
+
"learning_rate": 0.0001070125627731304,
|
6273 |
+
"loss": 0.3285,
|
6274 |
+
"step": 313500
|
6275 |
+
},
|
6276 |
+
{
|
6277 |
+
"epoch": 9.62,
|
6278 |
+
"learning_rate": 0.0001065603403758377,
|
6279 |
+
"loss": 0.3288,
|
6280 |
+
"step": 314000
|
6281 |
+
},
|
6282 |
+
{
|
6283 |
+
"epoch": 9.62,
|
6284 |
+
"eval_loss": 0.7795534133911133,
|
6285 |
+
"eval_runtime": 0.523,
|
6286 |
+
"eval_samples_per_second": 1912.141,
|
6287 |
+
"eval_steps_per_second": 30.594,
|
6288 |
+
"step": 314000
|
6289 |
+
},
|
6290 |
+
{
|
6291 |
+
"epoch": 9.64,
|
6292 |
+
"learning_rate": 0.00010610864770690196,
|
6293 |
+
"loss": 0.3285,
|
6294 |
+
"step": 314500
|
6295 |
+
},
|
6296 |
+
{
|
6297 |
+
"epoch": 9.65,
|
6298 |
+
"learning_rate": 0.00010565748970596172,
|
6299 |
+
"loss": 0.3281,
|
6300 |
+
"step": 315000
|
6301 |
+
},
|
6302 |
+
{
|
6303 |
+
"epoch": 9.65,
|
6304 |
+
"eval_loss": 0.7744332551956177,
|
6305 |
+
"eval_runtime": 0.5161,
|
6306 |
+
"eval_samples_per_second": 1937.422,
|
6307 |
+
"eval_steps_per_second": 30.999,
|
6308 |
+
"step": 315000
|
6309 |
+
},
|
6310 |
+
{
|
6311 |
+
"epoch": 9.67,
|
6312 |
+
"learning_rate": 0.00010520687130680884,
|
6313 |
+
"loss": 0.3279,
|
6314 |
+
"step": 315500
|
6315 |
+
},
|
6316 |
+
{
|
6317 |
+
"epoch": 9.68,
|
6318 |
+
"learning_rate": 0.00010475679743733364,
|
6319 |
+
"loss": 0.3284,
|
6320 |
+
"step": 316000
|
6321 |
+
},
|
6322 |
+
{
|
6323 |
+
"epoch": 9.68,
|
6324 |
+
"eval_loss": 0.7782894968986511,
|
6325 |
+
"eval_runtime": 0.5278,
|
6326 |
+
"eval_samples_per_second": 1894.746,
|
6327 |
+
"eval_steps_per_second": 30.316,
|
6328 |
+
"step": 316000
|
6329 |
+
},
|
6330 |
+
{
|
6331 |
+
"epoch": 9.7,
|
6332 |
+
"learning_rate": 0.00010430727301947202,
|
6333 |
+
"loss": 0.3282,
|
6334 |
+
"step": 316500
|
6335 |
+
},
|
6336 |
+
{
|
6337 |
+
"epoch": 9.71,
|
6338 |
+
"learning_rate": 0.00010385830296915104,
|
6339 |
+
"loss": 0.328,
|
6340 |
+
"step": 317000
|
6341 |
+
},
|
6342 |
+
{
|
6343 |
+
"epoch": 9.71,
|
6344 |
+
"eval_loss": 0.7791895866394043,
|
6345 |
+
"eval_runtime": 0.4908,
|
6346 |
+
"eval_samples_per_second": 2037.475,
|
6347 |
+
"eval_steps_per_second": 32.6,
|
6348 |
+
"step": 317000
|
6349 |
+
},
|
6350 |
+
{
|
6351 |
+
"epoch": 9.73,
|
6352 |
+
"learning_rate": 0.00010340989219623508,
|
6353 |
+
"loss": 0.328,
|
6354 |
+
"step": 317500
|
6355 |
+
},
|
6356 |
+
{
|
6357 |
+
"epoch": 9.74,
|
6358 |
+
"learning_rate": 0.0001029620456044727,
|
6359 |
+
"loss": 0.3278,
|
6360 |
+
"step": 318000
|
6361 |
+
},
|
6362 |
+
{
|
6363 |
+
"epoch": 9.74,
|
6364 |
+
"eval_loss": 0.7816545367240906,
|
6365 |
+
"eval_runtime": 0.5081,
|
6366 |
+
"eval_samples_per_second": 1968.272,
|
6367 |
+
"eval_steps_per_second": 31.492,
|
6368 |
+
"step": 318000
|
6369 |
+
},
|
6370 |
+
{
|
6371 |
+
"epoch": 9.76,
|
6372 |
+
"learning_rate": 0.00010251476809144226,
|
6373 |
+
"loss": 0.3279,
|
6374 |
+
"step": 318500
|
6375 |
+
},
|
6376 |
+
{
|
6377 |
+
"epoch": 9.77,
|
6378 |
+
"learning_rate": 0.00010206806454849917,
|
6379 |
+
"loss": 0.3276,
|
6380 |
+
"step": 319000
|
6381 |
+
},
|
6382 |
+
{
|
6383 |
+
"epoch": 9.77,
|
6384 |
+
"eval_loss": 0.7802248597145081,
|
6385 |
+
"eval_runtime": 0.4946,
|
6386 |
+
"eval_samples_per_second": 2021.73,
|
6387 |
+
"eval_steps_per_second": 32.348,
|
6388 |
+
"step": 319000
|
6389 |
+
},
|
6390 |
+
{
|
6391 |
+
"epoch": 9.79,
|
6392 |
+
"learning_rate": 0.00010162193986072167,
|
6393 |
+
"loss": 0.3272,
|
6394 |
+
"step": 319500
|
6395 |
+
},
|
6396 |
+
{
|
6397 |
+
"epoch": 9.8,
|
6398 |
+
"learning_rate": 0.00010117639890685795,
|
6399 |
+
"loss": 0.3273,
|
6400 |
+
"step": 320000
|
6401 |
+
},
|
6402 |
+
{
|
6403 |
+
"epoch": 9.8,
|
6404 |
+
"eval_loss": 0.7808557152748108,
|
6405 |
+
"eval_runtime": 0.499,
|
6406 |
+
"eval_samples_per_second": 2004.192,
|
6407 |
+
"eval_steps_per_second": 32.067,
|
6408 |
+
"step": 320000
|
6409 |
}
|
6410 |
],
|
6411 |
"max_steps": 500000,
|
6412 |
"num_train_epochs": 16,
|
6413 |
+
"total_flos": 1.0223516549216217e+22,
|
6414 |
"trial_name": null,
|
6415 |
"trial_params": null
|
6416 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2234ce12404bc4801fab809254f08127bd71fca09a8dab3e0c720a225f0006ed
|
3 |
size 102501541
|