Training in progress, step 320000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:feec96829044f69c5632c270eedd009034bbe0cf717e555992fa10aeea97b864
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d15795c67ef8a474ec8269e65cb5cae595586b0dd6a18f0656a533551a730789
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5673377a057c7734bd1a0ee14d972f6f3bfc67bb8208ac49ae618347d18d616b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -6206,11 +6206,211 @@
|
|
6206 |
"eval_samples_per_second": 786.914,
|
6207 |
"eval_steps_per_second": 12.591,
|
6208 |
"step": 310000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6209 |
}
|
6210 |
],
|
6211 |
"max_steps": 500000,
|
6212 |
"num_train_epochs": 13,
|
6213 |
-
"total_flos":
|
6214 |
"trial_name": null,
|
6215 |
"trial_params": null
|
6216 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.15535960038738,
|
5 |
+
"global_step": 320000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
6206 |
"eval_samples_per_second": 786.914,
|
6207 |
"eval_steps_per_second": 12.591,
|
6208 |
"step": 310000
|
6209 |
+
},
|
6210 |
+
{
|
6211 |
+
"epoch": 7.91,
|
6212 |
+
"learning_rate": 0.00010973674410951567,
|
6213 |
+
"loss": 0.281,
|
6214 |
+
"step": 310500
|
6215 |
+
},
|
6216 |
+
{
|
6217 |
+
"epoch": 7.93,
|
6218 |
+
"learning_rate": 0.00010928144739511337,
|
6219 |
+
"loss": 0.281,
|
6220 |
+
"step": 311000
|
6221 |
+
},
|
6222 |
+
{
|
6223 |
+
"epoch": 7.93,
|
6224 |
+
"eval_loss": 0.806954562664032,
|
6225 |
+
"eval_runtime": 1.274,
|
6226 |
+
"eval_samples_per_second": 784.956,
|
6227 |
+
"eval_steps_per_second": 12.559,
|
6228 |
+
"step": 311000
|
6229 |
+
},
|
6230 |
+
{
|
6231 |
+
"epoch": 7.94,
|
6232 |
+
"learning_rate": 0.00010882665065147757,
|
6233 |
+
"loss": 0.2805,
|
6234 |
+
"step": 311500
|
6235 |
+
},
|
6236 |
+
{
|
6237 |
+
"epoch": 7.95,
|
6238 |
+
"learning_rate": 0.00010837235885219267,
|
6239 |
+
"loss": 0.2807,
|
6240 |
+
"step": 312000
|
6241 |
+
},
|
6242 |
+
{
|
6243 |
+
"epoch": 7.95,
|
6244 |
+
"eval_loss": 0.8041293621063232,
|
6245 |
+
"eval_runtime": 1.273,
|
6246 |
+
"eval_samples_per_second": 785.527,
|
6247 |
+
"eval_steps_per_second": 12.568,
|
6248 |
+
"step": 312000
|
6249 |
+
},
|
6250 |
+
{
|
6251 |
+
"epoch": 7.96,
|
6252 |
+
"learning_rate": 0.00010791857696532089,
|
6253 |
+
"loss": 0.2808,
|
6254 |
+
"step": 312500
|
6255 |
+
},
|
6256 |
+
{
|
6257 |
+
"epoch": 7.98,
|
6258 |
+
"learning_rate": 0.00010746530995334832,
|
6259 |
+
"loss": 0.2811,
|
6260 |
+
"step": 313000
|
6261 |
+
},
|
6262 |
+
{
|
6263 |
+
"epoch": 7.98,
|
6264 |
+
"eval_loss": 0.8100136518478394,
|
6265 |
+
"eval_runtime": 1.2743,
|
6266 |
+
"eval_samples_per_second": 784.749,
|
6267 |
+
"eval_steps_per_second": 12.556,
|
6268 |
+
"step": 313000
|
6269 |
+
},
|
6270 |
+
{
|
6271 |
+
"epoch": 7.99,
|
6272 |
+
"learning_rate": 0.0001070125627731304,
|
6273 |
+
"loss": 0.2809,
|
6274 |
+
"step": 313500
|
6275 |
+
},
|
6276 |
+
{
|
6277 |
+
"epoch": 8.0,
|
6278 |
+
"learning_rate": 0.0001065603403758377,
|
6279 |
+
"loss": 0.2821,
|
6280 |
+
"step": 314000
|
6281 |
+
},
|
6282 |
+
{
|
6283 |
+
"epoch": 8.0,
|
6284 |
+
"eval_loss": 0.828373908996582,
|
6285 |
+
"eval_runtime": 1.2715,
|
6286 |
+
"eval_samples_per_second": 786.498,
|
6287 |
+
"eval_steps_per_second": 12.584,
|
6288 |
+
"step": 314000
|
6289 |
+
},
|
6290 |
+
{
|
6291 |
+
"epoch": 8.02,
|
6292 |
+
"learning_rate": 0.00010610864770690196,
|
6293 |
+
"loss": 0.2806,
|
6294 |
+
"step": 314500
|
6295 |
+
},
|
6296 |
+
{
|
6297 |
+
"epoch": 8.03,
|
6298 |
+
"learning_rate": 0.00010565748970596172,
|
6299 |
+
"loss": 0.2808,
|
6300 |
+
"step": 315000
|
6301 |
+
},
|
6302 |
+
{
|
6303 |
+
"epoch": 8.03,
|
6304 |
+
"eval_loss": 0.8072633743286133,
|
6305 |
+
"eval_runtime": 1.2547,
|
6306 |
+
"eval_samples_per_second": 796.999,
|
6307 |
+
"eval_steps_per_second": 12.752,
|
6308 |
+
"step": 315000
|
6309 |
+
},
|
6310 |
+
{
|
6311 |
+
"epoch": 8.04,
|
6312 |
+
"learning_rate": 0.00010520687130680884,
|
6313 |
+
"loss": 0.2809,
|
6314 |
+
"step": 315500
|
6315 |
+
},
|
6316 |
+
{
|
6317 |
+
"epoch": 8.05,
|
6318 |
+
"learning_rate": 0.00010475679743733364,
|
6319 |
+
"loss": 0.2805,
|
6320 |
+
"step": 316000
|
6321 |
+
},
|
6322 |
+
{
|
6323 |
+
"epoch": 8.05,
|
6324 |
+
"eval_loss": 0.8141467571258545,
|
6325 |
+
"eval_runtime": 1.2856,
|
6326 |
+
"eval_samples_per_second": 777.823,
|
6327 |
+
"eval_steps_per_second": 12.445,
|
6328 |
+
"step": 316000
|
6329 |
+
},
|
6330 |
+
{
|
6331 |
+
"epoch": 8.07,
|
6332 |
+
"learning_rate": 0.00010430727301947202,
|
6333 |
+
"loss": 0.2804,
|
6334 |
+
"step": 316500
|
6335 |
+
},
|
6336 |
+
{
|
6337 |
+
"epoch": 8.08,
|
6338 |
+
"learning_rate": 0.00010385830296915104,
|
6339 |
+
"loss": 0.2801,
|
6340 |
+
"step": 317000
|
6341 |
+
},
|
6342 |
+
{
|
6343 |
+
"epoch": 8.08,
|
6344 |
+
"eval_loss": 0.8066883683204651,
|
6345 |
+
"eval_runtime": 1.2502,
|
6346 |
+
"eval_samples_per_second": 799.87,
|
6347 |
+
"eval_steps_per_second": 12.798,
|
6348 |
+
"step": 317000
|
6349 |
+
},
|
6350 |
+
{
|
6351 |
+
"epoch": 8.09,
|
6352 |
+
"learning_rate": 0.00010340989219623508,
|
6353 |
+
"loss": 0.2803,
|
6354 |
+
"step": 317500
|
6355 |
+
},
|
6356 |
+
{
|
6357 |
+
"epoch": 8.1,
|
6358 |
+
"learning_rate": 0.0001029620456044727,
|
6359 |
+
"loss": 0.28,
|
6360 |
+
"step": 318000
|
6361 |
+
},
|
6362 |
+
{
|
6363 |
+
"epoch": 8.1,
|
6364 |
+
"eval_loss": 0.8122532963752747,
|
6365 |
+
"eval_runtime": 1.2711,
|
6366 |
+
"eval_samples_per_second": 786.71,
|
6367 |
+
"eval_steps_per_second": 12.587,
|
6368 |
+
"step": 318000
|
6369 |
+
},
|
6370 |
+
{
|
6371 |
+
"epoch": 8.12,
|
6372 |
+
"learning_rate": 0.00010251476809144226,
|
6373 |
+
"loss": 0.2801,
|
6374 |
+
"step": 318500
|
6375 |
+
},
|
6376 |
+
{
|
6377 |
+
"epoch": 8.13,
|
6378 |
+
"learning_rate": 0.00010206806454849917,
|
6379 |
+
"loss": 0.2802,
|
6380 |
+
"step": 319000
|
6381 |
+
},
|
6382 |
+
{
|
6383 |
+
"epoch": 8.13,
|
6384 |
+
"eval_loss": 0.8077669143676758,
|
6385 |
+
"eval_runtime": 1.2395,
|
6386 |
+
"eval_samples_per_second": 806.793,
|
6387 |
+
"eval_steps_per_second": 12.909,
|
6388 |
+
"step": 319000
|
6389 |
+
},
|
6390 |
+
{
|
6391 |
+
"epoch": 8.14,
|
6392 |
+
"learning_rate": 0.00010162193986072167,
|
6393 |
+
"loss": 0.2805,
|
6394 |
+
"step": 319500
|
6395 |
+
},
|
6396 |
+
{
|
6397 |
+
"epoch": 8.16,
|
6398 |
+
"learning_rate": 0.00010117639890685795,
|
6399 |
+
"loss": 0.2799,
|
6400 |
+
"step": 320000
|
6401 |
+
},
|
6402 |
+
{
|
6403 |
+
"epoch": 8.16,
|
6404 |
+
"eval_loss": 0.8211207389831543,
|
6405 |
+
"eval_runtime": 1.2456,
|
6406 |
+
"eval_samples_per_second": 802.848,
|
6407 |
+
"eval_steps_per_second": 12.846,
|
6408 |
+
"step": 320000
|
6409 |
}
|
6410 |
],
|
6411 |
"max_steps": 500000,
|
6412 |
"num_train_epochs": 13,
|
6413 |
+
"total_flos": 1.022348060947964e+22,
|
6414 |
"trial_name": null,
|
6415 |
"trial_params": null
|
6416 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d15795c67ef8a474ec8269e65cb5cae595586b0dd6a18f0656a533551a730789
|
3 |
size 102501541
|