Training in progress, step 80000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73045cc3d0c4c73c7a304fc419b67fc70f5890b81e163e1a8a2a9bce7006b524
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b3b0e28c0382823d55f599e6bdbd134ef629f7e5342a0597c5271470ab8336f
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52b56ac7e2d49a255e26944a75af12a85964edcdc6e46d62983b15a37947b570
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eff90b5853455d439dc2f89f06dce4dd6575b564c20e1387efdf3e7aaf00ff95
|
3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01d301e6ea6f21d7b1880ccef9bfd4fbd3e69c49a007d8e22935d9fcb510c3a5
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c442b548bbec55775013c97b41312e4ef25472d89576d14c167f1843976d866
|
3 |
+
size 14439
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3070a38ac5fbd287025149ce6d76fcde28fce103ebe6d7d53ffc82820503320f
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01a360366ed887a9dd09f52f770c090125f631416090ce14483f193c8d020d2c
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0865afaa175e5470997944056f52744d7830792f5c6d1a926cbcaa46aeddd290
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf7d552313ab0fd670a96222e4be3c0d1d9dc06059697a7fd34d8ad13955978a
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ff31836ff2c96f7fb19d95df664b507273477e3a4f87dcce611b28b7e31820b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1406,11 +1406,211 @@
|
|
1406 |
"eval_samples_per_second": 1922.99,
|
1407 |
"eval_steps_per_second": 30.768,
|
1408 |
"step": 70000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1409 |
}
|
1410 |
],
|
1411 |
"max_steps": 500000,
|
1412 |
"num_train_epochs": 16,
|
1413 |
-
"total_flos": 2.
|
1414 |
"trial_name": null,
|
1415 |
"trial_params": null
|
1416 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.451055485768559,
|
5 |
+
"global_step": 80000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1406 |
"eval_samples_per_second": 1922.99,
|
1407 |
"eval_steps_per_second": 30.768,
|
1408 |
"step": 70000
|
1409 |
+
},
|
1410 |
+
{
|
1411 |
+
"epoch": 2.16,
|
1412 |
+
"learning_rate": 0.0002934838153624519,
|
1413 |
+
"loss": 0.3962,
|
1414 |
+
"step": 70500
|
1415 |
+
},
|
1416 |
+
{
|
1417 |
+
"epoch": 2.18,
|
1418 |
+
"learning_rate": 0.00029334092796560427,
|
1419 |
+
"loss": 0.3958,
|
1420 |
+
"step": 71000
|
1421 |
+
},
|
1422 |
+
{
|
1423 |
+
"epoch": 2.18,
|
1424 |
+
"eval_loss": 0.7898754477500916,
|
1425 |
+
"eval_runtime": 0.5162,
|
1426 |
+
"eval_samples_per_second": 1937.227,
|
1427 |
+
"eval_steps_per_second": 30.996,
|
1428 |
+
"step": 71000
|
1429 |
+
},
|
1430 |
+
{
|
1431 |
+
"epoch": 2.19,
|
1432 |
+
"learning_rate": 0.0002931965276945326,
|
1433 |
+
"loss": 0.3951,
|
1434 |
+
"step": 71500
|
1435 |
+
},
|
1436 |
+
{
|
1437 |
+
"epoch": 2.21,
|
1438 |
+
"learning_rate": 0.0002930506161283751,
|
1439 |
+
"loss": 0.3947,
|
1440 |
+
"step": 72000
|
1441 |
+
},
|
1442 |
+
{
|
1443 |
+
"epoch": 2.21,
|
1444 |
+
"eval_loss": 0.7828860878944397,
|
1445 |
+
"eval_runtime": 0.5502,
|
1446 |
+
"eval_samples_per_second": 1817.401,
|
1447 |
+
"eval_steps_per_second": 29.078,
|
1448 |
+
"step": 72000
|
1449 |
+
},
|
1450 |
+
{
|
1451 |
+
"epoch": 2.22,
|
1452 |
+
"learning_rate": 0.00029290319486279724,
|
1453 |
+
"loss": 0.3944,
|
1454 |
+
"step": 72500
|
1455 |
+
},
|
1456 |
+
{
|
1457 |
+
"epoch": 2.24,
|
1458 |
+
"learning_rate": 0.0002927542655099744,
|
1459 |
+
"loss": 0.3935,
|
1460 |
+
"step": 73000
|
1461 |
+
},
|
1462 |
+
{
|
1463 |
+
"epoch": 2.24,
|
1464 |
+
"eval_loss": 0.7836081385612488,
|
1465 |
+
"eval_runtime": 0.518,
|
1466 |
+
"eval_samples_per_second": 1930.433,
|
1467 |
+
"eval_steps_per_second": 30.887,
|
1468 |
+
"step": 73000
|
1469 |
+
},
|
1470 |
+
{
|
1471 |
+
"epoch": 2.25,
|
1472 |
+
"learning_rate": 0.00029260382969857417,
|
1473 |
+
"loss": 0.3936,
|
1474 |
+
"step": 73500
|
1475 |
+
},
|
1476 |
+
{
|
1477 |
+
"epoch": 2.27,
|
1478 |
+
"learning_rate": 0.00029245188907373845,
|
1479 |
+
"loss": 0.393,
|
1480 |
+
"step": 74000
|
1481 |
+
},
|
1482 |
+
{
|
1483 |
+
"epoch": 2.27,
|
1484 |
+
"eval_loss": 0.7833809852600098,
|
1485 |
+
"eval_runtime": 0.5265,
|
1486 |
+
"eval_samples_per_second": 1899.502,
|
1487 |
+
"eval_steps_per_second": 30.392,
|
1488 |
+
"step": 74000
|
1489 |
+
},
|
1490 |
+
{
|
1491 |
+
"epoch": 2.28,
|
1492 |
+
"learning_rate": 0.0002922984452970655,
|
1493 |
+
"loss": 0.3923,
|
1494 |
+
"step": 74500
|
1495 |
+
},
|
1496 |
+
{
|
1497 |
+
"epoch": 2.3,
|
1498 |
+
"learning_rate": 0.000292143500046592,
|
1499 |
+
"loss": 0.392,
|
1500 |
+
"step": 75000
|
1501 |
+
},
|
1502 |
+
{
|
1503 |
+
"epoch": 2.3,
|
1504 |
+
"eval_loss": 0.7891106009483337,
|
1505 |
+
"eval_runtime": 0.549,
|
1506 |
+
"eval_samples_per_second": 1821.565,
|
1507 |
+
"eval_steps_per_second": 29.145,
|
1508 |
+
"step": 75000
|
1509 |
+
},
|
1510 |
+
{
|
1511 |
+
"epoch": 2.31,
|
1512 |
+
"learning_rate": 0.0002919870550167743,
|
1513 |
+
"loss": 0.3917,
|
1514 |
+
"step": 75500
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"epoch": 2.33,
|
1518 |
+
"learning_rate": 0.0002918291119184702,
|
1519 |
+
"loss": 0.3913,
|
1520 |
+
"step": 76000
|
1521 |
+
},
|
1522 |
+
{
|
1523 |
+
"epoch": 2.33,
|
1524 |
+
"eval_loss": 0.7858054041862488,
|
1525 |
+
"eval_runtime": 0.5293,
|
1526 |
+
"eval_samples_per_second": 1889.27,
|
1527 |
+
"eval_steps_per_second": 30.228,
|
1528 |
+
"step": 76000
|
1529 |
+
},
|
1530 |
+
{
|
1531 |
+
"epoch": 2.34,
|
1532 |
+
"learning_rate": 0.0002916696724789201,
|
1533 |
+
"loss": 0.3906,
|
1534 |
+
"step": 76500
|
1535 |
+
},
|
1536 |
+
{
|
1537 |
+
"epoch": 2.36,
|
1538 |
+
"learning_rate": 0.00029150873844172823,
|
1539 |
+
"loss": 0.3904,
|
1540 |
+
"step": 77000
|
1541 |
+
},
|
1542 |
+
{
|
1543 |
+
"epoch": 2.36,
|
1544 |
+
"eval_loss": 0.7869090437889099,
|
1545 |
+
"eval_runtime": 0.5095,
|
1546 |
+
"eval_samples_per_second": 1962.553,
|
1547 |
+
"eval_steps_per_second": 31.401,
|
1548 |
+
"step": 77000
|
1549 |
+
},
|
1550 |
+
{
|
1551 |
+
"epoch": 2.37,
|
1552 |
+
"learning_rate": 0.00029134631156684334,
|
1553 |
+
"loss": 0.3898,
|
1554 |
+
"step": 77500
|
1555 |
+
},
|
1556 |
+
{
|
1557 |
+
"epoch": 2.39,
|
1558 |
+
"learning_rate": 0.0002911823936305398,
|
1559 |
+
"loss": 0.3893,
|
1560 |
+
"step": 78000
|
1561 |
+
},
|
1562 |
+
{
|
1563 |
+
"epoch": 2.39,
|
1564 |
+
"eval_loss": 0.7837140560150146,
|
1565 |
+
"eval_runtime": 0.5128,
|
1566 |
+
"eval_samples_per_second": 1950.24,
|
1567 |
+
"eval_steps_per_second": 31.204,
|
1568 |
+
"step": 78000
|
1569 |
+
},
|
1570 |
+
{
|
1571 |
+
"epoch": 2.41,
|
1572 |
+
"learning_rate": 0.0002910169864253979,
|
1573 |
+
"loss": 0.3892,
|
1574 |
+
"step": 78500
|
1575 |
+
},
|
1576 |
+
{
|
1577 |
+
"epoch": 2.42,
|
1578 |
+
"learning_rate": 0.0002908500917602842,
|
1579 |
+
"loss": 0.3886,
|
1580 |
+
"step": 79000
|
1581 |
+
},
|
1582 |
+
{
|
1583 |
+
"epoch": 2.42,
|
1584 |
+
"eval_loss": 0.787075936794281,
|
1585 |
+
"eval_runtime": 0.5289,
|
1586 |
+
"eval_samples_per_second": 1890.813,
|
1587 |
+
"eval_steps_per_second": 30.253,
|
1588 |
+
"step": 79000
|
1589 |
+
},
|
1590 |
+
{
|
1591 |
+
"epoch": 2.44,
|
1592 |
+
"learning_rate": 0.00029068171146033226,
|
1593 |
+
"loss": 0.3883,
|
1594 |
+
"step": 79500
|
1595 |
+
},
|
1596 |
+
{
|
1597 |
+
"epoch": 2.45,
|
1598 |
+
"learning_rate": 0.0002905118473669218,
|
1599 |
+
"loss": 0.3877,
|
1600 |
+
"step": 80000
|
1601 |
+
},
|
1602 |
+
{
|
1603 |
+
"epoch": 2.45,
|
1604 |
+
"eval_loss": 0.7844729423522949,
|
1605 |
+
"eval_runtime": 0.512,
|
1606 |
+
"eval_samples_per_second": 1953.312,
|
1607 |
+
"eval_steps_per_second": 31.253,
|
1608 |
+
"step": 80000
|
1609 |
}
|
1610 |
],
|
1611 |
"max_steps": 500000,
|
1612 |
"num_train_epochs": 16,
|
1613 |
+
"total_flos": 2.5558811342920483e+21,
|
1614 |
"trial_name": null,
|
1615 |
"trial_params": null
|
1616 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b3b0e28c0382823d55f599e6bdbd134ef629f7e5342a0597c5271470ab8336f
|
3 |
size 102501541
|