Training in progress, step 490000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2e2116038f17a755ee0cd7f714c11f53fb07e5f62178c545ddce403c568ffbe
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6673bee6889f96f4d0585736b0bbd0104ce06075881649e694ae573ad1d2887a
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:644e2fc163f368345d150d3fc83a57447c36fc56a8c5b1ac9505e0d54bf78bd0
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a9725305d551fdabba33dd56f81db3b2c581f84aafc700effd4e6475b7ec812
|
3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbe151312b54baa9a4e64116f572138b7b71dec85430cd296cb3640e4c2da8a4
|
3 |
+
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:368b6ac98a14548e894c6dec52e2926741be3331b9c05acbb8210a7533733def
|
3 |
+
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb22d9d447646758c04766cf11ca8c2e8fc19a36cd697a9779fc029a142baac3
|
3 |
+
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:978268c9f7cbaa0bf6afcfb4a36b2945ad3b7df5c70456c1598647d77f700d81
|
3 |
+
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea73efb8fdbffbda3c38be20fe382b5f7cbbd8a80c55ee21bf5ba148273fa6a6
|
3 |
+
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:363c6a12aae6b2a1c1924495d50178e5b2b9c07c6657605723562b286ea4de81
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d798ff13d72fe751bc0ea721c37eb1e98064dde5819b90f3504db53fdceee97
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -9606,11 +9606,211 @@
|
|
9606 |
"eval_samples_per_second": 1946.296,
|
9607 |
"eval_steps_per_second": 31.141,
|
9608 |
"step": 480000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9609 |
}
|
9610 |
],
|
9611 |
"max_steps": 500000,
|
9612 |
"num_train_epochs": 16,
|
9613 |
-
"total_flos": 1.
|
9614 |
"trial_name": null,
|
9615 |
"trial_params": null
|
9616 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 15.012714850332424,
|
5 |
+
"global_step": 490000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
9606 |
"eval_samples_per_second": 1946.296,
|
9607 |
"eval_steps_per_second": 31.141,
|
9608 |
"step": 480000
|
9609 |
+
},
|
9610 |
+
{
|
9611 |
+
"epoch": 14.72,
|
9612 |
+
"learning_rate": 1.1204252864868377e-05,
|
9613 |
+
"loss": 0.3127,
|
9614 |
+
"step": 480500
|
9615 |
+
},
|
9616 |
+
{
|
9617 |
+
"epoch": 14.74,
|
9618 |
+
"learning_rate": 1.1143368309400725e-05,
|
9619 |
+
"loss": 0.3125,
|
9620 |
+
"step": 481000
|
9621 |
+
},
|
9622 |
+
{
|
9623 |
+
"epoch": 14.74,
|
9624 |
+
"eval_loss": 0.7771226763725281,
|
9625 |
+
"eval_runtime": 0.5065,
|
9626 |
+
"eval_samples_per_second": 1974.24,
|
9627 |
+
"eval_steps_per_second": 31.588,
|
9628 |
+
"step": 481000
|
9629 |
+
},
|
9630 |
+
{
|
9631 |
+
"epoch": 14.75,
|
9632 |
+
"learning_rate": 1.1084056947009348e-05,
|
9633 |
+
"loss": 0.3125,
|
9634 |
+
"step": 481500
|
9635 |
+
},
|
9636 |
+
{
|
9637 |
+
"epoch": 14.77,
|
9638 |
+
"learning_rate": 1.1026319426313837e-05,
|
9639 |
+
"loss": 0.3124,
|
9640 |
+
"step": 482000
|
9641 |
+
},
|
9642 |
+
{
|
9643 |
+
"epoch": 14.77,
|
9644 |
+
"eval_loss": 0.7745999693870544,
|
9645 |
+
"eval_runtime": 0.4972,
|
9646 |
+
"eval_samples_per_second": 2011.082,
|
9647 |
+
"eval_steps_per_second": 32.177,
|
9648 |
+
"step": 482000
|
9649 |
+
},
|
9650 |
+
{
|
9651 |
+
"epoch": 14.78,
|
9652 |
+
"learning_rate": 1.097015637872247e-05,
|
9653 |
+
"loss": 0.3126,
|
9654 |
+
"step": 482500
|
9655 |
+
},
|
9656 |
+
{
|
9657 |
+
"epoch": 14.8,
|
9658 |
+
"learning_rate": 1.0915568418425301e-05,
|
9659 |
+
"loss": 0.3125,
|
9660 |
+
"step": 483000
|
9661 |
+
},
|
9662 |
+
{
|
9663 |
+
"epoch": 14.8,
|
9664 |
+
"eval_loss": 0.7761328220367432,
|
9665 |
+
"eval_runtime": 0.5155,
|
9666 |
+
"eval_samples_per_second": 1939.967,
|
9667 |
+
"eval_steps_per_second": 31.039,
|
9668 |
+
"step": 483000
|
9669 |
+
},
|
9670 |
+
{
|
9671 |
+
"epoch": 14.81,
|
9672 |
+
"learning_rate": 1.0862556142387571e-05,
|
9673 |
+
"loss": 0.3126,
|
9674 |
+
"step": 483500
|
9675 |
+
},
|
9676 |
+
{
|
9677 |
+
"epoch": 14.83,
|
9678 |
+
"learning_rate": 1.081112013034298e-05,
|
9679 |
+
"loss": 0.3127,
|
9680 |
+
"step": 484000
|
9681 |
+
},
|
9682 |
+
{
|
9683 |
+
"epoch": 14.83,
|
9684 |
+
"eval_loss": 0.7775337100028992,
|
9685 |
+
"eval_runtime": 0.5093,
|
9686 |
+
"eval_samples_per_second": 1963.507,
|
9687 |
+
"eval_steps_per_second": 31.416,
|
9688 |
+
"step": 484000
|
9689 |
+
},
|
9690 |
+
{
|
9691 |
+
"epoch": 14.84,
|
9692 |
+
"learning_rate": 1.0761260944787561e-05,
|
9693 |
+
"loss": 0.313,
|
9694 |
+
"step": 484500
|
9695 |
+
},
|
9696 |
+
{
|
9697 |
+
"epoch": 14.86,
|
9698 |
+
"learning_rate": 1.0712979130973347e-05,
|
9699 |
+
"loss": 0.3126,
|
9700 |
+
"step": 485000
|
9701 |
+
},
|
9702 |
+
{
|
9703 |
+
"epoch": 14.86,
|
9704 |
+
"eval_loss": 0.7740907073020935,
|
9705 |
+
"eval_runtime": 0.5188,
|
9706 |
+
"eval_samples_per_second": 1927.581,
|
9707 |
+
"eval_steps_per_second": 30.841,
|
9708 |
+
"step": 485000
|
9709 |
+
},
|
9710 |
+
{
|
9711 |
+
"epoch": 14.87,
|
9712 |
+
"learning_rate": 1.0666275216902535e-05,
|
9713 |
+
"loss": 0.3125,
|
9714 |
+
"step": 485500
|
9715 |
+
},
|
9716 |
+
{
|
9717 |
+
"epoch": 14.89,
|
9718 |
+
"learning_rate": 1.0621149713321656e-05,
|
9719 |
+
"loss": 0.3128,
|
9720 |
+
"step": 486000
|
9721 |
+
},
|
9722 |
+
{
|
9723 |
+
"epoch": 14.89,
|
9724 |
+
"eval_loss": 0.7765258550643921,
|
9725 |
+
"eval_runtime": 0.52,
|
9726 |
+
"eval_samples_per_second": 1923.021,
|
9727 |
+
"eval_steps_per_second": 30.768,
|
9728 |
+
"step": 486000
|
9729 |
+
},
|
9730 |
+
{
|
9731 |
+
"epoch": 14.91,
|
9732 |
+
"learning_rate": 1.0577603113715964e-05,
|
9733 |
+
"loss": 0.3125,
|
9734 |
+
"step": 486500
|
9735 |
+
},
|
9736 |
+
{
|
9737 |
+
"epoch": 14.92,
|
9738 |
+
"learning_rate": 1.0535635894304106e-05,
|
9739 |
+
"loss": 0.3126,
|
9740 |
+
"step": 487000
|
9741 |
+
},
|
9742 |
+
{
|
9743 |
+
"epoch": 14.92,
|
9744 |
+
"eval_loss": 0.7741805911064148,
|
9745 |
+
"eval_runtime": 0.5052,
|
9746 |
+
"eval_samples_per_second": 1979.424,
|
9747 |
+
"eval_steps_per_second": 31.671,
|
9748 |
+
"step": 487000
|
9749 |
+
},
|
9750 |
+
{
|
9751 |
+
"epoch": 14.94,
|
9752 |
+
"learning_rate": 1.0495248514032875e-05,
|
9753 |
+
"loss": 0.3123,
|
9754 |
+
"step": 487500
|
9755 |
+
},
|
9756 |
+
{
|
9757 |
+
"epoch": 14.95,
|
9758 |
+
"learning_rate": 1.045644141457218e-05,
|
9759 |
+
"loss": 0.3126,
|
9760 |
+
"step": 488000
|
9761 |
+
},
|
9762 |
+
{
|
9763 |
+
"epoch": 14.95,
|
9764 |
+
"eval_loss": 0.7744404077529907,
|
9765 |
+
"eval_runtime": 0.5273,
|
9766 |
+
"eval_samples_per_second": 1896.451,
|
9767 |
+
"eval_steps_per_second": 30.343,
|
9768 |
+
"step": 488000
|
9769 |
+
},
|
9770 |
+
{
|
9771 |
+
"epoch": 14.97,
|
9772 |
+
"learning_rate": 1.0419215020310254e-05,
|
9773 |
+
"loss": 0.3123,
|
9774 |
+
"step": 488500
|
9775 |
+
},
|
9776 |
+
{
|
9777 |
+
"epoch": 14.98,
|
9778 |
+
"learning_rate": 1.0383569738348988e-05,
|
9779 |
+
"loss": 0.3125,
|
9780 |
+
"step": 489000
|
9781 |
+
},
|
9782 |
+
{
|
9783 |
+
"epoch": 14.98,
|
9784 |
+
"eval_loss": 0.7724484205245972,
|
9785 |
+
"eval_runtime": 0.5078,
|
9786 |
+
"eval_samples_per_second": 1969.112,
|
9787 |
+
"eval_steps_per_second": 31.506,
|
9788 |
+
"step": 489000
|
9789 |
+
},
|
9790 |
+
{
|
9791 |
+
"epoch": 15.0,
|
9792 |
+
"learning_rate": 1.0349505958499436e-05,
|
9793 |
+
"loss": 0.3126,
|
9794 |
+
"step": 489500
|
9795 |
+
},
|
9796 |
+
{
|
9797 |
+
"epoch": 15.01,
|
9798 |
+
"learning_rate": 1.0317024053277693e-05,
|
9799 |
+
"loss": 0.3124,
|
9800 |
+
"step": 490000
|
9801 |
+
},
|
9802 |
+
{
|
9803 |
+
"epoch": 15.01,
|
9804 |
+
"eval_loss": 0.7753793001174927,
|
9805 |
+
"eval_runtime": 0.498,
|
9806 |
+
"eval_samples_per_second": 2008.198,
|
9807 |
+
"eval_steps_per_second": 32.131,
|
9808 |
+
"step": 490000
|
9809 |
}
|
9810 |
],
|
9811 |
"max_steps": 500000,
|
9812 |
"num_train_epochs": 16,
|
9813 |
+
"total_flos": 1.5654749980670862e+22,
|
9814 |
"trial_name": null,
|
9815 |
"trial_params": null
|
9816 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6673bee6889f96f4d0585736b0bbd0104ce06075881649e694ae573ad1d2887a
|
3 |
size 102501541
|