Training in progress, step 290000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd4094b5cac60d73fdd6f21e7e668f3210934954f13fb86e2d4209ee938a0a5a
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6255fd3a6ff2bf7ca5c36d99c6e77a3008adb68677e42013dd8386bb7b970a5
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:363babfb7b54265d790fbecb2309bf42f41b102f3bf25fe89de84147b11c7dfa
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:510c7713e00d76055c77bffa7429d1c526fc618345e8f8ea963b237765d79340
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ae1f30b94d6bbe49a697a2558dea2baf48ccbdc3ae096616d495689477f1d7b
|
3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:815dd65ada93eb961b018854672e652fa0d47bdfa3d615278f6e0ee59635af1b
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eddefbea3505d30a2cf1bb4dae32403e020f44a6c57a201cd9d6a10a92b68999
|
3 |
+
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87c32cbcd683d4257d3e7fe41cd4c20d1f40623baf6540486e5ca371ee7890e3
|
3 |
+
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14439
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f5b8ca27148787504a99d545fbc90a1c7466702929561717fa3c00574d165ef
|
3 |
size 14439
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a00f019162fc5f8994e9b1cb654b981eebf83af07e8dead098665a0f88f9319c
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39bc196d8aea9810b9698ff8cd04e2aeef8774f706fbd61ae0f0055bbacd0eaf
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5606,11 +5606,211 @@
|
|
5606 |
"eval_samples_per_second": 1904.885,
|
5607 |
"eval_steps_per_second": 30.478,
|
5608 |
"step": 280000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5609 |
}
|
5610 |
],
|
5611 |
"max_steps": 500000,
|
5612 |
"num_train_epochs": 16,
|
5613 |
-
"total_flos":
|
5614 |
"trial_name": null,
|
5615 |
"trial_params": null
|
5616 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.885076135911026,
|
5 |
+
"global_step": 290000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5606 |
"eval_samples_per_second": 1904.885,
|
5607 |
"eval_steps_per_second": 30.478,
|
5608 |
"step": 280000
|
5609 |
+
},
|
5610 |
+
{
|
5611 |
+
"epoch": 8.59,
|
5612 |
+
"learning_rate": 0.00013777852207079235,
|
5613 |
+
"loss": 0.3337,
|
5614 |
+
"step": 280500
|
5615 |
+
},
|
5616 |
+
{
|
5617 |
+
"epoch": 8.61,
|
5618 |
+
"learning_rate": 0.00013730250483349825,
|
5619 |
+
"loss": 0.3335,
|
5620 |
+
"step": 281000
|
5621 |
+
},
|
5622 |
+
{
|
5623 |
+
"epoch": 8.61,
|
5624 |
+
"eval_loss": 0.7787224054336548,
|
5625 |
+
"eval_runtime": 0.5159,
|
5626 |
+
"eval_samples_per_second": 1938.186,
|
5627 |
+
"eval_steps_per_second": 31.011,
|
5628 |
+
"step": 281000
|
5629 |
+
},
|
5630 |
+
{
|
5631 |
+
"epoch": 8.62,
|
5632 |
+
"learning_rate": 0.00013682668113317584,
|
5633 |
+
"loss": 0.3334,
|
5634 |
+
"step": 281500
|
5635 |
+
},
|
5636 |
+
{
|
5637 |
+
"epoch": 8.64,
|
5638 |
+
"learning_rate": 0.00013635105617335703,
|
5639 |
+
"loss": 0.3332,
|
5640 |
+
"step": 282000
|
5641 |
+
},
|
5642 |
+
{
|
5643 |
+
"epoch": 8.64,
|
5644 |
+
"eval_loss": 0.7804464101791382,
|
5645 |
+
"eval_runtime": 0.5024,
|
5646 |
+
"eval_samples_per_second": 1990.477,
|
5647 |
+
"eval_steps_per_second": 31.848,
|
5648 |
+
"step": 282000
|
5649 |
+
},
|
5650 |
+
{
|
5651 |
+
"epoch": 8.66,
|
5652 |
+
"learning_rate": 0.00013587563515539996,
|
5653 |
+
"loss": 0.3335,
|
5654 |
+
"step": 282500
|
5655 |
+
},
|
5656 |
+
{
|
5657 |
+
"epoch": 8.67,
|
5658 |
+
"learning_rate": 0.00013540042327843296,
|
5659 |
+
"loss": 0.3332,
|
5660 |
+
"step": 283000
|
5661 |
+
},
|
5662 |
+
{
|
5663 |
+
"epoch": 8.67,
|
5664 |
+
"eval_loss": 0.7820075750350952,
|
5665 |
+
"eval_runtime": 0.5246,
|
5666 |
+
"eval_samples_per_second": 1906.19,
|
5667 |
+
"eval_steps_per_second": 30.499,
|
5668 |
+
"step": 283000
|
5669 |
+
},
|
5670 |
+
{
|
5671 |
+
"epoch": 8.69,
|
5672 |
+
"learning_rate": 0.00013492542573929678,
|
5673 |
+
"loss": 0.3364,
|
5674 |
+
"step": 283500
|
5675 |
+
},
|
5676 |
+
{
|
5677 |
+
"epoch": 8.7,
|
5678 |
+
"learning_rate": 0.00013445064773248846,
|
5679 |
+
"loss": 0.3349,
|
5680 |
+
"step": 284000
|
5681 |
+
},
|
5682 |
+
{
|
5683 |
+
"epoch": 8.7,
|
5684 |
+
"eval_loss": 0.7795833349227905,
|
5685 |
+
"eval_runtime": 0.5328,
|
5686 |
+
"eval_samples_per_second": 1877.003,
|
5687 |
+
"eval_steps_per_second": 30.032,
|
5688 |
+
"step": 284000
|
5689 |
+
},
|
5690 |
+
{
|
5691 |
+
"epoch": 8.72,
|
5692 |
+
"learning_rate": 0.00013397609445010432,
|
5693 |
+
"loss": 0.3324,
|
5694 |
+
"step": 284500
|
5695 |
+
},
|
5696 |
+
{
|
5697 |
+
"epoch": 8.73,
|
5698 |
+
"learning_rate": 0.00013350177108178288,
|
5699 |
+
"loss": 0.3322,
|
5700 |
+
"step": 285000
|
5701 |
+
},
|
5702 |
+
{
|
5703 |
+
"epoch": 8.73,
|
5704 |
+
"eval_loss": 0.778048574924469,
|
5705 |
+
"eval_runtime": 0.5458,
|
5706 |
+
"eval_samples_per_second": 1832.202,
|
5707 |
+
"eval_steps_per_second": 29.315,
|
5708 |
+
"step": 285000
|
5709 |
+
},
|
5710 |
+
{
|
5711 |
+
"epoch": 8.75,
|
5712 |
+
"learning_rate": 0.00013302768281464863,
|
5713 |
+
"loss": 0.3325,
|
5714 |
+
"step": 285500
|
5715 |
+
},
|
5716 |
+
{
|
5717 |
+
"epoch": 8.76,
|
5718 |
+
"learning_rate": 0.0001325538348332548,
|
5719 |
+
"loss": 0.3328,
|
5720 |
+
"step": 286000
|
5721 |
+
},
|
5722 |
+
{
|
5723 |
+
"epoch": 8.76,
|
5724 |
+
"eval_loss": 0.7764204144477844,
|
5725 |
+
"eval_runtime": 0.5373,
|
5726 |
+
"eval_samples_per_second": 1861.329,
|
5727 |
+
"eval_steps_per_second": 29.781,
|
5728 |
+
"step": 286000
|
5729 |
+
},
|
5730 |
+
{
|
5731 |
+
"epoch": 8.78,
|
5732 |
+
"learning_rate": 0.00013208023231952706,
|
5733 |
+
"loss": 0.3322,
|
5734 |
+
"step": 286500
|
5735 |
+
},
|
5736 |
+
{
|
5737 |
+
"epoch": 8.79,
|
5738 |
+
"learning_rate": 0.0001316068804527066,
|
5739 |
+
"loss": 0.3323,
|
5740 |
+
"step": 287000
|
5741 |
+
},
|
5742 |
+
{
|
5743 |
+
"epoch": 8.79,
|
5744 |
+
"eval_loss": 0.7780716419219971,
|
5745 |
+
"eval_runtime": 0.5287,
|
5746 |
+
"eval_samples_per_second": 1891.289,
|
5747 |
+
"eval_steps_per_second": 30.261,
|
5748 |
+
"step": 287000
|
5749 |
+
},
|
5750 |
+
{
|
5751 |
+
"epoch": 8.81,
|
5752 |
+
"learning_rate": 0.00013113378440929353,
|
5753 |
+
"loss": 0.3322,
|
5754 |
+
"step": 287500
|
5755 |
+
},
|
5756 |
+
{
|
5757 |
+
"epoch": 8.82,
|
5758 |
+
"learning_rate": 0.00013066094936299056,
|
5759 |
+
"loss": 0.332,
|
5760 |
+
"step": 288000
|
5761 |
+
},
|
5762 |
+
{
|
5763 |
+
"epoch": 8.82,
|
5764 |
+
"eval_loss": 0.7822167873382568,
|
5765 |
+
"eval_runtime": 0.5284,
|
5766 |
+
"eval_samples_per_second": 1892.53,
|
5767 |
+
"eval_steps_per_second": 30.28,
|
5768 |
+
"step": 288000
|
5769 |
+
},
|
5770 |
+
{
|
5771 |
+
"epoch": 8.84,
|
5772 |
+
"learning_rate": 0.00013018838048464582,
|
5773 |
+
"loss": 0.332,
|
5774 |
+
"step": 288500
|
5775 |
+
},
|
5776 |
+
{
|
5777 |
+
"epoch": 8.85,
|
5778 |
+
"learning_rate": 0.00012971608294219702,
|
5779 |
+
"loss": 0.332,
|
5780 |
+
"step": 289000
|
5781 |
+
},
|
5782 |
+
{
|
5783 |
+
"epoch": 8.85,
|
5784 |
+
"eval_loss": 0.7825139760971069,
|
5785 |
+
"eval_runtime": 0.5164,
|
5786 |
+
"eval_samples_per_second": 1936.526,
|
5787 |
+
"eval_steps_per_second": 30.984,
|
5788 |
+
"step": 289000
|
5789 |
+
},
|
5790 |
+
{
|
5791 |
+
"epoch": 8.87,
|
5792 |
+
"learning_rate": 0.00012924406190061423,
|
5793 |
+
"loss": 0.332,
|
5794 |
+
"step": 289500
|
5795 |
+
},
|
5796 |
+
{
|
5797 |
+
"epoch": 8.89,
|
5798 |
+
"learning_rate": 0.0001287723225218441,
|
5799 |
+
"loss": 0.3323,
|
5800 |
+
"step": 290000
|
5801 |
+
},
|
5802 |
+
{
|
5803 |
+
"epoch": 8.89,
|
5804 |
+
"eval_loss": 0.7750741839408875,
|
5805 |
+
"eval_runtime": 0.5106,
|
5806 |
+
"eval_samples_per_second": 1958.426,
|
5807 |
+
"eval_steps_per_second": 31.335,
|
5808 |
+
"step": 290000
|
5809 |
}
|
5810 |
],
|
5811 |
"max_steps": 500000,
|
5812 |
"num_train_epochs": 16,
|
5813 |
+
"total_flos": 9.265063120844693e+21,
|
5814 |
"trial_name": null,
|
5815 |
"trial_params": null
|
5816 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6255fd3a6ff2bf7ca5c36d99c6e77a3008adb68677e42013dd8386bb7b970a5
|
3 |
size 102501541
|