stefania-radu
commited on
Commit
•
f0925b8
1
Parent(s):
4bf20bf
Training in progress, step 250000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893441530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce3e998a193326c567e5bf36b4614bcf9a92a873dabc8d985ee1ad116cb61ad2
|
3 |
size 893441530
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 454197066
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36acb3092fbd4cafad2bc39be38c35f4299b28e4d00c2f26079a5a4d3580780f
|
3 |
size 454197066
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58f82edd7a6194c8353e8edba667f026c5f098b23c6a725d52469afb859bd8fc
|
3 |
+
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e57954a9914278616526cbeea30401601bc7a6e242a7a52a60b8796e825a3c98
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1782,11 +1782,85 @@
|
|
1782 |
"eval_samples_per_second": 99.85,
|
1783 |
"eval_steps_per_second": 12.481,
|
1784 |
"step": 240000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1785 |
}
|
1786 |
],
|
1787 |
"max_steps": 1000000,
|
1788 |
"num_train_epochs": 9223372036854775807,
|
1789 |
-
"total_flos": 2.
|
1790 |
"trial_name": null,
|
1791 |
"trial_params": null
|
1792 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.25,
|
5 |
+
"global_step": 250000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1782 |
"eval_samples_per_second": 99.85,
|
1783 |
"eval_steps_per_second": 12.481,
|
1784 |
"step": 240000
|
1785 |
+
},
|
1786 |
+
{
|
1787 |
+
"epoch": 0.24,
|
1788 |
+
"learning_rate": 1.7905926017192613e-05,
|
1789 |
+
"loss": 0.3632,
|
1790 |
+
"step": 241000
|
1791 |
+
},
|
1792 |
+
{
|
1793 |
+
"epoch": 0.24,
|
1794 |
+
"learning_rate": 1.7897364070353612e-05,
|
1795 |
+
"loss": 0.3633,
|
1796 |
+
"step": 242000
|
1797 |
+
},
|
1798 |
+
{
|
1799 |
+
"epoch": 0.24,
|
1800 |
+
"learning_rate": 1.788876360350547e-05,
|
1801 |
+
"loss": 0.3628,
|
1802 |
+
"step": 243000
|
1803 |
+
},
|
1804 |
+
{
|
1805 |
+
"epoch": 0.24,
|
1806 |
+
"learning_rate": 1.7880124710701515e-05,
|
1807 |
+
"loss": 0.3628,
|
1808 |
+
"step": 244000
|
1809 |
+
},
|
1810 |
+
{
|
1811 |
+
"epoch": 0.24,
|
1812 |
+
"learning_rate": 1.7871447486415292e-05,
|
1813 |
+
"loss": 0.363,
|
1814 |
+
"step": 245000
|
1815 |
+
},
|
1816 |
+
{
|
1817 |
+
"epoch": 0.24,
|
1818 |
+
"eval_runtime": 3598.9717,
|
1819 |
+
"eval_samples_per_second": 94.375,
|
1820 |
+
"eval_steps_per_second": 11.797,
|
1821 |
+
"step": 245000
|
1822 |
+
},
|
1823 |
+
{
|
1824 |
+
"epoch": 0.25,
|
1825 |
+
"learning_rate": 1.7862732025539543e-05,
|
1826 |
+
"loss": 0.364,
|
1827 |
+
"step": 246000
|
1828 |
+
},
|
1829 |
+
{
|
1830 |
+
"epoch": 0.25,
|
1831 |
+
"learning_rate": 1.7853978423385145e-05,
|
1832 |
+
"loss": 0.3626,
|
1833 |
+
"step": 247000
|
1834 |
+
},
|
1835 |
+
{
|
1836 |
+
"epoch": 0.25,
|
1837 |
+
"learning_rate": 1.784518677568009e-05,
|
1838 |
+
"loss": 0.361,
|
1839 |
+
"step": 248000
|
1840 |
+
},
|
1841 |
+
{
|
1842 |
+
"epoch": 0.25,
|
1843 |
+
"learning_rate": 1.783635717856843e-05,
|
1844 |
+
"loss": 0.3597,
|
1845 |
+
"step": 249000
|
1846 |
+
},
|
1847 |
+
{
|
1848 |
+
"epoch": 0.25,
|
1849 |
+
"learning_rate": 1.782748972860922e-05,
|
1850 |
+
"loss": 0.3626,
|
1851 |
+
"step": 250000
|
1852 |
+
},
|
1853 |
+
{
|
1854 |
+
"epoch": 0.25,
|
1855 |
+
"eval_runtime": 2943.8602,
|
1856 |
+
"eval_samples_per_second": 115.376,
|
1857 |
+
"eval_steps_per_second": 14.422,
|
1858 |
+
"step": 250000
|
1859 |
}
|
1860 |
],
|
1861 |
"max_steps": 1000000,
|
1862 |
"num_train_epochs": 9223372036854775807,
|
1863 |
+
"total_flos": 2.213652543307776e+21,
|
1864 |
"trial_name": null,
|
1865 |
"trial_params": null
|
1866 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3768
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04b59685dedbedfa441a393ef0561d0eeade03f2fa8b31fadfbec3e0393ee18f
|
3 |
size 3768
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 454197066
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36acb3092fbd4cafad2bc39be38c35f4299b28e4d00c2f26079a5a4d3580780f
|
3 |
size 454197066
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3768
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04b59685dedbedfa441a393ef0561d0eeade03f2fa8b31fadfbec3e0393ee18f
|
3 |
size 3768
|