Femboyuwu2000
commited on
Commit
•
2667008
1
Parent(s):
011216a
Training in progress, step 5580, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13982248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e51450f5e4b92cb6050a5a712137dfb440701725d7829a72cff434a6a6aaa41
|
3 |
size 13982248
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7062522
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed1c4657442ba41e0933213c7fee10b7f93702abbb9f81f18812f94f63338971
|
3 |
size 7062522
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3eb3f735dc4651d569296bfd7a90e45c1cc6b4f4636e1916600a739323a96317
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f302dc9006cdba2924e7e2a7a8ee731d1e7e53d419fb3e6bb33e1ba8b9c94c94
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1869,6 +1869,97 @@
|
|
1869 |
"learning_rate": 2.5827524199573033e-05,
|
1870 |
"loss": 3.475,
|
1871 |
"step": 5320
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1872 |
}
|
1873 |
],
|
1874 |
"logging_steps": 20,
|
@@ -1876,7 +1967,7 @@
|
|
1876 |
"num_input_tokens_seen": 0,
|
1877 |
"num_train_epochs": 2,
|
1878 |
"save_steps": 20,
|
1879 |
-
"total_flos": 1.
|
1880 |
"train_batch_size": 8,
|
1881 |
"trial_name": null,
|
1882 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.4464,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5580,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1869 |
"learning_rate": 2.5827524199573033e-05,
|
1870 |
"loss": 3.475,
|
1871 |
"step": 5320
|
1872 |
+
},
|
1873 |
+
{
|
1874 |
+
"epoch": 0.43,
|
1875 |
+
"grad_norm": 40.04597091674805,
|
1876 |
+
"learning_rate": 2.5793846020484383e-05,
|
1877 |
+
"loss": 3.4903,
|
1878 |
+
"step": 5340
|
1879 |
+
},
|
1880 |
+
{
|
1881 |
+
"epoch": 0.43,
|
1882 |
+
"grad_norm": 27.712465286254883,
|
1883 |
+
"learning_rate": 2.5760054619058537e-05,
|
1884 |
+
"loss": 3.5108,
|
1885 |
+
"step": 5360
|
1886 |
+
},
|
1887 |
+
{
|
1888 |
+
"epoch": 0.43,
|
1889 |
+
"grad_norm": 43.66648864746094,
|
1890 |
+
"learning_rate": 2.5726150349751306e-05,
|
1891 |
+
"loss": 3.4656,
|
1892 |
+
"step": 5380
|
1893 |
+
},
|
1894 |
+
{
|
1895 |
+
"epoch": 0.43,
|
1896 |
+
"grad_norm": 28.305545806884766,
|
1897 |
+
"learning_rate": 2.569213356820244e-05,
|
1898 |
+
"loss": 3.5766,
|
1899 |
+
"step": 5400
|
1900 |
+
},
|
1901 |
+
{
|
1902 |
+
"epoch": 0.43,
|
1903 |
+
"grad_norm": 34.897857666015625,
|
1904 |
+
"learning_rate": 2.565800463123187e-05,
|
1905 |
+
"loss": 3.4286,
|
1906 |
+
"step": 5420
|
1907 |
+
},
|
1908 |
+
{
|
1909 |
+
"epoch": 0.44,
|
1910 |
+
"grad_norm": 43.083229064941406,
|
1911 |
+
"learning_rate": 2.5623763896835997e-05,
|
1912 |
+
"loss": 3.4292,
|
1913 |
+
"step": 5440
|
1914 |
+
},
|
1915 |
+
{
|
1916 |
+
"epoch": 0.44,
|
1917 |
+
"grad_norm": 35.71794128417969,
|
1918 |
+
"learning_rate": 2.5589411724183926e-05,
|
1919 |
+
"loss": 3.5542,
|
1920 |
+
"step": 5460
|
1921 |
+
},
|
1922 |
+
{
|
1923 |
+
"epoch": 0.44,
|
1924 |
+
"grad_norm": 23.70340347290039,
|
1925 |
+
"learning_rate": 2.555494847361369e-05,
|
1926 |
+
"loss": 3.5276,
|
1927 |
+
"step": 5480
|
1928 |
+
},
|
1929 |
+
{
|
1930 |
+
"epoch": 0.44,
|
1931 |
+
"grad_norm": 55.299556732177734,
|
1932 |
+
"learning_rate": 2.552037450662849e-05,
|
1933 |
+
"loss": 3.5644,
|
1934 |
+
"step": 5500
|
1935 |
+
},
|
1936 |
+
{
|
1937 |
+
"epoch": 0.44,
|
1938 |
+
"grad_norm": 22.433879852294922,
|
1939 |
+
"learning_rate": 2.5485690185892864e-05,
|
1940 |
+
"loss": 3.4475,
|
1941 |
+
"step": 5520
|
1942 |
+
},
|
1943 |
+
{
|
1944 |
+
"epoch": 0.44,
|
1945 |
+
"grad_norm": 26.397438049316406,
|
1946 |
+
"learning_rate": 2.545089587522893e-05,
|
1947 |
+
"loss": 3.507,
|
1948 |
+
"step": 5540
|
1949 |
+
},
|
1950 |
+
{
|
1951 |
+
"epoch": 0.44,
|
1952 |
+
"grad_norm": 25.061750411987305,
|
1953 |
+
"learning_rate": 2.5415991939612545e-05,
|
1954 |
+
"loss": 3.4521,
|
1955 |
+
"step": 5560
|
1956 |
+
},
|
1957 |
+
{
|
1958 |
+
"epoch": 0.45,
|
1959 |
+
"grad_norm": 35.5067138671875,
|
1960 |
+
"learning_rate": 2.5380978745169473e-05,
|
1961 |
+
"loss": 3.4894,
|
1962 |
+
"step": 5580
|
1963 |
}
|
1964 |
],
|
1965 |
"logging_steps": 20,
|
|
|
1967 |
"num_input_tokens_seen": 0,
|
1968 |
"num_train_epochs": 2,
|
1969 |
"save_steps": 20,
|
1970 |
+
"total_flos": 1.320651219615744e+16,
|
1971 |
"train_batch_size": 8,
|
1972 |
"trial_name": null,
|
1973 |
"trial_params": null
|