Femboyuwu2000 commited on
Commit
2667008
1 Parent(s): 011216a

Training in progress, step 5580, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3a737d68a4827402068459de6efdbc7478a5a7fd2fe7bd0de466d15928fc4c2
3
  size 13982248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e51450f5e4b92cb6050a5a712137dfb440701725d7829a72cff434a6a6aaa41
3
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:400d300ab5c6f0031e3d0ba8d37ecabfc57b9e43f048b577c44acb4c00908460
3
  size 7062522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed1c4657442ba41e0933213c7fee10b7f93702abbb9f81f18812f94f63338971
3
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ae9f372ef03becebb97898a812ec56da297ccd7cd57a3d1c178ee8c3636ac73
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb3f735dc4651d569296bfd7a90e45c1cc6b4f4636e1916600a739323a96317
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:215c966d5ce6af96785202b075ac3637e563938102fe487ca5657a08dce17dcb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f302dc9006cdba2924e7e2a7a8ee731d1e7e53d419fb3e6bb33e1ba8b9c94c94
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4256,
5
  "eval_steps": 500,
6
- "global_step": 5320,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1869,6 +1869,97 @@
1869
  "learning_rate": 2.5827524199573033e-05,
1870
  "loss": 3.475,
1871
  "step": 5320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1872
  }
1873
  ],
1874
  "logging_steps": 20,
@@ -1876,7 +1967,7 @@
1876
  "num_input_tokens_seen": 0,
1877
  "num_train_epochs": 2,
1878
  "save_steps": 20,
1879
- "total_flos": 1.2585955553869824e+16,
1880
  "train_batch_size": 8,
1881
  "trial_name": null,
1882
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4464,
5
  "eval_steps": 500,
6
+ "global_step": 5580,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1869
  "learning_rate": 2.5827524199573033e-05,
1870
  "loss": 3.475,
1871
  "step": 5320
1872
+ },
1873
+ {
1874
+ "epoch": 0.43,
1875
+ "grad_norm": 40.04597091674805,
1876
+ "learning_rate": 2.5793846020484383e-05,
1877
+ "loss": 3.4903,
1878
+ "step": 5340
1879
+ },
1880
+ {
1881
+ "epoch": 0.43,
1882
+ "grad_norm": 27.712465286254883,
1883
+ "learning_rate": 2.5760054619058537e-05,
1884
+ "loss": 3.5108,
1885
+ "step": 5360
1886
+ },
1887
+ {
1888
+ "epoch": 0.43,
1889
+ "grad_norm": 43.66648864746094,
1890
+ "learning_rate": 2.5726150349751306e-05,
1891
+ "loss": 3.4656,
1892
+ "step": 5380
1893
+ },
1894
+ {
1895
+ "epoch": 0.43,
1896
+ "grad_norm": 28.305545806884766,
1897
+ "learning_rate": 2.569213356820244e-05,
1898
+ "loss": 3.5766,
1899
+ "step": 5400
1900
+ },
1901
+ {
1902
+ "epoch": 0.43,
1903
+ "grad_norm": 34.897857666015625,
1904
+ "learning_rate": 2.565800463123187e-05,
1905
+ "loss": 3.4286,
1906
+ "step": 5420
1907
+ },
1908
+ {
1909
+ "epoch": 0.44,
1910
+ "grad_norm": 43.083229064941406,
1911
+ "learning_rate": 2.5623763896835997e-05,
1912
+ "loss": 3.4292,
1913
+ "step": 5440
1914
+ },
1915
+ {
1916
+ "epoch": 0.44,
1917
+ "grad_norm": 35.71794128417969,
1918
+ "learning_rate": 2.5589411724183926e-05,
1919
+ "loss": 3.5542,
1920
+ "step": 5460
1921
+ },
1922
+ {
1923
+ "epoch": 0.44,
1924
+ "grad_norm": 23.70340347290039,
1925
+ "learning_rate": 2.555494847361369e-05,
1926
+ "loss": 3.5276,
1927
+ "step": 5480
1928
+ },
1929
+ {
1930
+ "epoch": 0.44,
1931
+ "grad_norm": 55.299556732177734,
1932
+ "learning_rate": 2.552037450662849e-05,
1933
+ "loss": 3.5644,
1934
+ "step": 5500
1935
+ },
1936
+ {
1937
+ "epoch": 0.44,
1938
+ "grad_norm": 22.433879852294922,
1939
+ "learning_rate": 2.5485690185892864e-05,
1940
+ "loss": 3.4475,
1941
+ "step": 5520
1942
+ },
1943
+ {
1944
+ "epoch": 0.44,
1945
+ "grad_norm": 26.397438049316406,
1946
+ "learning_rate": 2.545089587522893e-05,
1947
+ "loss": 3.507,
1948
+ "step": 5540
1949
+ },
1950
+ {
1951
+ "epoch": 0.44,
1952
+ "grad_norm": 25.061750411987305,
1953
+ "learning_rate": 2.5415991939612545e-05,
1954
+ "loss": 3.4521,
1955
+ "step": 5560
1956
+ },
1957
+ {
1958
+ "epoch": 0.45,
1959
+ "grad_norm": 35.5067138671875,
1960
+ "learning_rate": 2.5380978745169473e-05,
1961
+ "loss": 3.4894,
1962
+ "step": 5580
1963
  }
1964
  ],
1965
  "logging_steps": 20,
 
1967
  "num_input_tokens_seen": 0,
1968
  "num_train_epochs": 2,
1969
  "save_steps": 20,
1970
+ "total_flos": 1.320651219615744e+16,
1971
  "train_batch_size": 8,
1972
  "trial_name": null,
1973
  "trial_params": null