error577 committed on
Commit
46ba2af
·
verified ·
1 Parent(s): b39ee1c

Training in progress, step 280, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47d1b5f64bd2a32d210dc344d1aff48c8e493243e614ec9102648d191115dc29
3
  size 250422888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ec4daa34e1b935faa5430b839c1b1376234e001622e8070291431ae3b9045a6
3
  size 250422888
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98f1e84050803c2c14633c1d7ea4207038a49fbb83d078e48cf5120e5f6a2bb9
3
  size 127788756
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2cf3d564b798f114de21e7824ed904bb2253270fddc61cb4be5dbab804c0dbe
3
  size 127788756
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5099e825b9d64fd5c6ba8576cc14367aedac4b0fecec8ded967a654f174bf6f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95c4ab9d7cc61365afe9c1520630b1d6dab7e2688ccbe4b330d8eeb62edffd3d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50b7ff23712e03935414ffb227695142d0eb9d939a270308f287d92b88215dbb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa1e27bc8c4f6272ee858bf97369b9e68f7265e3e9a72207bbd5098643e86719
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.05984061681866567,
5
  "eval_steps": 20,
6
- "global_step": 260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1939,6 +1939,154 @@
1939
  "eval_samples_per_second": 4.194,
1940
  "eval_steps_per_second": 4.194,
1941
  "step": 260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1942
  }
1943
  ],
1944
  "logging_steps": 1,
@@ -1958,7 +2106,7 @@
1958
  "attributes": {}
1959
  }
1960
  },
1961
- "total_flos": 4.12608947552256e+16,
1962
  "train_batch_size": 1,
1963
  "trial_name": null,
1964
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.06444374118933226,
5
  "eval_steps": 20,
6
+ "global_step": 280,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1939
  "eval_samples_per_second": 4.194,
1940
  "eval_steps_per_second": 4.194,
1941
  "step": 260
1942
+ },
1943
+ {
1944
+ "epoch": 0.060070773037199,
1945
+ "grad_norm": 5.089763164520264,
1946
+ "learning_rate": 1.3189384779019535e-05,
1947
+ "loss": 2.9909,
1948
+ "step": 261
1949
+ },
1950
+ {
1951
+ "epoch": 0.06030092925573233,
1952
+ "grad_norm": 4.29011869430542,
1953
+ "learning_rate": 1.25311378409661e-05,
1954
+ "loss": 1.7705,
1955
+ "step": 262
1956
+ },
1957
+ {
1958
+ "epoch": 0.06053108547426566,
1959
+ "grad_norm": 3.4714138507843018,
1960
+ "learning_rate": 1.1889023483392879e-05,
1961
+ "loss": 1.7503,
1962
+ "step": 263
1963
+ },
1964
+ {
1965
+ "epoch": 0.060761241692798985,
1966
+ "grad_norm": 4.181496620178223,
1967
+ "learning_rate": 1.1263117061266675e-05,
1968
+ "loss": 0.8877,
1969
+ "step": 264
1970
+ },
1971
+ {
1972
+ "epoch": 0.06099139791133232,
1973
+ "grad_norm": 2.1141738891601562,
1974
+ "learning_rate": 1.0653492027481286e-05,
1975
+ "loss": 1.2273,
1976
+ "step": 265
1977
+ },
1978
+ {
1979
+ "epoch": 0.061221554129865643,
1980
+ "grad_norm": 6.813518047332764,
1981
+ "learning_rate": 1.0060219924237379e-05,
1982
+ "loss": 2.4627,
1983
+ "step": 266
1984
+ },
1985
+ {
1986
+ "epoch": 0.061451710348398976,
1987
+ "grad_norm": 3.919358730316162,
1988
+ "learning_rate": 9.48337037464666e-06,
1989
+ "loss": 1.6218,
1990
+ "step": 267
1991
+ },
1992
+ {
1993
+ "epoch": 0.06168186656693231,
1994
+ "grad_norm": 5.324815273284912,
1995
+ "learning_rate": 8.923011074561404e-06,
1996
+ "loss": 2.7972,
1997
+ "step": 268
1998
+ },
1999
+ {
2000
+ "epoch": 0.061912022785465634,
2001
+ "grad_norm": 5.306177616119385,
2002
+ "learning_rate": 8.379207784630004e-06,
2003
+ "loss": 2.748,
2004
+ "step": 269
2005
+ },
2006
+ {
2007
+ "epoch": 0.06214217900399897,
2008
+ "grad_norm": 5.779840469360352,
2009
+ "learning_rate": 7.852024322579648e-06,
2010
+ "loss": 2.8093,
2011
+ "step": 270
2012
+ },
2013
+ {
2014
+ "epoch": 0.06237233522253229,
2015
+ "grad_norm": 4.838181495666504,
2016
+ "learning_rate": 7.34152255572697e-06,
2017
+ "loss": 3.2099,
2018
+ "step": 271
2019
+ },
2020
+ {
2021
+ "epoch": 0.06260249144106562,
2022
+ "grad_norm": 2.9259281158447266,
2023
+ "learning_rate": 6.847762393717782e-06,
2024
+ "loss": 0.6693,
2025
+ "step": 272
2026
+ },
2027
+ {
2028
+ "epoch": 0.06283264765959895,
2029
+ "grad_norm": 4.114650726318359,
2030
+ "learning_rate": 6.370801781496326e-06,
2031
+ "loss": 3.2477,
2032
+ "step": 273
2033
+ },
2034
+ {
2035
+ "epoch": 0.06306280387813228,
2036
+ "grad_norm": 4.7701592445373535,
2037
+ "learning_rate": 5.910696692505201e-06,
2038
+ "loss": 1.5787,
2039
+ "step": 274
2040
+ },
2041
+ {
2042
+ "epoch": 0.06329296009666562,
2043
+ "grad_norm": 4.48903226852417,
2044
+ "learning_rate": 5.467501122116563e-06,
2045
+ "loss": 1.4006,
2046
+ "step": 275
2047
+ },
2048
+ {
2049
+ "epoch": 0.06352311631519894,
2050
+ "grad_norm": 3.0731749534606934,
2051
+ "learning_rate": 5.0412670812956465e-06,
2052
+ "loss": 0.4172,
2053
+ "step": 276
2054
+ },
2055
+ {
2056
+ "epoch": 0.06375327253373227,
2057
+ "grad_norm": 6.239867210388184,
2058
+ "learning_rate": 4.6320445904969475e-06,
2059
+ "loss": 2.111,
2060
+ "step": 277
2061
+ },
2062
+ {
2063
+ "epoch": 0.0639834287522656,
2064
+ "grad_norm": 4.272534370422363,
2065
+ "learning_rate": 4.239881673794165e-06,
2066
+ "loss": 1.026,
2067
+ "step": 278
2068
+ },
2069
+ {
2070
+ "epoch": 0.06421358497079893,
2071
+ "grad_norm": 4.327932834625244,
2072
+ "learning_rate": 3.864824353244367e-06,
2073
+ "loss": 1.7713,
2074
+ "step": 279
2075
+ },
2076
+ {
2077
+ "epoch": 0.06444374118933226,
2078
+ "grad_norm": 7.858902454376221,
2079
+ "learning_rate": 3.506916643487001e-06,
2080
+ "loss": 3.015,
2081
+ "step": 280
2082
+ },
2083
+ {
2084
+ "epoch": 0.06444374118933226,
2085
+ "eval_loss": 0.3807111978530884,
2086
+ "eval_runtime": 83.9232,
2087
+ "eval_samples_per_second": 4.194,
2088
+ "eval_steps_per_second": 4.194,
2089
+ "step": 280
2090
  }
2091
  ],
2092
  "logging_steps": 1,
 
2106
  "attributes": {}
2107
  }
2108
  },
2109
+ "total_flos": 4.44348097363968e+16,
2110
  "train_batch_size": 1,
2111
  "trial_name": null,
2112
  "trial_params": null