plip committed on
Commit
fa3956e
1 Parent(s): 04b202a

Training in progress, step 260000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9c431c8ac0b934898a38cd62a12706d8ab0fc93577c4e6844d7870ae6512e7f
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66c0b735c70ca1ca003eab9d686d7e3ea552331bf016385c8fad2dfe60c740ac
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36871e0a0236345a131fc6413f45a963c57827bc4a63fc490f54666ca02cdc4f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8e8c193ef48de967d7501e0b15a06f4b379c56e8efdf426bfb091824f5c716b
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66f8a69dfc02c488bc648729ef9bad2ca2a24c53ab78fef957c6f965ce64fa70
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89edb9f9ff26027cc98aa82c2133f2e1b82d5b92ed39de235eb4cb2271e68c3d
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f85e11280175f8d49e58ca2c243b97ecb329b334e32f849def115ac8e9089484
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54ee6f005be263ffc1028eeae009e64fa85afa0eb3360fe9f44a8e6025237de1
3
+ size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f308a322446c83c7ff8ca339af44e90f317ca3e4bc00bec09d3fca34b1bd288
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3500ada53b759cb5c8307ed7d4c05bb6055c04f57f48c3b210c40f88bacd59f
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b730665b8791d2a5bb5ca82f14abbe6976f10590220fbbf30dfd68881b966e88
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9805eeb18ee0853d0258572c1ad542ff4bc6ff2393ff8df1da096af07bacd6d
3
+ size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:896821ffe540c0e03a6707d24bb82c6126a1dd196a3f95efa94bb655e9f2789a
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b00333ce97a5a96a0369400120eb38a7bfd7ee5df94e91c7d6993e0e7f5524ba
3
+ size 14439
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bf3b1113ea18a94bc71b301c56b529f1a0a69f16d8397dc90d5d9cb24b83265
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4802de77ca51e4efac2063b9b4a2ac08f4acde74a027da9b43cf90af44cf0108
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7202c113cacd346ee4859b578e536f19b58145065a28d3ad52e00f885db71f7e
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f2a04b63259c1a14b093267b1fd26d6b21fab4af2fc765329473d9fca239907
3
  size 14439
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d7c35cdfe9dea2fc398ff2f4cb9aef06694dd8b7b25c155eaae9f938d26d546
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f74dc2cbdc738c6fbf513addd5269b878530a18e75ada83dcaec9120da52354f
3
+ size 14439
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:438f3db980ad0547f739432602e1f85cc46b6fbef312b9261fd3b355ceeb97af
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b7fe86b6bf62db9f7989d6e264b9b70447a29a8d4bbea419af77ab1989ca356
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.659548393026747,
5
- "global_step": 250000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5006,11 +5006,211 @@
5006
  "eval_samples_per_second": 1847.755,
5007
  "eval_steps_per_second": 29.564,
5008
  "step": 250000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5009
  }
5010
  ],
5011
  "max_steps": 500000,
5012
  "num_train_epochs": 16,
5013
- "total_flos": 7.987122553698669e+21,
5014
  "trial_name": null,
5015
  "trial_params": null
5016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.965930328747817,
5
+ "global_step": 260000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5006
  "eval_samples_per_second": 1847.755,
5007
  "eval_steps_per_second": 29.564,
5008
  "step": 250000
5009
+ },
5010
+ {
5011
+ "epoch": 7.67,
5012
+ "learning_rate": 0.00016649607200138356,
5013
+ "loss": 0.3383,
5014
+ "step": 250500
5015
+ },
5016
+ {
5017
+ "epoch": 7.69,
5018
+ "learning_rate": 0.00016601801319007743,
5019
+ "loss": 0.3382,
5020
+ "step": 251000
5021
+ },
5022
+ {
5023
+ "epoch": 7.69,
5024
+ "eval_loss": 0.782693088054657,
5025
+ "eval_runtime": 0.5248,
5026
+ "eval_samples_per_second": 1905.624,
5027
+ "eval_steps_per_second": 30.49,
5028
+ "step": 251000
5029
+ },
5030
+ {
5031
+ "epoch": 7.71,
5032
+ "learning_rate": 0.00016553983388754428,
5033
+ "loss": 0.3381,
5034
+ "step": 251500
5035
+ },
5036
+ {
5037
+ "epoch": 7.72,
5038
+ "learning_rate": 0.00016506153932307636,
5039
+ "loss": 0.3377,
5040
+ "step": 252000
5041
+ },
5042
+ {
5043
+ "epoch": 7.72,
5044
+ "eval_loss": 0.7844077944755554,
5045
+ "eval_runtime": 0.5281,
5046
+ "eval_samples_per_second": 1893.751,
5047
+ "eval_steps_per_second": 30.3,
5048
+ "step": 252000
5049
+ },
5050
+ {
5051
+ "epoch": 7.74,
5052
+ "learning_rate": 0.00016458313472722638,
5053
+ "loss": 0.3376,
5054
+ "step": 252500
5055
+ },
5056
+ {
5057
+ "epoch": 7.75,
5058
+ "learning_rate": 0.00016410462533175045,
5059
+ "loss": 0.3377,
5060
+ "step": 253000
5061
+ },
5062
+ {
5063
+ "epoch": 7.75,
5064
+ "eval_loss": 0.7837400436401367,
5065
+ "eval_runtime": 0.5195,
5066
+ "eval_samples_per_second": 1924.894,
5067
+ "eval_steps_per_second": 30.798,
5068
+ "step": 253000
5069
+ },
5070
+ {
5071
+ "epoch": 7.77,
5072
+ "learning_rate": 0.00016362601636955049,
5073
+ "loss": 0.3378,
5074
+ "step": 253500
5075
+ },
5076
+ {
5077
+ "epoch": 7.78,
5078
+ "learning_rate": 0.00016314731307461754,
5079
+ "loss": 0.3377,
5080
+ "step": 254000
5081
+ },
5082
+ {
5083
+ "epoch": 7.78,
5084
+ "eval_loss": 0.7803494930267334,
5085
+ "eval_runtime": 0.5126,
5086
+ "eval_samples_per_second": 1950.683,
5087
+ "eval_steps_per_second": 31.211,
5088
+ "step": 254000
5089
+ },
5090
+ {
5091
+ "epoch": 7.8,
5092
+ "learning_rate": 0.0001626685206819742,
5093
+ "loss": 0.3376,
5094
+ "step": 254500
5095
+ },
5096
+ {
5097
+ "epoch": 7.81,
5098
+ "learning_rate": 0.0001621896444276172,
5099
+ "loss": 0.337,
5100
+ "step": 255000
5101
+ },
5102
+ {
5103
+ "epoch": 7.81,
5104
+ "eval_loss": 0.7785842418670654,
5105
+ "eval_runtime": 0.5059,
5106
+ "eval_samples_per_second": 1976.634,
5107
+ "eval_steps_per_second": 31.626,
5108
+ "step": 255000
5109
+ },
5110
+ {
5111
+ "epoch": 7.83,
5112
+ "learning_rate": 0.00016171068954846067,
5113
+ "loss": 0.3373,
5114
+ "step": 255500
5115
+ },
5116
+ {
5117
+ "epoch": 7.84,
5118
+ "learning_rate": 0.00016123166128227835,
5119
+ "loss": 0.3372,
5120
+ "step": 256000
5121
+ },
5122
+ {
5123
+ "epoch": 7.84,
5124
+ "eval_loss": 0.7810379266738892,
5125
+ "eval_runtime": 0.5109,
5126
+ "eval_samples_per_second": 1957.233,
5127
+ "eval_steps_per_second": 31.316,
5128
+ "step": 256000
5129
+ },
5130
+ {
5131
+ "epoch": 7.86,
5132
+ "learning_rate": 0.0001607525648676467,
5133
+ "loss": 0.3368,
5134
+ "step": 256500
5135
+ },
5136
+ {
5137
+ "epoch": 7.87,
5138
+ "learning_rate": 0.0001602734055438873,
5139
+ "loss": 0.337,
5140
+ "step": 257000
5141
+ },
5142
+ {
5143
+ "epoch": 7.87,
5144
+ "eval_loss": 0.7789760828018188,
5145
+ "eval_runtime": 0.541,
5146
+ "eval_samples_per_second": 1848.491,
5147
+ "eval_steps_per_second": 29.576,
5148
+ "step": 257000
5149
+ },
5150
+ {
5151
+ "epoch": 7.89,
5152
+ "learning_rate": 0.00015979418855100963,
5153
+ "loss": 0.3367,
5154
+ "step": 257500
5155
+ },
5156
+ {
5157
+ "epoch": 7.9,
5158
+ "learning_rate": 0.00015931491912965417,
5159
+ "loss": 0.3366,
5160
+ "step": 258000
5161
+ },
5162
+ {
5163
+ "epoch": 7.9,
5164
+ "eval_loss": 0.7777426838874817,
5165
+ "eval_runtime": 0.5219,
5166
+ "eval_samples_per_second": 1916.204,
5167
+ "eval_steps_per_second": 30.659,
5168
+ "step": 258000
5169
+ },
5170
+ {
5171
+ "epoch": 7.92,
5172
+ "learning_rate": 0.0001588356025210344,
5173
+ "loss": 0.3369,
5174
+ "step": 258500
5175
+ },
5176
+ {
5177
+ "epoch": 7.94,
5178
+ "learning_rate": 0.00015835624396688,
5179
+ "loss": 0.3364,
5180
+ "step": 259000
5181
+ },
5182
+ {
5183
+ "epoch": 7.94,
5184
+ "eval_loss": 0.7759175300598145,
5185
+ "eval_runtime": 0.5164,
5186
+ "eval_samples_per_second": 1936.553,
5187
+ "eval_steps_per_second": 30.985,
5188
+ "step": 259000
5189
+ },
5190
+ {
5191
+ "epoch": 7.95,
5192
+ "learning_rate": 0.00015787684870937924,
5193
+ "loss": 0.3364,
5194
+ "step": 259500
5195
+ },
5196
+ {
5197
+ "epoch": 7.97,
5198
+ "learning_rate": 0.00015739742199112196,
5199
+ "loss": 0.3364,
5200
+ "step": 260000
5201
+ },
5202
+ {
5203
+ "epoch": 7.97,
5204
+ "eval_loss": 0.7805649638175964,
5205
+ "eval_runtime": 0.5211,
5206
+ "eval_samples_per_second": 1919.019,
5207
+ "eval_steps_per_second": 30.704,
5208
+ "step": 260000
5209
  }
5210
  ],
5211
  "max_steps": 500000,
5212
  "num_train_epochs": 16,
5213
+ "total_flos": 8.306609692473169e+21,
5214
  "trial_name": null,
5215
  "trial_params": null
5216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36871e0a0236345a131fc6413f45a963c57827bc4a63fc490f54666ca02cdc4f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8e8c193ef48de967d7501e0b15a06f4b379c56e8efdf426bfb091824f5c716b
3
  size 102501541