dq158 committed on
Commit
6e36677
1 Parent(s): 603c9a3

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edd15c54ce790c7516cb0d030eb8ff51312d9d594cc86699aaab0955c180e0fd
3
  size 1980859973
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6139318cf513eafa3224f054690d83317b804a9c28a2577d7370471793ce1b21
3
  size 1980859973
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3189017799e464e97ffb8a5e4bb37d2d3a4c52cd249802e8c7dcb7ad9a25afa3
3
  size 990408885
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0fa9ab0647db949841272150c5f38714cb2e9d7c97e66166b9a8e4985840035
3
  size 990408885
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:969c7a9f9e96e582bdeeaa51020c2fe36c4aa27f31056f54feeaf0a9a8297b2c
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58ddbd433b5599bf8b86b64743d5e39fd3dfe1434235c8a38c9b7bbdd7fa5b81
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d42e8f748d546c35d0a3efca92030df4b30dc0bda4152d70d8cbdd1a281cf40
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c334c84f22d7de01f067ab9bee69f31e2f916e7efdb35f8cf0f7917bfd43bda
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.3411617279052734,
3
- "best_model_checkpoint": "dq158/morbius/checkpoint-7890",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 7890,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -154,13 +154,68 @@
154
  "eval_steps_per_second": 1.46,
155
  "eval_translation_length": 45550,
156
  "step": 7890
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  }
158
  ],
159
  "logging_steps": 500,
160
  "max_steps": 13150,
161
  "num_train_epochs": 5,
162
  "save_steps": 500,
163
- "total_flos": 6.483282612977664e+16,
164
  "trial_name": null,
165
  "trial_params": null
166
  }
 
1
  {
2
+ "best_metric": 2.3347413539886475,
3
+ "best_model_checkpoint": "dq158/morbius/checkpoint-10520",
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 10520,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
154
  "eval_steps_per_second": 1.46,
155
  "eval_translation_length": 45550,
156
  "step": 7890
157
+ },
158
+ {
159
+ "epoch": 3.04,
160
+ "learning_rate": 1.958174904942966e-05,
161
+ "loss": 2.4461,
162
+ "step": 8000
163
+ },
164
+ {
165
+ "epoch": 3.23,
166
+ "learning_rate": 1.7680608365019013e-05,
167
+ "loss": 2.4042,
168
+ "step": 8500
169
+ },
170
+ {
171
+ "epoch": 3.42,
172
+ "learning_rate": 1.5779467680608364e-05,
173
+ "loss": 2.4027,
174
+ "step": 9000
175
+ },
176
+ {
177
+ "epoch": 3.61,
178
+ "learning_rate": 1.387832699619772e-05,
179
+ "loss": 2.3927,
180
+ "step": 9500
181
+ },
182
+ {
183
+ "epoch": 3.8,
184
+ "learning_rate": 1.1977186311787073e-05,
185
+ "loss": 2.3733,
186
+ "step": 10000
187
+ },
188
+ {
189
+ "epoch": 3.99,
190
+ "learning_rate": 1.0076045627376426e-05,
191
+ "loss": 2.3716,
192
+ "step": 10500
193
+ },
194
+ {
195
+ "epoch": 4.0,
196
+ "eval_bleu": 0.04869515928692814,
197
+ "eval_brevity_penalty": 0.9823324657661657,
198
+ "eval_length_ratio": 0.9824867144301728,
199
+ "eval_loss": 2.3347413539886475,
200
+ "eval_precisions": [
201
+ 0.12663965838169275,
202
+ 0.0574505431946487,
203
+ 0.030477866031926728,
204
+ 0.027230821761893922
205
+ ],
206
+ "eval_reference_length": 46479,
207
+ "eval_runtime": 200.6451,
208
+ "eval_samples_per_second": 17.479,
209
+ "eval_steps_per_second": 1.46,
210
+ "eval_translation_length": 45665,
211
+ "step": 10520
212
  }
213
  ],
214
  "logging_steps": 500,
215
  "max_steps": 13150,
216
  "num_train_epochs": 5,
217
  "save_steps": 500,
218
+ "total_flos": 8.644376817303552e+16,
219
  "trial_name": null,
220
  "trial_params": null
221
  }