kooff11 committed on
Commit bd8474b · verified · 1 Parent(s): e8304fa

Training in progress, step 30, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bed482554efe123094943faf37455da3a996fdac36f56f8fb99db004c10f6e77
+oid sha256:1d7e1956e00bca6a37ff14db2a30da9b68b0412270804d15c373d573d895130e
 size 619632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:085da727568270ac2791a46b34838f3da7de5ded0312a74ee45c1fe08c97f73c
+oid sha256:f68e5fcfec47e208e5db613b7916aebe5eb47e922e21e87d04bd4b90ef71e6c0
 size 1324026
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d27fe899a7839025729cf1fb7357f56d7faf3f1dbdc18ff29f9424d9d3fc5b7
+oid sha256:5f1b42cbd14e4887332cb62e946119f0bcf0d9a9fcacc80ed8b66010e166bbd7
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17fc2c8c4ae755ef8015c8c7e0643c28af7cc038251fdb78f97f403908b497c7
+oid sha256:9c8d2f6e87f5b671dfa031e06947209073b518165329eeb9867cd4aaa16776ca
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3af69dbf654c73d9cdaa3b7070f08391ce43134d6289c650afd77196fc9b0fe
+oid sha256:b3e333deca01a6860a16a4bea1a2ebeb14f960dad45973fb2bf65501096c51e2
 size 1064
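
All five files above are tracked with Git LFS, so the diffs only touch the pointer files: each pointer records the blob's SHA-256 digest (oid) and byte size, and an unchanged size with a new oid means the checkpoint blob was rewritten with same-sized contents. As a minimal sketch (not part of this commit; the file paths below are placeholders), a downloaded blob can be checked against its pointer like this:

import hashlib

def parse_lfs_pointer(pointer_path):
    # Parse the "key value" lines of a Git LFS pointer (version, oid, size).
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_blob(pointer_path, blob_path):
    # Compare a downloaded blob against the pointer's sha256 oid and byte size.
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    total = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            total += len(chunk)
    return digest.hexdigest() == expected_oid and total == expected_size

# Hypothetical local paths, not files in this commit:
# verify_blob("adapter_model.pointer", "adapter_model.safetensors")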
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5043341213553979,
+  "epoch": 0.7565011820330969,
   "eval_steps": 10,
-  "global_step": 20,
+  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -171,6 +171,84 @@
       "eval_samples_per_second": 92.192,
       "eval_steps_per_second": 23.048,
       "step": 20
+    },
+    {
+      "epoch": 0.5295508274231678,
+      "grad_norm": 0.23226885497570038,
+      "learning_rate": 5e-05,
+      "loss": 11.9191,
+      "step": 21
+    },
+    {
+      "epoch": 0.5547675334909378,
+      "grad_norm": 0.23677287995815277,
+      "learning_rate": 4.5871032726383386e-05,
+      "loss": 11.9191,
+      "step": 22
+    },
+    {
+      "epoch": 0.5799842395587076,
+      "grad_norm": 0.24115414917469025,
+      "learning_rate": 4.17702704859633e-05,
+      "loss": 11.9169,
+      "step": 23
+    },
+    {
+      "epoch": 0.6052009456264775,
+      "grad_norm": 0.23872853815555573,
+      "learning_rate": 3.772572564296005e-05,
+      "loss": 11.9193,
+      "step": 24
+    },
+    {
+      "epoch": 0.6304176516942475,
+      "grad_norm": 0.24184449017047882,
+      "learning_rate": 3.3765026539765834e-05,
+      "loss": 11.9191,
+      "step": 25
+    },
+    {
+      "epoch": 0.6556343577620173,
+      "grad_norm": 0.2603144645690918,
+      "learning_rate": 2.991522876735154e-05,
+      "loss": 11.9174,
+      "step": 26
+    },
+    {
+      "epoch": 0.6808510638297872,
+      "grad_norm": 0.26540812849998474,
+      "learning_rate": 2.6202630348146324e-05,
+      "loss": 11.916,
+      "step": 27
+    },
+    {
+      "epoch": 0.7060677698975572,
+      "grad_norm": 0.2523845136165619,
+      "learning_rate": 2.2652592093878666e-05,
+      "loss": 11.9157,
+      "step": 28
+    },
+    {
+      "epoch": 0.731284475965327,
+      "grad_norm": 0.24367552995681763,
+      "learning_rate": 1.928936436551661e-05,
+      "loss": 11.9162,
+      "step": 29
+    },
+    {
+      "epoch": 0.7565011820330969,
+      "grad_norm": 0.25121253728866577,
+      "learning_rate": 1.6135921418712956e-05,
+      "loss": 11.9183,
+      "step": 30
+    },
+    {
+      "epoch": 0.7565011820330969,
+      "eval_loss": 11.915938377380371,
+      "eval_runtime": 2.9116,
+      "eval_samples_per_second": 92.045,
+      "eval_steps_per_second": 23.011,
+      "step": 30
     }
   ],
   "logging_steps": 1,
@@ -190,7 +268,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 324995477667840.0,
+  "total_flos": 487493216501760.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null