kooff11 committed
Commit 1fd47a4 · verified · Parent: 3d19bf7

Training in progress, step 40, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d7e1956e00bca6a37ff14db2a30da9b68b0412270804d15c373d573d895130e
+oid sha256:38799347f65a7ed1a2cbacce2d9144aa3c914d18ccb1e3fdd4251542125db97d
 size 619632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f68e5fcfec47e208e5db613b7916aebe5eb47e922e21e87d04bd4b90ef71e6c0
+oid sha256:bab235c3bd36826b1a9f64b02b346ee5a6d20e57442da515f05163e4e5379acd
 size 1324026
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f1b42cbd14e4887332cb62e946119f0bcf0d9a9fcacc80ed8b66010e166bbd7
+oid sha256:90021be0ad63cbcd6db2577a307ccc4130b2c06ca765264bfa98daa67579ad94
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c8d2f6e87f5b671dfa031e06947209073b518165329eeb9867cd4aaa16776ca
+oid sha256:a402bfbd22dfb93faf811296f0988585bea76884af7cafd9bcf1eec8614fa602
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3e333deca01a6860a16a4bea1a2ebeb14f960dad45973fb2bf65501096c51e2
+oid sha256:9a2759c51def6f12ecf076b0cf2571dc6049a636d579f7f0a8bd8ae37daff96c
 size 1064
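
Each binary file above is stored through Git LFS, so the commit only changes the pointer file: the sha256 oid and byte size of the blob move while the pointer format stays the same. Below is a minimal Python sketch for checking a locally downloaded blob against its pointer; the path and the expected oid/size are copied from the adapter_model.safetensors pointer above, and only the standard-library hashlib module is used (this script is not part of the repository, just an illustration).

# Sketch: verify a downloaded blob against its Git LFS pointer.
# Assumes the checkpoint has already been pulled locally; path, oid,
# and size are taken from the adapter_model.safetensors pointer above.
import hashlib

path = "last-checkpoint/adapter_model.safetensors"
expected_oid = "38799347f65a7ed1a2cbacce2d9144aa3c914d18ccb1e3fdd4251542125db97d"
expected_size = 619632

with open(path, "rb") as f:
    data = f.read()

assert len(data) == expected_size, "size mismatch with LFS pointer"
assert hashlib.sha256(data).hexdigest() == expected_oid, "sha256 mismatch"
print("blob matches its LFS pointer")
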
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7565011820330969,
+  "epoch": 1.0110323089046493,
   "eval_steps": 10,
-  "global_step": 30,
+  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -249,6 +249,84 @@
       "eval_samples_per_second": 92.045,
       "eval_steps_per_second": 23.011,
       "step": 30
+    },
+    {
+      "epoch": 0.7817178881008668,
+      "grad_norm": 0.25170794129371643,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 11.9165,
+      "step": 31
+    },
+    {
+      "epoch": 0.8069345941686368,
+      "grad_norm": 0.2574658989906311,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 11.9158,
+      "step": 32
+    },
+    {
+      "epoch": 0.8321513002364066,
+      "grad_norm": 0.2503858506679535,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 11.9152,
+      "step": 33
+    },
+    {
+      "epoch": 0.8573680063041765,
+      "grad_norm": 0.27511805295944214,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 11.9153,
+      "step": 34
+    },
+    {
+      "epoch": 0.8825847123719465,
+      "grad_norm": 0.2627543807029724,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 11.9171,
+      "step": 35
+    },
+    {
+      "epoch": 0.9078014184397163,
+      "grad_norm": 0.22878780961036682,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 11.9206,
+      "step": 36
+    },
+    {
+      "epoch": 0.9330181245074862,
+      "grad_norm": 0.26924818754196167,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 11.9155,
+      "step": 37
+    },
+    {
+      "epoch": 0.9582348305752562,
+      "grad_norm": 0.25205621123313904,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 11.916,
+      "step": 38
+    },
+    {
+      "epoch": 0.983451536643026,
+      "grad_norm": 0.25344499945640564,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 11.9166,
+      "step": 39
+    },
+    {
+      "epoch": 1.0110323089046493,
+      "grad_norm": 0.42802441120147705,
+      "learning_rate": 0.0,
+      "loss": 17.6584,
+      "step": 40
+    },
+    {
+      "epoch": 1.0110323089046493,
+      "eval_loss": 11.915095329284668,
+      "eval_runtime": 2.9117,
+      "eval_samples_per_second": 92.042,
+      "eval_steps_per_second": 23.011,
+      "step": 40
     }
   ],
   "logging_steps": 1,
@@ -263,12 +341,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 487493216501760.0,
+  "total_flos": 649990955335680.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null