bitsoko commited on
Commit
588ce04
·
verified ·
1 Parent(s): c86992c

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eeb743d2fa9bf331d379631b7ef0b8c68e7774e97a71b38840d8975b5d99fc4a
3
  size 119597408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a94fac9e323c5fbebfe886066a1743f12550b20b593b0fca7b99a7ea370dd990
3
  size 119597408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e07fbe7b1b0f4cd92cbd073a4e31b6a5e3a92d3e33cecc19de5d91d7616d06d3
3
  size 60387220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:512cf826ff85e68fffbcaa4e5178926ef6fafa0892b557e9f00ceaa6914d5726
3
  size 60387220
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d9b92fa45e44153a07435b55d5d3d71bf302bfd9a3b3f027d29977fda525192
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01d248dd4a61bc85f685999c311d599535fb345e57d98e4ccefd0735e695f9bd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efef9367937c87bc0bc848b4e2adc863b119783b8c6fd51e9157e257a682f5a9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92364bde4f303f913bfd963d4c88c3c3a056f83b75924575f2b397f85dd5b7b0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.03374440149702436,
5
  "eval_steps": 20,
6
- "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -412,6 +412,51 @@
412
  "eval_samples_per_second": 5.069,
413
  "eval_steps_per_second": 0.659,
414
  "step": 540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
  }
416
  ],
417
  "logging_steps": 20,
@@ -419,7 +464,7 @@
419
  "num_input_tokens_seen": 0,
420
  "num_train_epochs": 3,
421
  "save_steps": 50,
422
- "total_flos": 3.5324564332032e+16,
423
  "train_batch_size": 2,
424
  "trial_name": null,
425
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03681207436039021,
5
  "eval_steps": 20,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
412
  "eval_samples_per_second": 5.069,
413
  "eval_steps_per_second": 0.659,
414
  "step": 540
415
+ },
416
+ {
417
+ "epoch": 0.034357936069697524,
418
+ "grad_norm": 0.623745322227478,
419
+ "learning_rate": 0.00019772968992882274,
420
+ "loss": 1.5216,
421
+ "step": 560
422
+ },
423
+ {
424
+ "epoch": 0.034357936069697524,
425
+ "eval_loss": 1.3708571195602417,
426
+ "eval_runtime": 19.6215,
427
+ "eval_samples_per_second": 5.096,
428
+ "eval_steps_per_second": 0.663,
429
+ "step": 560
430
+ },
431
+ {
432
+ "epoch": 0.03558500521504387,
433
+ "grad_norm": 0.7613083124160767,
434
+ "learning_rate": 0.0001976478769532848,
435
+ "loss": 1.4677,
436
+ "step": 580
437
+ },
438
+ {
439
+ "epoch": 0.03558500521504387,
440
+ "eval_loss": 1.3612563610076904,
441
+ "eval_runtime": 19.7315,
442
+ "eval_samples_per_second": 5.068,
443
+ "eval_steps_per_second": 0.659,
444
+ "step": 580
445
+ },
446
+ {
447
+ "epoch": 0.03681207436039021,
448
+ "grad_norm": 0.6662244200706482,
449
+ "learning_rate": 0.00019756606397774689,
450
+ "loss": 1.4336,
451
+ "step": 600
452
+ },
453
+ {
454
+ "epoch": 0.03681207436039021,
455
+ "eval_loss": 1.3519067764282227,
456
+ "eval_runtime": 19.6923,
457
+ "eval_samples_per_second": 5.078,
458
+ "eval_steps_per_second": 0.66,
459
+ "step": 600
460
  }
461
  ],
462
  "logging_steps": 20,
 
464
  "num_input_tokens_seen": 0,
465
  "num_train_epochs": 3,
466
  "save_steps": 50,
467
+ "total_flos": 3.892256129028096e+16,
468
  "train_batch_size": 2,
469
  "trial_name": null,
470
  "trial_params": null