Rakhman16 commited on
Commit
ad537bd
·
verified ·
1 Parent(s): 90835e1

Training in progress, step 2404, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20fd6b0054d1f056bdd1f068136faf6ae936a9917ce75d20eb97a5588d22cf17
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87f85bed44e9318e852ba7f17efe6282052cb7b32f2aa95c64b2e71e72a3a8f6
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:978241794f0765cf1904aaf145ce5b4c824548d741ec93437d45849535a4ab04
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a97536a6cd849dbc37c17bc14c1751f38c23bab08204bdfbfe2b1a75da08ad1
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e79bcfaf683538279e39272f16342dfc788f3ded86099a7f67e3c0cbc2e642c8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ad5e4b3f643cc8277d6ec1988fa3ef711691975814d2464a77f58b0a264633
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab6c69dd439a3fb69dcb9250bb4485f8f30f907b01e32d0bd8204147e7413181
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f209df7f5c086a9893a7b6f5e2441e84040cb50d6f1d1fedbaf8d0eb7427609e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.288575142621994,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-2000",
4
- "epoch": 3.3250207813798838,
5
  "eval_steps": 100,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -447,6 +447,94 @@
447
  "eval_samples_per_second": 12.873,
448
  "eval_steps_per_second": 1.616,
449
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  }
451
  ],
452
  "logging_steps": 50,
@@ -461,12 +549,12 @@
461
  "should_evaluate": false,
462
  "should_log": false,
463
  "should_save": true,
464
- "should_training_stop": false
465
  },
466
  "attributes": {}
467
  }
468
  },
469
- "total_flos": 1.948482562424832e+16,
470
  "train_batch_size": 8,
471
  "trial_name": null,
472
  "trial_params": null
 
1
  {
2
  "best_metric": 0.288575142621994,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-2000",
4
+ "epoch": 3.99667497921862,
5
  "eval_steps": 100,
6
+ "global_step": 2404,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
447
  "eval_samples_per_second": 12.873,
448
  "eval_steps_per_second": 1.616,
449
  "step": 2000
450
+ },
451
+ {
452
+ "epoch": 3.408146300914381,
453
+ "grad_norm": 48414.92578125,
454
+ "learning_rate": 7.362728785357738e-06,
455
+ "loss": 0.3053,
456
+ "step": 2050
457
+ },
458
+ {
459
+ "epoch": 3.491271820448878,
460
+ "grad_norm": 56748.72265625,
461
+ "learning_rate": 6.32279534109817e-06,
462
+ "loss": 0.3253,
463
+ "step": 2100
464
+ },
465
+ {
466
+ "epoch": 3.491271820448878,
467
+ "eval_loss": 0.28712254762649536,
468
+ "eval_runtime": 38.3593,
469
+ "eval_samples_per_second": 12.878,
470
+ "eval_steps_per_second": 1.616,
471
+ "step": 2100
472
+ },
473
+ {
474
+ "epoch": 3.574397339983375,
475
+ "grad_norm": 52447.25390625,
476
+ "learning_rate": 5.282861896838603e-06,
477
+ "loss": 0.3188,
478
+ "step": 2150
479
+ },
480
+ {
481
+ "epoch": 3.657522859517872,
482
+ "grad_norm": 47730.953125,
483
+ "learning_rate": 4.242928452579035e-06,
484
+ "loss": 0.3041,
485
+ "step": 2200
486
+ },
487
+ {
488
+ "epoch": 3.657522859517872,
489
+ "eval_loss": 0.2862567901611328,
490
+ "eval_runtime": 38.2735,
491
+ "eval_samples_per_second": 12.907,
492
+ "eval_steps_per_second": 1.62,
493
+ "step": 2200
494
+ },
495
+ {
496
+ "epoch": 3.7406483790523692,
497
+ "grad_norm": 48164.21484375,
498
+ "learning_rate": 3.2029950083194676e-06,
499
+ "loss": 0.3013,
500
+ "step": 2250
501
+ },
502
+ {
503
+ "epoch": 3.8237738985868663,
504
+ "grad_norm": 44376.41015625,
505
+ "learning_rate": 2.1630615640599005e-06,
506
+ "loss": 0.3166,
507
+ "step": 2300
508
+ },
509
+ {
510
+ "epoch": 3.8237738985868663,
511
+ "eval_loss": 0.2858003079891205,
512
+ "eval_runtime": 38.3193,
513
+ "eval_samples_per_second": 12.892,
514
+ "eval_steps_per_second": 1.618,
515
+ "step": 2300
516
+ },
517
+ {
518
+ "epoch": 3.9068994181213634,
519
+ "grad_norm": 53910.06640625,
520
+ "learning_rate": 1.1231281198003328e-06,
521
+ "loss": 0.3034,
522
+ "step": 2350
523
+ },
524
+ {
525
+ "epoch": 3.9900249376558605,
526
+ "grad_norm": 36512.5546875,
527
+ "learning_rate": 8.319467554076539e-08,
528
+ "loss": 0.2983,
529
+ "step": 2400
530
+ },
531
+ {
532
+ "epoch": 3.9900249376558605,
533
+ "eval_loss": 0.2859017550945282,
534
+ "eval_runtime": 38.2723,
535
+ "eval_samples_per_second": 12.908,
536
+ "eval_steps_per_second": 1.62,
537
+ "step": 2400
538
  }
539
  ],
540
  "logging_steps": 50,
 
549
  "should_evaluate": false,
550
  "should_log": false,
551
  "should_save": true,
552
+ "should_training_stop": true
553
  },
554
  "attributes": {}
555
  }
556
  },
557
+ "total_flos": 2.342112942882816e+16,
558
  "train_batch_size": 8,
559
  "trial_name": null,
560
  "trial_params": null