YL95 committed on
Commit
1074d60
1 Parent(s): b8561ab

training state at step 40

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4827586206896552,
5
  "eval_steps": 1,
6
- "global_step": 35,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -517,6 +517,81 @@
517
  "eval_samples_per_second": 1.136,
518
  "eval_steps_per_second": 0.568,
519
  "step": 34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
  }
521
  ],
522
  "logging_steps": 1,
@@ -536,7 +611,7 @@
536
  "attributes": {}
537
  }
538
  },
539
- "total_flos": 4.450920354589901e+16,
540
  "train_batch_size": 2,
541
  "trial_name": null,
542
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5517241379310345,
5
  "eval_steps": 1,
6
+ "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
517
  "eval_samples_per_second": 1.136,
518
  "eval_steps_per_second": 0.568,
519
  "step": 34
520
+ },
521
+ {
522
+ "epoch": 0.4827586206896552,
523
+ "grad_norm": 2.2027809619903564,
524
+ "learning_rate": 4.944806430988927e-05,
525
+ "loss": 1.3801,
526
+ "step": 35
527
+ },
528
+ {
529
+ "epoch": 0.4827586206896552,
530
+ "eval_loss": 0.9546059370040894,
531
+ "eval_runtime": 17.5811,
532
+ "eval_samples_per_second": 1.138,
533
+ "eval_steps_per_second": 0.569,
534
+ "step": 35
535
+ },
536
+ {
537
+ "epoch": 0.496551724137931,
538
+ "grad_norm": 2.3457250595092773,
539
+ "learning_rate": 4.936026311617316e-05,
540
+ "loss": 1.4401,
541
+ "step": 36
542
+ },
543
+ {
544
+ "epoch": 0.496551724137931,
545
+ "eval_loss": 0.9482511281967163,
546
+ "eval_runtime": 17.8351,
547
+ "eval_samples_per_second": 1.121,
548
+ "eval_steps_per_second": 0.561,
549
+ "step": 36
550
+ },
551
+ {
552
+ "epoch": 0.5103448275862069,
553
+ "grad_norm": 2.161039352416992,
554
+ "learning_rate": 4.926607386564898e-05,
555
+ "loss": 1.4067,
556
+ "step": 37
557
+ },
558
+ {
559
+ "epoch": 0.5103448275862069,
560
+ "eval_loss": 0.9448164701461792,
561
+ "eval_runtime": 17.6014,
562
+ "eval_samples_per_second": 1.136,
563
+ "eval_steps_per_second": 0.568,
564
+ "step": 37
565
+ },
566
+ {
567
+ "epoch": 0.5241379310344828,
568
+ "grad_norm": 2.1683900356292725,
569
+ "learning_rate": 4.916552125781528e-05,
570
+ "loss": 1.3806,
571
+ "step": 38
572
+ },
573
+ {
574
+ "epoch": 0.5241379310344828,
575
+ "eval_loss": 0.9402996897697449,
576
+ "eval_runtime": 17.6524,
577
+ "eval_samples_per_second": 1.133,
578
+ "eval_steps_per_second": 0.566,
579
+ "step": 38
580
+ },
581
+ {
582
+ "epoch": 0.5379310344827586,
583
+ "grad_norm": 2.2735962867736816,
584
+ "learning_rate": 4.9058631660850765e-05,
585
+ "loss": 1.4937,
586
+ "step": 39
587
+ },
588
+ {
589
+ "epoch": 0.5379310344827586,
590
+ "eval_loss": 0.9291872978210449,
591
+ "eval_runtime": 17.5838,
592
+ "eval_samples_per_second": 1.137,
593
+ "eval_steps_per_second": 0.569,
594
+ "step": 39
595
  }
596
  ],
597
  "logging_steps": 1,
 
611
  "attributes": {}
612
  }
613
  },
614
+ "total_flos": 5.112519916024627e+16,
615
  "train_batch_size": 2,
616
  "trial_name": null,
617
  "trial_params": null