YL95 committed on
Commit
a85bb5d
1 Parent(s): ccae0fa

training state at step 35

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.41379310344827586,
5
  "eval_steps": 1,
6
- "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -442,6 +442,81 @@
442
  "eval_samples_per_second": 1.144,
443
  "eval_steps_per_second": 0.572,
444
  "step": 29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  }
446
  ],
447
  "logging_steps": 1,
@@ -461,7 +536,7 @@
461
  "attributes": {}
462
  }
463
  },
464
- "total_flos": 3.786199396619059e+16,
465
  "train_batch_size": 2,
466
  "trial_name": null,
467
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4827586206896552,
5
  "eval_steps": 1,
6
+ "global_step": 35,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
442
  "eval_samples_per_second": 1.144,
443
  "eval_steps_per_second": 0.572,
444
  "step": 29
445
+ },
446
+ {
447
+ "epoch": 0.41379310344827586,
448
+ "grad_norm": 2.320786237716675,
449
+ "learning_rate": 4.9790502530660635e-05,
450
+ "loss": 1.4691,
451
+ "step": 30
452
+ },
453
+ {
454
+ "epoch": 0.41379310344827586,
455
+ "eval_loss": 0.9836109280586243,
456
+ "eval_runtime": 16.9043,
457
+ "eval_samples_per_second": 1.183,
458
+ "eval_steps_per_second": 0.592,
459
+ "step": 30
460
+ },
461
+ {
462
+ "epoch": 0.42758620689655175,
463
+ "grad_norm": 2.1385531425476074,
464
+ "learning_rate": 4.9734953280908904e-05,
465
+ "loss": 1.4696,
466
+ "step": 31
467
+ },
468
+ {
469
+ "epoch": 0.42758620689655175,
470
+ "eval_loss": 0.976610541343689,
471
+ "eval_runtime": 17.3486,
472
+ "eval_samples_per_second": 1.153,
473
+ "eval_steps_per_second": 0.576,
474
+ "step": 31
475
+ },
476
+ {
477
+ "epoch": 0.4413793103448276,
478
+ "grad_norm": 2.2254769802093506,
479
+ "learning_rate": 4.967291771834727e-05,
480
+ "loss": 1.531,
481
+ "step": 32
482
+ },
483
+ {
484
+ "epoch": 0.4413793103448276,
485
+ "eval_loss": 0.9718761444091797,
486
+ "eval_runtime": 17.5285,
487
+ "eval_samples_per_second": 1.141,
488
+ "eval_steps_per_second": 0.57,
489
+ "step": 32
490
+ },
491
+ {
492
+ "epoch": 0.45517241379310347,
493
+ "grad_norm": 2.34843373298645,
494
+ "learning_rate": 4.960441211072686e-05,
495
+ "loss": 1.5484,
496
+ "step": 33
497
+ },
498
+ {
499
+ "epoch": 0.45517241379310347,
500
+ "eval_loss": 0.9682589769363403,
501
+ "eval_runtime": 17.5952,
502
+ "eval_samples_per_second": 1.137,
503
+ "eval_steps_per_second": 0.568,
504
+ "step": 33
505
+ },
506
+ {
507
+ "epoch": 0.4689655172413793,
508
+ "grad_norm": 1.9610539674758911,
509
+ "learning_rate": 4.9529454422455976e-05,
510
+ "loss": 1.3204,
511
+ "step": 34
512
+ },
513
+ {
514
+ "epoch": 0.4689655172413793,
515
+ "eval_loss": 0.9610344767570496,
516
+ "eval_runtime": 17.6076,
517
+ "eval_samples_per_second": 1.136,
518
+ "eval_steps_per_second": 0.568,
519
+ "step": 34
520
  }
521
  ],
522
  "logging_steps": 1,
 
536
  "attributes": {}
537
  }
538
  },
539
+ "total_flos": 4.450920354589901e+16,
540
  "train_batch_size": 2,
541
  "trial_name": null,
542
  "trial_params": null