narekvslife commited on
Commit
0e3ef6e
1 Parent(s): 21a65c8

5wiothfs 5.6

Browse files
Files changed (4) hide show
  1. adapter_model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +2 -152
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01526417c034d18e2cfcdf730aea5425a2578aed652ba30c516159f411ca16a5
3
  size 18900240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cd287bba5fde8fc753916b1d432058128613e0bcfc071316ad6378d8a26508e
3
  size 18900240
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc6743779b6a95aa39cfa21eb4180047acc5099f52d15563917ad0be3f66b7b5
3
  size 37910458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74d28b77057f2ce067f3506293bc8da387c9a97b206dccc88b87d9cb314b5e32
3
  size 37910458
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3838d83f7726f0c86c7838cad1ec5e5096386a76705b0e3ebf79216139e847d0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1dcb1c05f8406763f478190e5dde325c77e4a5fb69a17c7b42d79ed2e579e6e
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.795204913192394,
5
  "eval_steps": 2000,
6
- "global_step": 5700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8439,156 +8439,6 @@
8439
  "rewards/margins": 0.27365249395370483,
8440
  "rewards/rejected": 0.5168629884719849,
8441
  "step": 5600
8442
- },
8443
- {
8444
- "epoch": 1.7668595724577774,
8445
- "grad_norm": 2.46875,
8446
- "learning_rate": 3.4943208086663183e-06,
8447
- "logits/chosen": -0.4847659170627594,
8448
- "logits/rejected": -0.33793026208877563,
8449
- "logps/chosen": -197.34933471679688,
8450
- "logps/rejected": -174.9829559326172,
8451
- "loss": 0.646,
8452
- "rewards/accuracies": 0.6000000238418579,
8453
- "rewards/chosen": 0.7014733552932739,
8454
- "rewards/margins": 0.12839707732200623,
8455
- "rewards/rejected": 0.5730762481689453,
8456
- "step": 5610
8457
- },
8458
- {
8459
- "epoch": 1.7700090547616236,
8460
- "grad_norm": 3.578125,
8461
- "learning_rate": 3.4894823245512986e-06,
8462
- "logits/chosen": -0.506749153137207,
8463
- "logits/rejected": -0.45556968450546265,
8464
- "logps/chosen": -197.71902465820312,
8465
- "logps/rejected": -186.50241088867188,
8466
- "loss": 0.6803,
8467
- "rewards/accuracies": 0.550000011920929,
8468
- "rewards/chosen": 0.7117626070976257,
8469
- "rewards/margins": 0.06737571209669113,
8470
- "rewards/rejected": 0.644386887550354,
8471
- "step": 5620
8472
- },
8473
- {
8474
- "epoch": 1.7731585370654699,
8475
- "grad_norm": 2.578125,
8476
- "learning_rate": 3.484639441627448e-06,
8477
- "logits/chosen": -0.5070594549179077,
8478
- "logits/rejected": -0.3329693078994751,
8479
- "logps/chosen": -220.60986328125,
8480
- "logps/rejected": -183.98416137695312,
8481
- "loss": 0.6042,
8482
- "rewards/accuracies": 0.737500011920929,
8483
- "rewards/chosen": 0.7875211834907532,
8484
- "rewards/margins": 0.2286391705274582,
8485
- "rewards/rejected": 0.5588821172714233,
8486
- "step": 5630
8487
- },
8488
- {
8489
- "epoch": 1.7763080193693161,
8490
- "grad_norm": 2.546875,
8491
- "learning_rate": 3.4797921814241196e-06,
8492
- "logits/chosen": -0.48938584327697754,
8493
- "logits/rejected": -0.37643399834632874,
8494
- "logps/chosen": -194.7692413330078,
8495
- "logps/rejected": -171.0836944580078,
8496
- "loss": 0.6345,
8497
- "rewards/accuracies": 0.637499988079071,
8498
- "rewards/chosen": 0.7208179235458374,
8499
- "rewards/margins": 0.17952939867973328,
8500
- "rewards/rejected": 0.5412884950637817,
8501
- "step": 5640
8502
- },
8503
- {
8504
- "epoch": 1.7794575016731624,
8505
- "grad_norm": 2.71875,
8506
- "learning_rate": 3.4749405654901297e-06,
8507
- "logits/chosen": -0.5021311044692993,
8508
- "logits/rejected": -0.3592470586299896,
8509
- "logps/chosen": -203.04798889160156,
8510
- "logps/rejected": -170.28916931152344,
8511
- "loss": 0.6468,
8512
- "rewards/accuracies": 0.5874999761581421,
8513
- "rewards/chosen": 0.7304830551147461,
8514
- "rewards/margins": 0.14200101792812347,
8515
- "rewards/rejected": 0.5884820222854614,
8516
- "step": 5650
8517
- },
8518
- {
8519
- "epoch": 1.7826069839770087,
8520
- "grad_norm": 1.8125,
8521
- "learning_rate": 3.470084615393655e-06,
8522
- "logits/chosen": -0.5099314451217651,
8523
- "logits/rejected": -0.36777496337890625,
8524
- "logps/chosen": -188.96286010742188,
8525
- "logps/rejected": -158.13487243652344,
8526
- "loss": 0.5854,
8527
- "rewards/accuracies": 0.8374999761581421,
8528
- "rewards/chosen": 0.7638787031173706,
8529
- "rewards/margins": 0.25533777475357056,
8530
- "rewards/rejected": 0.5085408687591553,
8531
- "step": 5660
8532
- },
8533
- {
8534
- "epoch": 1.785756466280855,
8535
- "grad_norm": 2.71875,
8536
- "learning_rate": 3.4652243527221423e-06,
8537
- "logits/chosen": -0.4756031632423401,
8538
- "logits/rejected": -0.44920986890792847,
8539
- "logps/chosen": -185.1388397216797,
8540
- "logps/rejected": -172.55137634277344,
8541
- "loss": 0.6583,
8542
- "rewards/accuracies": 0.5375000238418579,
8543
- "rewards/chosen": 0.7094627618789673,
8544
- "rewards/margins": 0.13025884330272675,
8545
- "rewards/rejected": 0.5792039036750793,
8546
- "step": 5670
8547
- },
8548
- {
8549
- "epoch": 1.7889059485847014,
8550
- "grad_norm": 3.171875,
8551
- "learning_rate": 3.460359799082209e-06,
8552
- "logits/chosen": -0.47689515352249146,
8553
- "logits/rejected": -0.34241801500320435,
8554
- "logps/chosen": -204.8109588623047,
8555
- "logps/rejected": -166.13514709472656,
8556
- "loss": 0.615,
8557
- "rewards/accuracies": 0.7250000238418579,
8558
- "rewards/chosen": 0.7595565915107727,
8559
- "rewards/margins": 0.21238622069358826,
8560
- "rewards/rejected": 0.5471702814102173,
8561
- "step": 5680
8562
- },
8563
- {
8564
- "epoch": 1.7920554308885477,
8565
- "grad_norm": 3.765625,
8566
- "learning_rate": 3.4554909760995485e-06,
8567
- "logits/chosen": -0.5418170094490051,
8568
- "logits/rejected": -0.41362690925598145,
8569
- "logps/chosen": -187.98043823242188,
8570
- "logps/rejected": -167.5854034423828,
8571
- "loss": 0.6338,
8572
- "rewards/accuracies": 0.699999988079071,
8573
- "rewards/chosen": 0.7204712629318237,
8574
- "rewards/margins": 0.1737706959247589,
8575
- "rewards/rejected": 0.5467005968093872,
8576
- "step": 5690
8577
- },
8578
- {
8579
- "epoch": 1.795204913192394,
8580
- "grad_norm": 3.3125,
8581
- "learning_rate": 3.450617905418834e-06,
8582
- "logits/chosen": -0.442087322473526,
8583
- "logits/rejected": -0.3480719029903412,
8584
- "logps/chosen": -205.0787353515625,
8585
- "logps/rejected": -176.585693359375,
8586
- "loss": 0.6078,
8587
- "rewards/accuracies": 0.699999988079071,
8588
- "rewards/chosen": 0.7968889474868774,
8589
- "rewards/margins": 0.2236328423023224,
8590
- "rewards/rejected": 0.5732561349868774,
8591
- "step": 5700
8592
  }
8593
  ],
8594
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.763710090153931,
5
  "eval_steps": 2000,
6
+ "global_step": 5600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8439
  "rewards/margins": 0.27365249395370483,
8440
  "rewards/rejected": 0.5168629884719849,
8441
  "step": 5600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8442
  }
8443
  ],
8444
  "logging_steps": 10,