cat-searcher commited on
Commit
7eb7574
·
verified ·
1 Parent(s): 2998396

Training in progress, epoch 20, checkpoint

Browse files
Files changed (30) hide show
  1. last-checkpoint/global_step3941/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step3941/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step3941/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step3941/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step3941/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step3941/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step3941/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step3941/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step3941/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step3941/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step3941/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step3941/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step3941/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step3941/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step3941/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step3941/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +889 -4
  30. last-checkpoint/training_args.bin +1 -1
last-checkpoint/global_step3941/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94b4dcfd71cab30d4f84d893b6fd0d1a6e2aa4b3a61d6c79e5479ce39b939d94
3
+ size 2506176112
last-checkpoint/global_step3941/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f954c542f98537a9ed834facf341309119d8d1fba9b2ce996edb91caa0185864
3
+ size 2506176112
last-checkpoint/global_step3941/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b2c3b968bcad4a7393ddf5c5f119cdaac63191903276c1eb81d6c451c5d6752
3
+ size 2506176112
last-checkpoint/global_step3941/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bee82ebc9e3eee25d3b0df80e977d29d2fb8d31a2c15e34a592ca2e3b23b894
3
+ size 2506176112
last-checkpoint/global_step3941/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb059eb071e2c280b3205e611e8d35da6f4b0074daa865e2753febf692019be7
3
+ size 2506176112
last-checkpoint/global_step3941/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88b041f26328228bde99957a111e977af41d76464988508861df2fe943631d98
3
+ size 2506176112
last-checkpoint/global_step3941/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d86002b3f908ebc201f5fbcb089f747f46dc062a490b815c3292beeddbb2eba
3
+ size 2506176112
last-checkpoint/global_step3941/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eff104ff6a3804962fc8e81a4a25a68e1ddd23153106604a90d90db75ed7cc23
3
+ size 2506176112
last-checkpoint/global_step3941/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cac99f40ef6a29819b6bfee413e07a661faee0937b8d1c61c805da871e0945af
3
+ size 85570
last-checkpoint/global_step3941/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5701cbcfc2c6647d84113e3d13ee7cc172b6847210fec719fae584f61e7c3a0
3
+ size 85506
last-checkpoint/global_step3941/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24bf30e27f127c8845ba54ae9788e2ca7ed1ccbcbba555d599dab83769417d07
3
+ size 85506
last-checkpoint/global_step3941/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c29e366a2f7623d367aac2611b78199725dcfee1510c37b39fb5eff91c9287
3
+ size 85506
last-checkpoint/global_step3941/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14f446b32558ac815c52ce36a2ece940f8c9860c1b6666d3cf16bcfedebda918
3
+ size 85506
last-checkpoint/global_step3941/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f63d5c8898ac42433f7add352d994d40bab319ea7c6840b8756db83a707506f3
3
+ size 85506
last-checkpoint/global_step3941/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c9e1729073f017d91bafd979b84d7c37c6570c15926abbd20aeeb138871a251
3
+ size 85506
last-checkpoint/global_step3941/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9d37159e66416476690c422874e2523ac0312d3e34aa742482bd47edb58aebc
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3357
 
1
+ global_step3941
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3578035697ae915bf8ed319e400346be0b8f4d900849a6d07f0ff9b4c3b1711c
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acdc933f849726cf9f6626987c11cea6a7678ccfd80dc2d0483bec3e83e0c3a7
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a652aa698a378ecc5fb4aaee9480c493c9a62c60f4f96b74c9d6698fa2aa8d33
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b099839b0f1c5f0d5fb18759ba02999fd787c83dda2d02d2ba36941c44a246b
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a0c9979566a5d89cb3c766336548670ec6f2291deba1b7ab1764c12d3187b24
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f8f81f81a21cdf7d6a9d642f67427b821e87802eb5a4ca4a5c038480e9a673f
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03e36a570d6158fc25d1cf5d9f8f450fc64c5a7683330277f89ff76d5f2fc6cd
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c967add958d82a4ef6285ec19e0e5a560f82b1ea3488260d2539d200cb5f199b
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4f619cbef4b74f1680d667c8788285a602392e63bdf3760ef3a59ec8864d483
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a5e30f320a5f3e4a4760af275c4a2bb9fdcfc6c661a3757da3b20b4c29a87fc
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fc037fba93ace1bf7ce01b1a5f7d785698d47b4cc2cedf2300bbf7a41ebf05c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3411b849d13fdd9e668c8cc5fafa8402de13fd4a2d5761eaf33f2791780721e4
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ab728c2461d6d1c64f04d7cbfdfcbfa7bd7ad0ef6e19d52458501ee81b27128
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1765109deada93bbc4f9e38b5c3c6bbd9afe9a12839bbbd72227ca7ff46467b1
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27530e653ebf5997ae3159cdcde264607e6a6f86b7e3c7a1b3a1e8301cd43d03
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6147f79a889f2906a81eea5c06f72acf722f674546fd6be8432d3d70a04392a8
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1fddaeb1257697bd7c0101abf1ab23f2925d0d9165cd8bddfbd22f8444db2b7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f5815a19b60dc1bb303c451b515a4ec523fa0faf1c87c69b03ad4032c4baa2
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:942af3734a320fe12a3205a47ca1cdc7d1f0996bfde86c020a35545ccd2fd418
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:395da160bbc37f8049700e0d82d501cbd1effc732719599c0cd143db7892910e
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:605054ed043f8ba321ca13100ae25afc2296eb67de83d5027f6f7f6d891a4130
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dd8e459c5f044a7182784611ab9373a71c50f4a912290b4cfdcb531e69b1b46
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.99746835443038,
5
  "eval_steps": 100,
6
- "global_step": 3357,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5047,12 +5047,897 @@
5047
  "rewards/margins": 0.5159622430801392,
5048
  "rewards/rejected": -0.33039581775665283,
5049
  "step": 3350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5050
  }
5051
  ],
5052
  "logging_steps": 10,
5053
- "max_steps": 3546,
5054
  "num_input_tokens_seen": 0,
5055
- "num_train_epochs": 18,
5056
  "save_steps": 500,
5057
  "stateful_callbacks": {
5058
  "TrainerControl": {
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
  "eval_steps": 100,
6
+ "global_step": 3941,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5047
  "rewards/margins": 0.5159622430801392,
5048
  "rewards/rejected": -0.33039581775665283,
5049
  "step": 3350
5050
+ },
5051
+ {
5052
+ "epoch": 17.0126582278481,
5053
+ "grad_norm": 218651.56901322177,
5054
+ "learning_rate": 2.9144468818552176e-08,
5055
+ "logits/chosen": 0.41573429107666016,
5056
+ "logits/rejected": 1.103547215461731,
5057
+ "logps/chosen": -37.6799201965332,
5058
+ "logps/rejected": -569.5391235351562,
5059
+ "loss": 14029.3563,
5060
+ "rewards/accuracies": 0.987500011920929,
5061
+ "rewards/chosen": 0.18778078258037567,
5062
+ "rewards/margins": 0.5316546559333801,
5063
+ "rewards/rejected": -0.34387388825416565,
5064
+ "step": 3360
5065
+ },
5066
+ {
5067
+ "epoch": 17.063291139240505,
5068
+ "grad_norm": 236719.0916690887,
5069
+ "learning_rate": 2.7577561892823564e-08,
5070
+ "logits/chosen": -0.09267449378967285,
5071
+ "logits/rejected": 0.3535307049751282,
5072
+ "logps/chosen": -43.02147674560547,
5073
+ "logps/rejected": -571.3306884765625,
5074
+ "loss": 14216.225,
5075
+ "rewards/accuracies": 0.987500011920929,
5076
+ "rewards/chosen": 0.18815621733665466,
5077
+ "rewards/margins": 0.52230304479599,
5078
+ "rewards/rejected": -0.33414679765701294,
5079
+ "step": 3370
5080
+ },
5081
+ {
5082
+ "epoch": 17.11392405063291,
5083
+ "grad_norm": 151995.97062770248,
5084
+ "learning_rate": 2.6010654967094953e-08,
5085
+ "logits/chosen": 1.3600900173187256,
5086
+ "logits/rejected": 0.45606088638305664,
5087
+ "logps/chosen": -33.07421112060547,
5088
+ "logps/rejected": -574.2869262695312,
5089
+ "loss": 14569.3875,
5090
+ "rewards/accuracies": 1.0,
5091
+ "rewards/chosen": 0.1843741536140442,
5092
+ "rewards/margins": 0.533474862575531,
5093
+ "rewards/rejected": -0.3491007089614868,
5094
+ "step": 3380
5095
+ },
5096
+ {
5097
+ "epoch": 17.164556962025316,
5098
+ "grad_norm": 229039.39535517112,
5099
+ "learning_rate": 2.4443748041366342e-08,
5100
+ "logits/chosen": 0.012326288037002087,
5101
+ "logits/rejected": -0.24337856471538544,
5102
+ "logps/chosen": -48.85834503173828,
5103
+ "logps/rejected": -591.9976806640625,
5104
+ "loss": 15141.0031,
5105
+ "rewards/accuracies": 1.0,
5106
+ "rewards/chosen": 0.19536466896533966,
5107
+ "rewards/margins": 0.5443064570426941,
5108
+ "rewards/rejected": -0.34894177317619324,
5109
+ "step": 3390
5110
+ },
5111
+ {
5112
+ "epoch": 17.21518987341772,
5113
+ "grad_norm": 224579.22425486994,
5114
+ "learning_rate": 2.2876841115637728e-08,
5115
+ "logits/chosen": -0.07731113582849503,
5116
+ "logits/rejected": 0.8038260340690613,
5117
+ "logps/chosen": -42.96089172363281,
5118
+ "logps/rejected": -587.9930419921875,
5119
+ "loss": 13962.9406,
5120
+ "rewards/accuracies": 0.987500011920929,
5121
+ "rewards/chosen": 0.18969421088695526,
5122
+ "rewards/margins": 0.5471119284629822,
5123
+ "rewards/rejected": -0.3574177622795105,
5124
+ "step": 3400
5125
+ },
5126
+ {
5127
+ "epoch": 17.265822784810126,
5128
+ "grad_norm": 194108.45632178357,
5129
+ "learning_rate": 2.1309934189909117e-08,
5130
+ "logits/chosen": -1.735790491104126,
5131
+ "logits/rejected": -0.8417277336120605,
5132
+ "logps/chosen": -40.28795623779297,
5133
+ "logps/rejected": -577.9163208007812,
5134
+ "loss": 14457.5328,
5135
+ "rewards/accuracies": 0.987500011920929,
5136
+ "rewards/chosen": 0.19212636351585388,
5137
+ "rewards/margins": 0.54021155834198,
5138
+ "rewards/rejected": -0.3480851650238037,
5139
+ "step": 3410
5140
+ },
5141
+ {
5142
+ "epoch": 17.31645569620253,
5143
+ "grad_norm": 323871.0912725565,
5144
+ "learning_rate": 1.9743027264180506e-08,
5145
+ "logits/chosen": 1.0423898696899414,
5146
+ "logits/rejected": 1.1823880672454834,
5147
+ "logps/chosen": -50.077327728271484,
5148
+ "logps/rejected": -565.8704223632812,
5149
+ "loss": 14191.8531,
5150
+ "rewards/accuracies": 0.987500011920929,
5151
+ "rewards/chosen": 0.18371161818504333,
5152
+ "rewards/margins": 0.5181502103805542,
5153
+ "rewards/rejected": -0.3344385623931885,
5154
+ "step": 3420
5155
+ },
5156
+ {
5157
+ "epoch": 17.367088607594937,
5158
+ "grad_norm": 207973.13380554292,
5159
+ "learning_rate": 1.8176120338451895e-08,
5160
+ "logits/chosen": -0.8037737011909485,
5161
+ "logits/rejected": -0.8005819320678711,
5162
+ "logps/chosen": -45.626670837402344,
5163
+ "logps/rejected": -544.7116088867188,
5164
+ "loss": 14114.0906,
5165
+ "rewards/accuracies": 0.9750000238418579,
5166
+ "rewards/chosen": 0.1760983169078827,
5167
+ "rewards/margins": 0.5017568469047546,
5168
+ "rewards/rejected": -0.32565850019454956,
5169
+ "step": 3430
5170
+ },
5171
+ {
5172
+ "epoch": 17.417721518987342,
5173
+ "grad_norm": 191156.31750064602,
5174
+ "learning_rate": 1.6609213412723284e-08,
5175
+ "logits/chosen": 1.2277637720108032,
5176
+ "logits/rejected": 0.573845386505127,
5177
+ "logps/chosen": -50.492279052734375,
5178
+ "logps/rejected": -586.3282470703125,
5179
+ "loss": 13957.7594,
5180
+ "rewards/accuracies": 0.9750000238418579,
5181
+ "rewards/chosen": 0.19206462800502777,
5182
+ "rewards/margins": 0.5328875184059143,
5183
+ "rewards/rejected": -0.34082287549972534,
5184
+ "step": 3440
5185
+ },
5186
+ {
5187
+ "epoch": 17.468354430379748,
5188
+ "grad_norm": 262791.84599779843,
5189
+ "learning_rate": 1.5042306486994673e-08,
5190
+ "logits/chosen": 0.29228338599205017,
5191
+ "logits/rejected": 0.9747223854064941,
5192
+ "logps/chosen": -37.640201568603516,
5193
+ "logps/rejected": -557.47119140625,
5194
+ "loss": 14478.4906,
5195
+ "rewards/accuracies": 1.0,
5196
+ "rewards/chosen": 0.18818514049053192,
5197
+ "rewards/margins": 0.5214470624923706,
5198
+ "rewards/rejected": -0.3332619369029999,
5199
+ "step": 3450
5200
+ },
5201
+ {
5202
+ "epoch": 17.518987341772153,
5203
+ "grad_norm": 227441.5548714142,
5204
+ "learning_rate": 1.347539956126606e-08,
5205
+ "logits/chosen": -0.060483645647764206,
5206
+ "logits/rejected": 0.41309136152267456,
5207
+ "logps/chosen": -46.32054138183594,
5208
+ "logps/rejected": -588.6563720703125,
5209
+ "loss": 14804.9047,
5210
+ "rewards/accuracies": 0.987500011920929,
5211
+ "rewards/chosen": 0.19712397456169128,
5212
+ "rewards/margins": 0.5437620878219604,
5213
+ "rewards/rejected": -0.34663814306259155,
5214
+ "step": 3460
5215
+ },
5216
+ {
5217
+ "epoch": 17.569620253164558,
5218
+ "grad_norm": 378558.8588589865,
5219
+ "learning_rate": 1.1908492635537449e-08,
5220
+ "logits/chosen": 2.0075535774230957,
5221
+ "logits/rejected": 2.772726058959961,
5222
+ "logps/chosen": -46.09113693237305,
5223
+ "logps/rejected": -582.0597534179688,
5224
+ "loss": 14645.9562,
5225
+ "rewards/accuracies": 0.987500011920929,
5226
+ "rewards/chosen": 0.18593838810920715,
5227
+ "rewards/margins": 0.5331605076789856,
5228
+ "rewards/rejected": -0.34722214937210083,
5229
+ "step": 3470
5230
+ },
5231
+ {
5232
+ "epoch": 17.620253164556964,
5233
+ "grad_norm": 263891.6462573049,
5234
+ "learning_rate": 1.0341585709808836e-08,
5235
+ "logits/chosen": 0.273967444896698,
5236
+ "logits/rejected": 1.9021276235580444,
5237
+ "logps/chosen": -34.29851531982422,
5238
+ "logps/rejected": -567.2389526367188,
5239
+ "loss": 15085.2313,
5240
+ "rewards/accuracies": 1.0,
5241
+ "rewards/chosen": 0.18956169486045837,
5242
+ "rewards/margins": 0.5355597734451294,
5243
+ "rewards/rejected": -0.3459981083869934,
5244
+ "step": 3480
5245
+ },
5246
+ {
5247
+ "epoch": 17.67088607594937,
5248
+ "grad_norm": 276267.9285814808,
5249
+ "learning_rate": 8.774678784080225e-09,
5250
+ "logits/chosen": -0.02632077969610691,
5251
+ "logits/rejected": 0.4594387114048004,
5252
+ "logps/chosen": -45.098960876464844,
5253
+ "logps/rejected": -568.720947265625,
5254
+ "loss": 13750.4469,
5255
+ "rewards/accuracies": 0.987500011920929,
5256
+ "rewards/chosen": 0.19129987061023712,
5257
+ "rewards/margins": 0.5272942781448364,
5258
+ "rewards/rejected": -0.33599433302879333,
5259
+ "step": 3490
5260
+ },
5261
+ {
5262
+ "epoch": 17.72151898734177,
5263
+ "grad_norm": 156087.69298121333,
5264
+ "learning_rate": 7.207771858351613e-09,
5265
+ "logits/chosen": 0.04748225212097168,
5266
+ "logits/rejected": 0.4610685408115387,
5267
+ "logps/chosen": -49.872169494628906,
5268
+ "logps/rejected": -603.3367919921875,
5269
+ "loss": 13778.1469,
5270
+ "rewards/accuracies": 0.987500011920929,
5271
+ "rewards/chosen": 0.19287212193012238,
5272
+ "rewards/margins": 0.5504390001296997,
5273
+ "rewards/rejected": -0.3575669229030609,
5274
+ "step": 3500
5275
+ },
5276
+ {
5277
+ "epoch": 17.772151898734176,
5278
+ "grad_norm": 209667.9634643516,
5279
+ "learning_rate": 5.6408649326230014e-09,
5280
+ "logits/chosen": 1.4883615970611572,
5281
+ "logits/rejected": 2.2038960456848145,
5282
+ "logps/chosen": -46.18961715698242,
5283
+ "logps/rejected": -575.4703369140625,
5284
+ "loss": 13653.9672,
5285
+ "rewards/accuracies": 1.0,
5286
+ "rewards/chosen": 0.18806029856204987,
5287
+ "rewards/margins": 0.5310976505279541,
5288
+ "rewards/rejected": -0.34303733706474304,
5289
+ "step": 3510
5290
+ },
5291
+ {
5292
+ "epoch": 17.82278481012658,
5293
+ "grad_norm": 222056.5820151951,
5294
+ "learning_rate": 4.07395800689439e-09,
5295
+ "logits/chosen": -0.582931637763977,
5296
+ "logits/rejected": -0.23906604945659637,
5297
+ "logps/chosen": -60.795921325683594,
5298
+ "logps/rejected": -590.2525024414062,
5299
+ "loss": 14149.5938,
5300
+ "rewards/accuracies": 0.949999988079071,
5301
+ "rewards/chosen": 0.19504059851169586,
5302
+ "rewards/margins": 0.5360020399093628,
5303
+ "rewards/rejected": -0.34096142649650574,
5304
+ "step": 3520
5305
+ },
5306
+ {
5307
+ "epoch": 17.873417721518987,
5308
+ "grad_norm": 213324.38139465638,
5309
+ "learning_rate": 2.5070510811657785e-09,
5310
+ "logits/chosen": -0.3791787028312683,
5311
+ "logits/rejected": 0.26259681582450867,
5312
+ "logps/chosen": -48.315147399902344,
5313
+ "logps/rejected": -579.376220703125,
5314
+ "loss": 14028.4,
5315
+ "rewards/accuracies": 1.0,
5316
+ "rewards/chosen": 0.19789119064807892,
5317
+ "rewards/margins": 0.5311988592147827,
5318
+ "rewards/rejected": -0.333307683467865,
5319
+ "step": 3530
5320
+ },
5321
+ {
5322
+ "epoch": 17.924050632911392,
5323
+ "grad_norm": 207695.40556695752,
5324
+ "learning_rate": 9.40144155437167e-10,
5325
+ "logits/chosen": 2.0871522426605225,
5326
+ "logits/rejected": 2.378633975982666,
5327
+ "logps/chosen": -36.07915115356445,
5328
+ "logps/rejected": -560.524169921875,
5329
+ "loss": 13942.7234,
5330
+ "rewards/accuracies": 0.987500011920929,
5331
+ "rewards/chosen": 0.18208447098731995,
5332
+ "rewards/margins": 0.5249064564704895,
5333
+ "rewards/rejected": -0.34282201528549194,
5334
+ "step": 3540
5335
+ },
5336
+ {
5337
+ "epoch": 18.020253164556962,
5338
+ "grad_norm": 633377.3531549113,
5339
+ "learning_rate": 2.774992165465371e-07,
5340
+ "logits/chosen": 0.778042197227478,
5341
+ "logits/rejected": 0.4570779800415039,
5342
+ "logps/chosen": -30.87795639038086,
5343
+ "logps/rejected": -562.3123168945312,
5344
+ "loss": 14823.5117,
5345
+ "rewards/accuracies": 1.0,
5346
+ "rewards/chosen": 0.19000156223773956,
5347
+ "rewards/margins": 0.5307614803314209,
5348
+ "rewards/rejected": -0.34075987339019775,
5349
+ "step": 3550
5350
+ },
5351
+ {
5352
+ "epoch": 18.070886075949367,
5353
+ "grad_norm": 536501.4949459385,
5354
+ "learning_rate": 2.767157630836728e-07,
5355
+ "logits/chosen": -1.453107476234436,
5356
+ "logits/rejected": -1.1603299379348755,
5357
+ "logps/chosen": -48.59767532348633,
5358
+ "logps/rejected": -607.5595703125,
5359
+ "loss": 14268.2156,
5360
+ "rewards/accuracies": 1.0,
5361
+ "rewards/chosen": 0.19779345393180847,
5362
+ "rewards/margins": 0.5535213351249695,
5363
+ "rewards/rejected": -0.3557279109954834,
5364
+ "step": 3560
5365
+ },
5366
+ {
5367
+ "epoch": 18.121518987341773,
5368
+ "grad_norm": 613929.4964505757,
5369
+ "learning_rate": 2.7593230962080847e-07,
5370
+ "logits/chosen": -1.0204923152923584,
5371
+ "logits/rejected": -1.006306529045105,
5372
+ "logps/chosen": -40.379703521728516,
5373
+ "logps/rejected": -586.8853759765625,
5374
+ "loss": 14124.1406,
5375
+ "rewards/accuracies": 1.0,
5376
+ "rewards/chosen": 0.18828515708446503,
5377
+ "rewards/margins": 0.5416163206100464,
5378
+ "rewards/rejected": -0.35333114862442017,
5379
+ "step": 3570
5380
+ },
5381
+ {
5382
+ "epoch": 18.172151898734178,
5383
+ "grad_norm": 453188.0208924516,
5384
+ "learning_rate": 2.751488561579442e-07,
5385
+ "logits/chosen": 0.978573203086853,
5386
+ "logits/rejected": 1.6422239542007446,
5387
+ "logps/chosen": -40.75902557373047,
5388
+ "logps/rejected": -571.6940307617188,
5389
+ "loss": 14028.8266,
5390
+ "rewards/accuracies": 1.0,
5391
+ "rewards/chosen": 0.19008655846118927,
5392
+ "rewards/margins": 0.5349593758583069,
5393
+ "rewards/rejected": -0.34487277269363403,
5394
+ "step": 3580
5395
+ },
5396
+ {
5397
+ "epoch": 18.222784810126583,
5398
+ "grad_norm": 470617.1864493106,
5399
+ "learning_rate": 2.743654026950799e-07,
5400
+ "logits/chosen": 0.612755298614502,
5401
+ "logits/rejected": 1.586531639099121,
5402
+ "logps/chosen": -47.43413162231445,
5403
+ "logps/rejected": -567.2514038085938,
5404
+ "loss": 14305.0953,
5405
+ "rewards/accuracies": 0.987500011920929,
5406
+ "rewards/chosen": 0.18671520054340363,
5407
+ "rewards/margins": 0.5188931226730347,
5408
+ "rewards/rejected": -0.33217787742614746,
5409
+ "step": 3590
5410
+ },
5411
+ {
5412
+ "epoch": 18.27341772151899,
5413
+ "grad_norm": 568328.2123455897,
5414
+ "learning_rate": 2.7358194923221564e-07,
5415
+ "logits/chosen": 2.5831315517425537,
5416
+ "logits/rejected": 2.3743977546691895,
5417
+ "logps/chosen": -36.72047805786133,
5418
+ "logps/rejected": -561.4580688476562,
5419
+ "loss": 14931.7812,
5420
+ "rewards/accuracies": 0.987500011920929,
5421
+ "rewards/chosen": 0.18578791618347168,
5422
+ "rewards/margins": 0.5219975113868713,
5423
+ "rewards/rejected": -0.33620959520339966,
5424
+ "step": 3600
5425
+ },
5426
+ {
5427
+ "epoch": 18.324050632911394,
5428
+ "grad_norm": 258649.85824251673,
5429
+ "learning_rate": 2.727984957693513e-07,
5430
+ "logits/chosen": -0.6456964612007141,
5431
+ "logits/rejected": 0.10119187831878662,
5432
+ "logps/chosen": -45.66813659667969,
5433
+ "logps/rejected": -584.33984375,
5434
+ "loss": 13962.2891,
5435
+ "rewards/accuracies": 1.0,
5436
+ "rewards/chosen": 0.19075247645378113,
5437
+ "rewards/margins": 0.5430020093917847,
5438
+ "rewards/rejected": -0.35224950313568115,
5439
+ "step": 3610
5440
+ },
5441
+ {
5442
+ "epoch": 18.374683544303796,
5443
+ "grad_norm": 523823.39531677734,
5444
+ "learning_rate": 2.72015042306487e-07,
5445
+ "logits/chosen": -0.1337634027004242,
5446
+ "logits/rejected": 0.3194190561771393,
5447
+ "logps/chosen": -43.2452278137207,
5448
+ "logps/rejected": -576.6324462890625,
5449
+ "loss": 14478.6656,
5450
+ "rewards/accuracies": 0.987500011920929,
5451
+ "rewards/chosen": 0.19283099472522736,
5452
+ "rewards/margins": 0.5422399640083313,
5453
+ "rewards/rejected": -0.34940892457962036,
5454
+ "step": 3620
5455
+ },
5456
+ {
5457
+ "epoch": 18.4253164556962,
5458
+ "grad_norm": 369527.7483340646,
5459
+ "learning_rate": 2.712315888436227e-07,
5460
+ "logits/chosen": -0.5704905390739441,
5461
+ "logits/rejected": -0.24132680892944336,
5462
+ "logps/chosen": -39.81604766845703,
5463
+ "logps/rejected": -579.3060302734375,
5464
+ "loss": 14853.9188,
5465
+ "rewards/accuracies": 1.0,
5466
+ "rewards/chosen": 0.1893097311258316,
5467
+ "rewards/margins": 0.5385677218437195,
5468
+ "rewards/rejected": -0.3492580056190491,
5469
+ "step": 3630
5470
+ },
5471
+ {
5472
+ "epoch": 18.475949367088607,
5473
+ "grad_norm": 487722.91173438437,
5474
+ "learning_rate": 2.704481353807584e-07,
5475
+ "logits/chosen": 0.30203062295913696,
5476
+ "logits/rejected": 1.367623209953308,
5477
+ "logps/chosen": -43.79780578613281,
5478
+ "logps/rejected": -575.3096313476562,
5479
+ "loss": 14337.7125,
5480
+ "rewards/accuracies": 0.9750000238418579,
5481
+ "rewards/chosen": 0.18831488490104675,
5482
+ "rewards/margins": 0.5326144099235535,
5483
+ "rewards/rejected": -0.3442995548248291,
5484
+ "step": 3640
5485
+ },
5486
+ {
5487
+ "epoch": 18.526582278481012,
5488
+ "grad_norm": 769147.1132735502,
5489
+ "learning_rate": 2.6966468191789406e-07,
5490
+ "logits/chosen": 0.5818338990211487,
5491
+ "logits/rejected": 0.8189504742622375,
5492
+ "logps/chosen": -40.80295944213867,
5493
+ "logps/rejected": -569.6201171875,
5494
+ "loss": 14414.5,
5495
+ "rewards/accuracies": 1.0,
5496
+ "rewards/chosen": 0.19092252850532532,
5497
+ "rewards/margins": 0.5284001231193542,
5498
+ "rewards/rejected": -0.3374776244163513,
5499
+ "step": 3650
5500
+ },
5501
+ {
5502
+ "epoch": 18.577215189873417,
5503
+ "grad_norm": 423741.6615039136,
5504
+ "learning_rate": 2.6888122845502977e-07,
5505
+ "logits/chosen": -2.1757419109344482,
5506
+ "logits/rejected": -1.7465986013412476,
5507
+ "logps/chosen": -33.543739318847656,
5508
+ "logps/rejected": -566.9044189453125,
5509
+ "loss": 13990.4406,
5510
+ "rewards/accuracies": 0.987500011920929,
5511
+ "rewards/chosen": 0.18767623603343964,
5512
+ "rewards/margins": 0.5356841683387756,
5513
+ "rewards/rejected": -0.3480078876018524,
5514
+ "step": 3660
5515
+ },
5516
+ {
5517
+ "epoch": 18.627848101265823,
5518
+ "grad_norm": 405282.2937016151,
5519
+ "learning_rate": 2.680977749921655e-07,
5520
+ "logits/chosen": -0.054244786500930786,
5521
+ "logits/rejected": 0.9029023051261902,
5522
+ "logps/chosen": -49.31962966918945,
5523
+ "logps/rejected": -585.48779296875,
5524
+ "loss": 14779.0125,
5525
+ "rewards/accuracies": 1.0,
5526
+ "rewards/chosen": 0.19979842007160187,
5527
+ "rewards/margins": 0.5441454648971558,
5528
+ "rewards/rejected": -0.3443470597267151,
5529
+ "step": 3670
5530
+ },
5531
+ {
5532
+ "epoch": 18.678481012658228,
5533
+ "grad_norm": 468937.7683958159,
5534
+ "learning_rate": 2.673143215293012e-07,
5535
+ "logits/chosen": -0.046643782407045364,
5536
+ "logits/rejected": -0.1421128809452057,
5537
+ "logps/chosen": -40.85643768310547,
5538
+ "logps/rejected": -577.6583862304688,
5539
+ "loss": 14531.35,
5540
+ "rewards/accuracies": 0.987500011920929,
5541
+ "rewards/chosen": 0.195206418633461,
5542
+ "rewards/margins": 0.5361508131027222,
5543
+ "rewards/rejected": -0.3409443199634552,
5544
+ "step": 3680
5545
+ },
5546
+ {
5547
+ "epoch": 18.729113924050633,
5548
+ "grad_norm": 627917.5959141933,
5549
+ "learning_rate": 2.6653086806643683e-07,
5550
+ "logits/chosen": 1.5284700393676758,
5551
+ "logits/rejected": 1.2886362075805664,
5552
+ "logps/chosen": -48.694664001464844,
5553
+ "logps/rejected": -579.7990112304688,
5554
+ "loss": 15195.4844,
5555
+ "rewards/accuracies": 0.987500011920929,
5556
+ "rewards/chosen": 0.1851346641778946,
5557
+ "rewards/margins": 0.531388521194458,
5558
+ "rewards/rejected": -0.346253901720047,
5559
+ "step": 3690
5560
+ },
5561
+ {
5562
+ "epoch": 18.77974683544304,
5563
+ "grad_norm": 511207.857422736,
5564
+ "learning_rate": 2.6574741460357254e-07,
5565
+ "logits/chosen": 0.03928997367620468,
5566
+ "logits/rejected": 0.5418666005134583,
5567
+ "logps/chosen": -50.31745529174805,
5568
+ "logps/rejected": -593.0169677734375,
5569
+ "loss": 14929.1469,
5570
+ "rewards/accuracies": 0.987500011920929,
5571
+ "rewards/chosen": 0.19712677597999573,
5572
+ "rewards/margins": 0.5447811484336853,
5573
+ "rewards/rejected": -0.3476543724536896,
5574
+ "step": 3700
5575
+ },
5576
+ {
5577
+ "epoch": 18.830379746835444,
5578
+ "grad_norm": 568133.4282182837,
5579
+ "learning_rate": 2.6496396114070825e-07,
5580
+ "logits/chosen": -0.7848063707351685,
5581
+ "logits/rejected": -0.8312255144119263,
5582
+ "logps/chosen": -39.726234436035156,
5583
+ "logps/rejected": -566.0286254882812,
5584
+ "loss": 14112.2844,
5585
+ "rewards/accuracies": 0.987500011920929,
5586
+ "rewards/chosen": 0.18878208100795746,
5587
+ "rewards/margins": 0.5250921249389648,
5588
+ "rewards/rejected": -0.3363099992275238,
5589
+ "step": 3710
5590
+ },
5591
+ {
5592
+ "epoch": 18.88101265822785,
5593
+ "grad_norm": 293062.3175283677,
5594
+ "learning_rate": 2.6418050767784395e-07,
5595
+ "logits/chosen": -0.22776488959789276,
5596
+ "logits/rejected": -0.043119143694639206,
5597
+ "logps/chosen": -47.83971405029297,
5598
+ "logps/rejected": -575.6166381835938,
5599
+ "loss": 14345.3813,
5600
+ "rewards/accuracies": 0.987500011920929,
5601
+ "rewards/chosen": 0.19344884157180786,
5602
+ "rewards/margins": 0.5261351466178894,
5603
+ "rewards/rejected": -0.33268633484840393,
5604
+ "step": 3720
5605
+ },
5606
+ {
5607
+ "epoch": 18.931645569620255,
5608
+ "grad_norm": 369584.46121245134,
5609
+ "learning_rate": 2.633970542149796e-07,
5610
+ "logits/chosen": 0.6460098028182983,
5611
+ "logits/rejected": 0.6165057420730591,
5612
+ "logps/chosen": -53.0880126953125,
5613
+ "logps/rejected": -602.0147705078125,
5614
+ "loss": 14143.9609,
5615
+ "rewards/accuracies": 1.0,
5616
+ "rewards/chosen": 0.19915179908275604,
5617
+ "rewards/margins": 0.5483053922653198,
5618
+ "rewards/rejected": -0.3491537272930145,
5619
+ "step": 3730
5620
+ },
5621
+ {
5622
+ "epoch": 18.98227848101266,
5623
+ "grad_norm": 328959.5337312854,
5624
+ "learning_rate": 2.626136007521153e-07,
5625
+ "logits/chosen": 0.25958794355392456,
5626
+ "logits/rejected": 0.5823850631713867,
5627
+ "logps/chosen": -49.16436004638672,
5628
+ "logps/rejected": -584.5070190429688,
5629
+ "loss": 14187.4844,
5630
+ "rewards/accuracies": 0.987500011920929,
5631
+ "rewards/chosen": 0.19965310394763947,
5632
+ "rewards/margins": 0.5432143211364746,
5633
+ "rewards/rejected": -0.34356123208999634,
5634
+ "step": 3740
5635
+ },
5636
+ {
5637
+ "epoch": 19.03291139240506,
5638
+ "grad_norm": 1626740.8696455131,
5639
+ "learning_rate": 2.61830147289251e-07,
5640
+ "logits/chosen": -0.6601130366325378,
5641
+ "logits/rejected": -0.8405634164810181,
5642
+ "logps/chosen": -46.10778045654297,
5643
+ "logps/rejected": -587.3377685546875,
5644
+ "loss": 14051.6469,
5645
+ "rewards/accuracies": 0.987500011920929,
5646
+ "rewards/chosen": 0.1946493685245514,
5647
+ "rewards/margins": 0.54271399974823,
5648
+ "rewards/rejected": -0.34806469082832336,
5649
+ "step": 3750
5650
+ },
5651
+ {
5652
+ "epoch": 19.083544303797467,
5653
+ "grad_norm": 786920.4959477714,
5654
+ "learning_rate": 2.610466938263867e-07,
5655
+ "logits/chosen": 0.280475914478302,
5656
+ "logits/rejected": 1.5355632305145264,
5657
+ "logps/chosen": -40.83550262451172,
5658
+ "logps/rejected": -576.2233276367188,
5659
+ "loss": 14524.6172,
5660
+ "rewards/accuracies": 0.987500011920929,
5661
+ "rewards/chosen": 0.19496676325798035,
5662
+ "rewards/margins": 0.5368971228599548,
5663
+ "rewards/rejected": -0.3419303297996521,
5664
+ "step": 3760
5665
+ },
5666
+ {
5667
+ "epoch": 19.134177215189872,
5668
+ "grad_norm": 670222.9584254185,
5669
+ "learning_rate": 2.602632403635224e-07,
5670
+ "logits/chosen": 1.6073856353759766,
5671
+ "logits/rejected": 2.1679255962371826,
5672
+ "logps/chosen": -48.07741928100586,
5673
+ "logps/rejected": -568.386962890625,
5674
+ "loss": 16064.1922,
5675
+ "rewards/accuracies": 0.987500011920929,
5676
+ "rewards/chosen": 0.19028018414974213,
5677
+ "rewards/margins": 0.5232519507408142,
5678
+ "rewards/rejected": -0.3329717516899109,
5679
+ "step": 3770
5680
+ },
5681
+ {
5682
+ "epoch": 19.184810126582278,
5683
+ "grad_norm": 779401.4265683588,
5684
+ "learning_rate": 2.594797869006581e-07,
5685
+ "logits/chosen": -1.2690767049789429,
5686
+ "logits/rejected": -0.7741214036941528,
5687
+ "logps/chosen": -35.147666931152344,
5688
+ "logps/rejected": -588.05810546875,
5689
+ "loss": 14594.675,
5690
+ "rewards/accuracies": 1.0,
5691
+ "rewards/chosen": 0.20034465193748474,
5692
+ "rewards/margins": 0.5537833571434021,
5693
+ "rewards/rejected": -0.3534386456012726,
5694
+ "step": 3780
5695
+ },
5696
+ {
5697
+ "epoch": 19.235443037974683,
5698
+ "grad_norm": 677896.0436831466,
5699
+ "learning_rate": 2.586963334377938e-07,
5700
+ "logits/chosen": 0.381600558757782,
5701
+ "logits/rejected": 0.3627360761165619,
5702
+ "logps/chosen": -47.129329681396484,
5703
+ "logps/rejected": -583.4297485351562,
5704
+ "loss": 14673.1125,
5705
+ "rewards/accuracies": 1.0,
5706
+ "rewards/chosen": 0.19634023308753967,
5707
+ "rewards/margins": 0.5409786105155945,
5708
+ "rewards/rejected": -0.3446383774280548,
5709
+ "step": 3790
5710
+ },
5711
+ {
5712
+ "epoch": 19.28607594936709,
5713
+ "grad_norm": 1708590.8406628803,
5714
+ "learning_rate": 2.579128799749295e-07,
5715
+ "logits/chosen": -0.6463128924369812,
5716
+ "logits/rejected": -0.1966671198606491,
5717
+ "logps/chosen": -51.58148193359375,
5718
+ "logps/rejected": -571.8802490234375,
5719
+ "loss": 14855.8094,
5720
+ "rewards/accuracies": 0.987500011920929,
5721
+ "rewards/chosen": 0.1947019398212433,
5722
+ "rewards/margins": 0.5256696343421936,
5723
+ "rewards/rejected": -0.3309677243232727,
5724
+ "step": 3800
5725
+ },
5726
+ {
5727
+ "epoch": 19.336708860759494,
5728
+ "grad_norm": 906394.5199246205,
5729
+ "learning_rate": 2.5712942651206515e-07,
5730
+ "logits/chosen": 0.6537224054336548,
5731
+ "logits/rejected": 1.356911301612854,
5732
+ "logps/chosen": -37.791786193847656,
5733
+ "logps/rejected": -541.84912109375,
5734
+ "loss": 14494.675,
5735
+ "rewards/accuracies": 0.9750000238418579,
5736
+ "rewards/chosen": 0.18946874141693115,
5737
+ "rewards/margins": 0.5098165273666382,
5738
+ "rewards/rejected": -0.3203478455543518,
5739
+ "step": 3810
5740
+ },
5741
+ {
5742
+ "epoch": 19.3873417721519,
5743
+ "grad_norm": 1248788.3894635146,
5744
+ "learning_rate": 2.5634597304920085e-07,
5745
+ "logits/chosen": -1.4148962497711182,
5746
+ "logits/rejected": -0.616938591003418,
5747
+ "logps/chosen": -39.15003204345703,
5748
+ "logps/rejected": -567.9779052734375,
5749
+ "loss": 14511.9828,
5750
+ "rewards/accuracies": 1.0,
5751
+ "rewards/chosen": 0.19468382000923157,
5752
+ "rewards/margins": 0.5306459665298462,
5753
+ "rewards/rejected": -0.33596211671829224,
5754
+ "step": 3820
5755
+ },
5756
+ {
5757
+ "epoch": 19.437974683544304,
5758
+ "grad_norm": 699507.4776687805,
5759
+ "learning_rate": 2.5556251958633656e-07,
5760
+ "logits/chosen": -0.786666214466095,
5761
+ "logits/rejected": -0.8524150848388672,
5762
+ "logps/chosen": -37.31165313720703,
5763
+ "logps/rejected": -559.5811767578125,
5764
+ "loss": 15226.8953,
5765
+ "rewards/accuracies": 0.987500011920929,
5766
+ "rewards/chosen": 0.18610945343971252,
5767
+ "rewards/margins": 0.5259476900100708,
5768
+ "rewards/rejected": -0.3398382067680359,
5769
+ "step": 3830
5770
+ },
5771
+ {
5772
+ "epoch": 19.48860759493671,
5773
+ "grad_norm": 750946.845865734,
5774
+ "learning_rate": 2.5477906612347227e-07,
5775
+ "logits/chosen": -0.5914249420166016,
5776
+ "logits/rejected": -0.1790940761566162,
5777
+ "logps/chosen": -41.875919342041016,
5778
+ "logps/rejected": -580.2433471679688,
5779
+ "loss": 15077.0078,
5780
+ "rewards/accuracies": 1.0,
5781
+ "rewards/chosen": 0.1914350688457489,
5782
+ "rewards/margins": 0.5402361154556274,
5783
+ "rewards/rejected": -0.34880098700523376,
5784
+ "step": 3840
5785
+ },
5786
+ {
5787
+ "epoch": 19.539240506329115,
5788
+ "grad_norm": 1438213.362152031,
5789
+ "learning_rate": 2.539956126606079e-07,
5790
+ "logits/chosen": -1.4764426946640015,
5791
+ "logits/rejected": -1.0852867364883423,
5792
+ "logps/chosen": -45.64619064331055,
5793
+ "logps/rejected": -574.2334594726562,
5794
+ "loss": 15001.6922,
5795
+ "rewards/accuracies": 0.9750000238418579,
5796
+ "rewards/chosen": 0.19227740168571472,
5797
+ "rewards/margins": 0.527544379234314,
5798
+ "rewards/rejected": -0.33526697754859924,
5799
+ "step": 3850
5800
+ },
5801
+ {
5802
+ "epoch": 19.58987341772152,
5803
+ "grad_norm": 1015656.6585732017,
5804
+ "learning_rate": 2.532121591977436e-07,
5805
+ "logits/chosen": 0.0265532024204731,
5806
+ "logits/rejected": 0.4305901527404785,
5807
+ "logps/chosen": -40.221275329589844,
5808
+ "logps/rejected": -580.7332763671875,
5809
+ "loss": 15005.4062,
5810
+ "rewards/accuracies": 0.987500011920929,
5811
+ "rewards/chosen": 0.1900371015071869,
5812
+ "rewards/margins": 0.5353468656539917,
5813
+ "rewards/rejected": -0.3453097939491272,
5814
+ "step": 3860
5815
+ },
5816
+ {
5817
+ "epoch": 19.640506329113926,
5818
+ "grad_norm": 1480021.6334817603,
5819
+ "learning_rate": 2.5242870573487933e-07,
5820
+ "logits/chosen": -2.3115265369415283,
5821
+ "logits/rejected": -1.9450628757476807,
5822
+ "logps/chosen": -43.31880187988281,
5823
+ "logps/rejected": -592.5906372070312,
5824
+ "loss": 14681.3906,
5825
+ "rewards/accuracies": 1.0,
5826
+ "rewards/chosen": 0.20574085414409637,
5827
+ "rewards/margins": 0.5489095449447632,
5828
+ "rewards/rejected": -0.3431686758995056,
5829
+ "step": 3870
5830
+ },
5831
+ {
5832
+ "epoch": 19.691139240506327,
5833
+ "grad_norm": 652464.7916313735,
5834
+ "learning_rate": 2.5164525227201504e-07,
5835
+ "logits/chosen": 0.63951176404953,
5836
+ "logits/rejected": 1.3804535865783691,
5837
+ "logps/chosen": -33.52408981323242,
5838
+ "logps/rejected": -556.9231567382812,
5839
+ "loss": 15124.1328,
5840
+ "rewards/accuracies": 1.0,
5841
+ "rewards/chosen": 0.1876874566078186,
5842
+ "rewards/margins": 0.5311328172683716,
5843
+ "rewards/rejected": -0.343445360660553,
5844
+ "step": 3880
5845
+ },
5846
+ {
5847
+ "epoch": 19.741772151898733,
5848
+ "grad_norm": 697435.0328174214,
5849
+ "learning_rate": 2.508617988091507e-07,
5850
+ "logits/chosen": -1.7422069311141968,
5851
+ "logits/rejected": -1.3413903713226318,
5852
+ "logps/chosen": -42.224788665771484,
5853
+ "logps/rejected": -584.3877563476562,
5854
+ "loss": 15307.875,
5855
+ "rewards/accuracies": 1.0,
5856
+ "rewards/chosen": 0.2013184279203415,
5857
+ "rewards/margins": 0.5388418436050415,
5858
+ "rewards/rejected": -0.3375234305858612,
5859
+ "step": 3890
5860
+ },
5861
+ {
5862
+ "epoch": 19.792405063291138,
5863
+ "grad_norm": 680395.3386900029,
5864
+ "learning_rate": 2.500783453462864e-07,
5865
+ "logits/chosen": 0.6034026741981506,
5866
+ "logits/rejected": 1.1066893339157104,
5867
+ "logps/chosen": -39.37774658203125,
5868
+ "logps/rejected": -585.9308471679688,
5869
+ "loss": 15434.6031,
5870
+ "rewards/accuracies": 1.0,
5871
+ "rewards/chosen": 0.20041151344776154,
5872
+ "rewards/margins": 0.545585036277771,
5873
+ "rewards/rejected": -0.34517353773117065,
5874
+ "step": 3900
5875
+ },
5876
+ {
5877
+ "epoch": 19.843037974683543,
5878
+ "grad_norm": 1036480.9072027011,
5879
+ "learning_rate": 2.492948918834221e-07,
5880
+ "logits/chosen": -0.37202078104019165,
5881
+ "logits/rejected": -0.6633853316307068,
5882
+ "logps/chosen": -50.845218658447266,
5883
+ "logps/rejected": -565.1788330078125,
5884
+ "loss": 14732.9813,
5885
+ "rewards/accuracies": 0.949999988079071,
5886
+ "rewards/chosen": 0.1860923022031784,
5887
+ "rewards/margins": 0.5141801834106445,
5888
+ "rewards/rejected": -0.32808783650398254,
5889
+ "step": 3910
5890
+ },
5891
+ {
5892
+ "epoch": 19.89367088607595,
5893
+ "grad_norm": 960769.0916438915,
5894
+ "learning_rate": 2.485114384205578e-07,
5895
+ "logits/chosen": -2.4451048374176025,
5896
+ "logits/rejected": -1.8602224588394165,
5897
+ "logps/chosen": -49.44445037841797,
5898
+ "logps/rejected": -588.972900390625,
5899
+ "loss": 14954.0281,
5900
+ "rewards/accuracies": 0.987500011920929,
5901
+ "rewards/chosen": 0.20443923771381378,
5902
+ "rewards/margins": 0.5393208265304565,
5903
+ "rewards/rejected": -0.33488157391548157,
5904
+ "step": 3920
5905
+ },
5906
+ {
5907
+ "epoch": 19.944303797468354,
5908
+ "grad_norm": 637831.6626185304,
5909
+ "learning_rate": 2.477279849576935e-07,
5910
+ "logits/chosen": -1.1525195837020874,
5911
+ "logits/rejected": -0.6883751153945923,
5912
+ "logps/chosen": -37.11662673950195,
5913
+ "logps/rejected": -576.5172729492188,
5914
+ "loss": 14910.0938,
5915
+ "rewards/accuracies": 0.987500011920929,
5916
+ "rewards/chosen": 0.20432814955711365,
5917
+ "rewards/margins": 0.5393826961517334,
5918
+ "rewards/rejected": -0.33505457639694214,
5919
+ "step": 3930
5920
+ },
5921
+ {
5922
+ "epoch": 19.99493670886076,
5923
+ "grad_norm": 926025.3002487151,
5924
+ "learning_rate": 2.4694453149482917e-07,
5925
+ "logits/chosen": 0.10631950944662094,
5926
+ "logits/rejected": 0.8977824449539185,
5927
+ "logps/chosen": -43.12347412109375,
5928
+ "logps/rejected": -561.4703369140625,
5929
+ "loss": 15041.1219,
5930
+ "rewards/accuracies": 0.9624999761581421,
5931
+ "rewards/chosen": 0.18227019906044006,
5932
+ "rewards/margins": 0.5218333005905151,
5933
+ "rewards/rejected": -0.3395631015300751,
5934
+ "step": 3940
5935
  }
5936
  ],
5937
  "logging_steps": 10,
5938
+ "max_steps": 7092,
5939
  "num_input_tokens_seen": 0,
5940
+ "num_train_epochs": 36,
5941
  "save_steps": 500,
5942
  "stateful_callbacks": {
5943
  "TrainerControl": {
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a753652c11ab0ea9bd892fb4f6a4c90dd38862fe7afe3d497f10eddbd97dad2f
3
  size 6584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32798bab9af49bccdac87f37019ce9013d05d2970814716fe899a3a90e4f5fcb
3
  size 6584