cat-searcher commited on
Commit
a9b8d85
·
verified ·
1 Parent(s): c8170ef

Training in progress, epoch 14, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step2962/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step2962/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step2962/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step2962/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step2962/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step2962/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step2962/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step2962/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step2962/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step2962/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step2962/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step2962/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step2962/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step2962/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step2962/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step2962/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step2962/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3471e29a40024ecfc28a8eea3bb1eb8c173c0609f355df8a706d782fd72a6f2
3
+ size 2506176112
last-checkpoint/global_step2962/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f3ab56eb73e7d800b1ce4430ccac77be0b6e08cde748e3316b3d44702f578fb
3
+ size 2506176112
last-checkpoint/global_step2962/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af0097a70a7247c363820d83118beb965b4feaa08ce4536fc3c02a548ca6b380
3
+ size 2506176112
last-checkpoint/global_step2962/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84e7119e680ce910838814ddf94b554d606d5e7476354a860c1a8d55374d1439
3
+ size 2506176112
last-checkpoint/global_step2962/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea4f33951cbb414f84b9a39e35b2a8be1a9c097ad68119aacd329beb669d4db
3
+ size 2506176112
last-checkpoint/global_step2962/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9e1284901d00cb10e9afe3b47732279ca766e0d6683ca3402c6f06872de5e4e
3
+ size 2506176112
last-checkpoint/global_step2962/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3e426eb952650947dc7a89121f573fd79dc8d3051854c02d2171607d393ff4f
3
+ size 2506176112
last-checkpoint/global_step2962/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ec593fe6aee5d849160b36774007bf4c2a4f123200ad6bb931cbdf26823786c
3
+ size 2506176112
last-checkpoint/global_step2962/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8894ca4b20727ca9a3b8d9b04e6070339729765a18dda5530f295e94d25e4bf
3
+ size 85570
last-checkpoint/global_step2962/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c9eea5b5486c31f10c150a7429b6b01fcec5184835f886ed7ac9ff9bc9676e9
3
+ size 85506
last-checkpoint/global_step2962/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c520a6b9475cbcd8422c5021f1acb91e83f01f855c5bc86271aaadd93e8e321a
3
+ size 85506
last-checkpoint/global_step2962/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f22b61ce07985a14d7dd5d28da2a4c67e70cc37037742abd6945b69df55ba82
3
+ size 85506
last-checkpoint/global_step2962/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:828a5dd2c1f30bdd3116927dd889f8388e7a86fab99eb91e177b163c838854e4
3
+ size 85506
last-checkpoint/global_step2962/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef7d47d77deed99c61384314c2aa706dc6305ef596b77eda2482f1e5a49a2cb
3
+ size 85506
last-checkpoint/global_step2962/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:947db6371c681606d6fe68f817cc28c7de90e30a193cafef7dee08dfe850711f
3
+ size 85506
last-checkpoint/global_step2962/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:106a9948b226f2c26e9bd883317e2721530b2b7a29d30d49c7b252a8f554fe92
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2765
 
1
+ global_step2962
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51554b1b141cb737fe919c7a7d90b054dc54778551653b359782ad641e140b5d
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6ab34130d90aebe29868ecf9b47a15403e74bd3aa5e09f06dc3ea9032f8089b
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39ccf095d82d77e6ad3b265d9fdc49a86ab3fad18daa92e729265d0f5f3bbb71
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf91018b2355dca95814934115beac2e49e42607748ab6a28986a106363bbcd
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7891ffa7c7dae99113aa986d67278b52b8c57db55001dc3547a61f24569a34ee
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07d994b317c4df888a1a1aabc0c532e81f1fa34c18c8313cb2feadca3bb37194
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8b92875cb04deec367605433847d1bda444b178b643d2da7ed9aaf738d232b4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f5b05860618aa49c7f5d8c366d6ee73cf8b3b0d0adc17d9313b72621630d0aa
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9f5f3338a05e325b5408a1cd0b6f5e5b10fad05fe479d63f44bec4cf18107d6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7262faf861e984775b4fd85bc76a11b0b8b04037690e8a08a58cf9ff5328a042
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1be749fea477a3867d44010631937e0d8f071ca5f9614f9795c92c7fa68833a6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9479cad91150e2e266d17eb95fe678579a770f6df6b53496cf72067b186b094d
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbc4a5ea4532c621f4c8e9891117b2e597a7f005001e8b4f2a1b4da8c82bf964
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:435cb6cf559e0ce3fe0d4582cac16ea40b48b7a64589952402a4c399cafbfc00
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:480f9fe7dd71b54d915b46162e34b780ba2467d5542115cc809dbca60b394c0e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f51001b0d8dc5792180c3a9705ccbfa66b61d46d7639afb6f7abf409629ed74f
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c11d982dcd813e82c2d97a5491ce9624cff2dd22e8655ea617ccef1fc1474470
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1e87084f11088fdce293e1fbbb05e35f5c7385b00e2f9ba195bf61cb36f757d
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73494fac3a001cba7cedd097b97f028d4c1d136ee6709214b0a7fe305e5b9089
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d32e9bdd65145ae509e6c6ef4f6ea9d842f94a34c34a0d7d2ab6c248d3f2121
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:136693ea21ecf0b59fde813d184b14a037ef0bca92ae910a6f73169e6198ccb5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ef29c4eabe559fffbf188b61164c94ef6c3807ccd683770ebd49ca46d0f6823
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.0,
5
  "eval_steps": 100,
6
- "global_step": 2765,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4162,6 +4162,306 @@
4162
  "rewards/margins": 0.5153056383132935,
4163
  "rewards/rejected": -0.3264002799987793,
4164
  "step": 2760
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4165
  }
4166
  ],
4167
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.99746835443038,
5
  "eval_steps": 100,
6
+ "global_step": 2962,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4162
  "rewards/margins": 0.5153056383132935,
4163
  "rewards/rejected": -0.3264002799987793,
4164
  "step": 2760
4165
+ },
4166
+ {
4167
+ "epoch": 14.025316455696203,
4168
+ "grad_norm": 454762.9481647176,
4169
+ "learning_rate": 1.2159197743654026e-07,
4170
+ "logits/chosen": 2.4223504066467285,
4171
+ "logits/rejected": 3.487738847732544,
4172
+ "logps/chosen": -44.93278503417969,
4173
+ "logps/rejected": -561.3870849609375,
4174
+ "loss": 16557.4125,
4175
+ "rewards/accuracies": 1.0,
4176
+ "rewards/chosen": 0.1806286722421646,
4177
+ "rewards/margins": 0.5195534229278564,
4178
+ "rewards/rejected": -0.33892473578453064,
4179
+ "step": 2770
4180
+ },
4181
+ {
4182
+ "epoch": 14.075949367088608,
4183
+ "grad_norm": 487680.4985531969,
4184
+ "learning_rate": 1.2002507051081164e-07,
4185
+ "logits/chosen": 1.9585473537445068,
4186
+ "logits/rejected": 2.446890354156494,
4187
+ "logps/chosen": -39.52117919921875,
4188
+ "logps/rejected": -561.9512939453125,
4189
+ "loss": 15203.5906,
4190
+ "rewards/accuracies": 1.0,
4191
+ "rewards/chosen": 0.1834731251001358,
4192
+ "rewards/margins": 0.5265246629714966,
4193
+ "rewards/rejected": -0.343051552772522,
4194
+ "step": 2780
4195
+ },
4196
+ {
4197
+ "epoch": 14.126582278481013,
4198
+ "grad_norm": 335633.29006652284,
4199
+ "learning_rate": 1.1845816358508304e-07,
4200
+ "logits/chosen": -0.2361418753862381,
4201
+ "logits/rejected": 0.4229121804237366,
4202
+ "logps/chosen": -56.944580078125,
4203
+ "logps/rejected": -581.4995727539062,
4204
+ "loss": 14980.4906,
4205
+ "rewards/accuracies": 1.0,
4206
+ "rewards/chosen": 0.18962779641151428,
4207
+ "rewards/margins": 0.5268105268478394,
4208
+ "rewards/rejected": -0.3371827304363251,
4209
+ "step": 2790
4210
+ },
4211
+ {
4212
+ "epoch": 14.177215189873417,
4213
+ "grad_norm": 433336.52566667914,
4214
+ "learning_rate": 1.1689125665935443e-07,
4215
+ "logits/chosen": -0.8853734135627747,
4216
+ "logits/rejected": 0.24162235856056213,
4217
+ "logps/chosen": -49.96304702758789,
4218
+ "logps/rejected": -587.9956665039062,
4219
+ "loss": 15952.2594,
4220
+ "rewards/accuracies": 1.0,
4221
+ "rewards/chosen": 0.1883043497800827,
4222
+ "rewards/margins": 0.5334208607673645,
4223
+ "rewards/rejected": -0.345116525888443,
4224
+ "step": 2800
4225
+ },
4226
+ {
4227
+ "epoch": 14.227848101265822,
4228
+ "grad_norm": 352832.2810093542,
4229
+ "learning_rate": 1.1532434973362581e-07,
4230
+ "logits/chosen": -0.9270970225334167,
4231
+ "logits/rejected": -0.8106321096420288,
4232
+ "logps/chosen": -50.61150360107422,
4233
+ "logps/rejected": -579.4258422851562,
4234
+ "loss": 15482.3031,
4235
+ "rewards/accuracies": 1.0,
4236
+ "rewards/chosen": 0.18148374557495117,
4237
+ "rewards/margins": 0.5241626501083374,
4238
+ "rewards/rejected": -0.34267887473106384,
4239
+ "step": 2810
4240
+ },
4241
+ {
4242
+ "epoch": 14.278481012658228,
4243
+ "grad_norm": 518734.4787371263,
4244
+ "learning_rate": 1.137574428078972e-07,
4245
+ "logits/chosen": 2.115744113922119,
4246
+ "logits/rejected": 2.9750027656555176,
4247
+ "logps/chosen": -41.601097106933594,
4248
+ "logps/rejected": -573.6159057617188,
4249
+ "loss": 15787.4719,
4250
+ "rewards/accuracies": 0.987500011920929,
4251
+ "rewards/chosen": 0.18657180666923523,
4252
+ "rewards/margins": 0.5376033186912537,
4253
+ "rewards/rejected": -0.35103151202201843,
4254
+ "step": 2820
4255
+ },
4256
+ {
4257
+ "epoch": 14.329113924050633,
4258
+ "grad_norm": 637771.2756103254,
4259
+ "learning_rate": 1.1219053588216858e-07,
4260
+ "logits/chosen": -0.09557388722896576,
4261
+ "logits/rejected": -0.5708149671554565,
4262
+ "logps/chosen": -44.071807861328125,
4263
+ "logps/rejected": -585.6417236328125,
4264
+ "loss": 15660.4813,
4265
+ "rewards/accuracies": 1.0,
4266
+ "rewards/chosen": 0.18580812215805054,
4267
+ "rewards/margins": 0.5347784757614136,
4268
+ "rewards/rejected": -0.34897032380104065,
4269
+ "step": 2830
4270
+ },
4271
+ {
4272
+ "epoch": 14.379746835443038,
4273
+ "grad_norm": 469592.5817335632,
4274
+ "learning_rate": 1.1062362895643998e-07,
4275
+ "logits/chosen": 0.14405778050422668,
4276
+ "logits/rejected": 0.6720622181892395,
4277
+ "logps/chosen": -45.77620315551758,
4278
+ "logps/rejected": -562.7021484375,
4279
+ "loss": 15265.0797,
4280
+ "rewards/accuracies": 0.987500011920929,
4281
+ "rewards/chosen": 0.18696969747543335,
4282
+ "rewards/margins": 0.5200961828231812,
4283
+ "rewards/rejected": -0.3331265151500702,
4284
+ "step": 2840
4285
+ },
4286
+ {
4287
+ "epoch": 14.430379746835444,
4288
+ "grad_norm": 381405.89470487926,
4289
+ "learning_rate": 1.0905672203071137e-07,
4290
+ "logits/chosen": -0.46474942564964294,
4291
+ "logits/rejected": -0.6803582906723022,
4292
+ "logps/chosen": -43.475257873535156,
4293
+ "logps/rejected": -578.9302978515625,
4294
+ "loss": 15502.7,
4295
+ "rewards/accuracies": 1.0,
4296
+ "rewards/chosen": 0.18612739443778992,
4297
+ "rewards/margins": 0.5332227945327759,
4298
+ "rewards/rejected": -0.34709542989730835,
4299
+ "step": 2850
4300
+ },
4301
+ {
4302
+ "epoch": 14.481012658227849,
4303
+ "grad_norm": 389034.05049605225,
4304
+ "learning_rate": 1.0748981510498275e-07,
4305
+ "logits/chosen": 0.192867711186409,
4306
+ "logits/rejected": 0.04235720634460449,
4307
+ "logps/chosen": -45.57283020019531,
4308
+ "logps/rejected": -573.7398071289062,
4309
+ "loss": 16059.1625,
4310
+ "rewards/accuracies": 1.0,
4311
+ "rewards/chosen": 0.18987932801246643,
4312
+ "rewards/margins": 0.5239830613136292,
4313
+ "rewards/rejected": -0.33410370349884033,
4314
+ "step": 2860
4315
+ },
4316
+ {
4317
+ "epoch": 14.531645569620252,
4318
+ "grad_norm": 1027736.0673764712,
4319
+ "learning_rate": 1.0592290817925414e-07,
4320
+ "logits/chosen": -0.14229407906532288,
4321
+ "logits/rejected": 0.4352554380893707,
4322
+ "logps/chosen": -52.69159698486328,
4323
+ "logps/rejected": -584.4544067382812,
4324
+ "loss": 15405.6859,
4325
+ "rewards/accuracies": 1.0,
4326
+ "rewards/chosen": 0.19550864398479462,
4327
+ "rewards/margins": 0.5430904626846313,
4328
+ "rewards/rejected": -0.34758180379867554,
4329
+ "step": 2870
4330
+ },
4331
+ {
4332
+ "epoch": 14.582278481012658,
4333
+ "grad_norm": 384385.74028987245,
4334
+ "learning_rate": 1.0435600125352554e-07,
4335
+ "logits/chosen": -2.178337335586548,
4336
+ "logits/rejected": -0.7508569955825806,
4337
+ "logps/chosen": -59.098426818847656,
4338
+ "logps/rejected": -576.6027221679688,
4339
+ "loss": 14664.3844,
4340
+ "rewards/accuracies": 1.0,
4341
+ "rewards/chosen": 0.18826426565647125,
4342
+ "rewards/margins": 0.5217211842536926,
4343
+ "rewards/rejected": -0.33345693349838257,
4344
+ "step": 2880
4345
+ },
4346
+ {
4347
+ "epoch": 14.632911392405063,
4348
+ "grad_norm": 329341.72262227273,
4349
+ "learning_rate": 1.0278909432779692e-07,
4350
+ "logits/chosen": -0.5238679647445679,
4351
+ "logits/rejected": 0.5422592163085938,
4352
+ "logps/chosen": -45.037288665771484,
4353
+ "logps/rejected": -568.7276000976562,
4354
+ "loss": 15557.125,
4355
+ "rewards/accuracies": 0.987500011920929,
4356
+ "rewards/chosen": 0.18480226397514343,
4357
+ "rewards/margins": 0.5315712094306946,
4358
+ "rewards/rejected": -0.34676894545555115,
4359
+ "step": 2890
4360
+ },
4361
+ {
4362
+ "epoch": 14.683544303797468,
4363
+ "grad_norm": 543441.6169659087,
4364
+ "learning_rate": 1.0122218740206831e-07,
4365
+ "logits/chosen": -1.954636812210083,
4366
+ "logits/rejected": -1.2880172729492188,
4367
+ "logps/chosen": -42.44208908081055,
4368
+ "logps/rejected": -553.3023681640625,
4369
+ "loss": 15342.95,
4370
+ "rewards/accuracies": 0.9750000238418579,
4371
+ "rewards/chosen": 0.17853178083896637,
4372
+ "rewards/margins": 0.5080317258834839,
4373
+ "rewards/rejected": -0.32949990034103394,
4374
+ "step": 2900
4375
+ },
4376
+ {
4377
+ "epoch": 14.734177215189874,
4378
+ "grad_norm": 485286.8133606422,
4379
+ "learning_rate": 9.96552804763397e-08,
4380
+ "logits/chosen": -0.10534539073705673,
4381
+ "logits/rejected": -0.22817449271678925,
4382
+ "logps/chosen": -58.41508102416992,
4383
+ "logps/rejected": -589.82861328125,
4384
+ "loss": 14829.6719,
4385
+ "rewards/accuracies": 1.0,
4386
+ "rewards/chosen": 0.19155274331569672,
4387
+ "rewards/margins": 0.5371214747428894,
4388
+ "rewards/rejected": -0.3455687165260315,
4389
+ "step": 2910
4390
+ },
4391
+ {
4392
+ "epoch": 14.784810126582279,
4393
+ "grad_norm": 443260.47292018944,
4394
+ "learning_rate": 9.808837355061108e-08,
4395
+ "logits/chosen": 0.06932596862316132,
4396
+ "logits/rejected": -0.2167021781206131,
4397
+ "logps/chosen": -47.265785217285156,
4398
+ "logps/rejected": -564.3973388671875,
4399
+ "loss": 15330.0641,
4400
+ "rewards/accuracies": 0.987500011920929,
4401
+ "rewards/chosen": 0.1784828007221222,
4402
+ "rewards/margins": 0.5103118419647217,
4403
+ "rewards/rejected": -0.3318290710449219,
4404
+ "step": 2920
4405
+ },
4406
+ {
4407
+ "epoch": 14.835443037974684,
4408
+ "grad_norm": 483368.1079372665,
4409
+ "learning_rate": 9.652146662488248e-08,
4410
+ "logits/chosen": -0.06790392100811005,
4411
+ "logits/rejected": 0.29011401534080505,
4412
+ "logps/chosen": -54.78580856323242,
4413
+ "logps/rejected": -574.620361328125,
4414
+ "loss": 15093.3531,
4415
+ "rewards/accuracies": 1.0,
4416
+ "rewards/chosen": 0.18937523663043976,
4417
+ "rewards/margins": 0.5257763862609863,
4418
+ "rewards/rejected": -0.33640116453170776,
4419
+ "step": 2930
4420
+ },
4421
+ {
4422
+ "epoch": 14.886075949367088,
4423
+ "grad_norm": 955906.0887958824,
4424
+ "learning_rate": 9.495455969915387e-08,
4425
+ "logits/chosen": 1.4835760593414307,
4426
+ "logits/rejected": 1.6735947132110596,
4427
+ "logps/chosen": -46.26830291748047,
4428
+ "logps/rejected": -551.6137084960938,
4429
+ "loss": 15061.7437,
4430
+ "rewards/accuracies": 0.9750000238418579,
4431
+ "rewards/chosen": 0.1782112419605255,
4432
+ "rewards/margins": 0.5047247409820557,
4433
+ "rewards/rejected": -0.32651349902153015,
4434
+ "step": 2940
4435
+ },
4436
+ {
4437
+ "epoch": 14.936708860759493,
4438
+ "grad_norm": 389874.4777367002,
4439
+ "learning_rate": 9.338765277342525e-08,
4440
+ "logits/chosen": -0.45253458619117737,
4441
+ "logits/rejected": 0.04955162853002548,
4442
+ "logps/chosen": -44.50522994995117,
4443
+ "logps/rejected": -556.4650268554688,
4444
+ "loss": 15850.6094,
4445
+ "rewards/accuracies": 0.987500011920929,
4446
+ "rewards/chosen": 0.1812363862991333,
4447
+ "rewards/margins": 0.5129731893539429,
4448
+ "rewards/rejected": -0.33173683285713196,
4449
+ "step": 2950
4450
+ },
4451
+ {
4452
+ "epoch": 14.987341772151899,
4453
+ "grad_norm": 880494.2982969056,
4454
+ "learning_rate": 9.182074584769664e-08,
4455
+ "logits/chosen": -1.3114904165267944,
4456
+ "logits/rejected": -0.3469497859477997,
4457
+ "logps/chosen": -48.75851821899414,
4458
+ "logps/rejected": -542.8458862304688,
4459
+ "loss": 14465.8125,
4460
+ "rewards/accuracies": 0.949999988079071,
4461
+ "rewards/chosen": 0.1728508621454239,
4462
+ "rewards/margins": 0.49641647934913635,
4463
+ "rewards/rejected": -0.32356563210487366,
4464
+ "step": 2960
4465
  }
4466
  ],
4467
  "logging_steps": 10,