cat-searcher
commited on
Training in progress, epoch 14, checkpoint
Browse files- last-checkpoint/global_step2962/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2962/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2962/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2962/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2962/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2962/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2962/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2962/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2962/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2962/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2962/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2962/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2962/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2962/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2962/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2962/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step2962/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3471e29a40024ecfc28a8eea3bb1eb8c173c0609f355df8a706d782fd72a6f2
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2962/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f3ab56eb73e7d800b1ce4430ccac77be0b6e08cde748e3316b3d44702f578fb
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2962/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af0097a70a7247c363820d83118beb965b4feaa08ce4536fc3c02a548ca6b380
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2962/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84e7119e680ce910838814ddf94b554d606d5e7476354a860c1a8d55374d1439
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2962/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ea4f33951cbb414f84b9a39e35b2a8be1a9c097ad68119aacd329beb669d4db
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2962/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9e1284901d00cb10e9afe3b47732279ca766e0d6683ca3402c6f06872de5e4e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2962/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3e426eb952650947dc7a89121f573fd79dc8d3051854c02d2171607d393ff4f
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2962/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ec593fe6aee5d849160b36774007bf4c2a4f123200ad6bb931cbdf26823786c
|
3 |
+
size 2506176112
|
last-checkpoint/global_step2962/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8894ca4b20727ca9a3b8d9b04e6070339729765a18dda5530f295e94d25e4bf
|
3 |
+
size 85570
|
last-checkpoint/global_step2962/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c9eea5b5486c31f10c150a7429b6b01fcec5184835f886ed7ac9ff9bc9676e9
|
3 |
+
size 85506
|
last-checkpoint/global_step2962/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c520a6b9475cbcd8422c5021f1acb91e83f01f855c5bc86271aaadd93e8e321a
|
3 |
+
size 85506
|
last-checkpoint/global_step2962/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f22b61ce07985a14d7dd5d28da2a4c67e70cc37037742abd6945b69df55ba82
|
3 |
+
size 85506
|
last-checkpoint/global_step2962/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:828a5dd2c1f30bdd3116927dd889f8388e7a86fab99eb91e177b163c838854e4
|
3 |
+
size 85506
|
last-checkpoint/global_step2962/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ef7d47d77deed99c61384314c2aa706dc6305ef596b77eda2482f1e5a49a2cb
|
3 |
+
size 85506
|
last-checkpoint/global_step2962/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:947db6371c681606d6fe68f817cc28c7de90e30a193cafef7dee08dfe850711f
|
3 |
+
size 85506
|
last-checkpoint/global_step2962/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:106a9948b226f2c26e9bd883317e2721530b2b7a29d30d49c7b252a8f554fe92
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step2962
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6ab34130d90aebe29868ecf9b47a15403e74bd3aa5e09f06dc3ea9032f8089b
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3cf91018b2355dca95814934115beac2e49e42607748ab6a28986a106363bbcd
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07d994b317c4df888a1a1aabc0c532e81f1fa34c18c8313cb2feadca3bb37194
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f5b05860618aa49c7f5d8c366d6ee73cf8b3b0d0adc17d9313b72621630d0aa
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7262faf861e984775b4fd85bc76a11b0b8b04037690e8a08a58cf9ff5328a042
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9479cad91150e2e266d17eb95fe678579a770f6df6b53496cf72067b186b094d
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:435cb6cf559e0ce3fe0d4582cac16ea40b48b7a64589952402a4c399cafbfc00
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f51001b0d8dc5792180c3a9705ccbfa66b61d46d7639afb6f7abf409629ed74f
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1e87084f11088fdce293e1fbbb05e35f5c7385b00e2f9ba195bf61cb36f757d
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d32e9bdd65145ae509e6c6ef4f6ea9d842f94a34c34a0d7d2ab6c248d3f2121
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ef29c4eabe559fffbf188b61164c94ef6c3807ccd683770ebd49ca46d0f6823
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 14.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4162,6 +4162,306 @@
|
|
4162 |
"rewards/margins": 0.5153056383132935,
|
4163 |
"rewards/rejected": -0.3264002799987793,
|
4164 |
"step": 2760
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4165 |
}
|
4166 |
],
|
4167 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 14.99746835443038,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 2962,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4162 |
"rewards/margins": 0.5153056383132935,
|
4163 |
"rewards/rejected": -0.3264002799987793,
|
4164 |
"step": 2760
|
4165 |
+
},
|
4166 |
+
{
|
4167 |
+
"epoch": 14.025316455696203,
|
4168 |
+
"grad_norm": 454762.9481647176,
|
4169 |
+
"learning_rate": 1.2159197743654026e-07,
|
4170 |
+
"logits/chosen": 2.4223504066467285,
|
4171 |
+
"logits/rejected": 3.487738847732544,
|
4172 |
+
"logps/chosen": -44.93278503417969,
|
4173 |
+
"logps/rejected": -561.3870849609375,
|
4174 |
+
"loss": 16557.4125,
|
4175 |
+
"rewards/accuracies": 1.0,
|
4176 |
+
"rewards/chosen": 0.1806286722421646,
|
4177 |
+
"rewards/margins": 0.5195534229278564,
|
4178 |
+
"rewards/rejected": -0.33892473578453064,
|
4179 |
+
"step": 2770
|
4180 |
+
},
|
4181 |
+
{
|
4182 |
+
"epoch": 14.075949367088608,
|
4183 |
+
"grad_norm": 487680.4985531969,
|
4184 |
+
"learning_rate": 1.2002507051081164e-07,
|
4185 |
+
"logits/chosen": 1.9585473537445068,
|
4186 |
+
"logits/rejected": 2.446890354156494,
|
4187 |
+
"logps/chosen": -39.52117919921875,
|
4188 |
+
"logps/rejected": -561.9512939453125,
|
4189 |
+
"loss": 15203.5906,
|
4190 |
+
"rewards/accuracies": 1.0,
|
4191 |
+
"rewards/chosen": 0.1834731251001358,
|
4192 |
+
"rewards/margins": 0.5265246629714966,
|
4193 |
+
"rewards/rejected": -0.343051552772522,
|
4194 |
+
"step": 2780
|
4195 |
+
},
|
4196 |
+
{
|
4197 |
+
"epoch": 14.126582278481013,
|
4198 |
+
"grad_norm": 335633.29006652284,
|
4199 |
+
"learning_rate": 1.1845816358508304e-07,
|
4200 |
+
"logits/chosen": -0.2361418753862381,
|
4201 |
+
"logits/rejected": 0.4229121804237366,
|
4202 |
+
"logps/chosen": -56.944580078125,
|
4203 |
+
"logps/rejected": -581.4995727539062,
|
4204 |
+
"loss": 14980.4906,
|
4205 |
+
"rewards/accuracies": 1.0,
|
4206 |
+
"rewards/chosen": 0.18962779641151428,
|
4207 |
+
"rewards/margins": 0.5268105268478394,
|
4208 |
+
"rewards/rejected": -0.3371827304363251,
|
4209 |
+
"step": 2790
|
4210 |
+
},
|
4211 |
+
{
|
4212 |
+
"epoch": 14.177215189873417,
|
4213 |
+
"grad_norm": 433336.52566667914,
|
4214 |
+
"learning_rate": 1.1689125665935443e-07,
|
4215 |
+
"logits/chosen": -0.8853734135627747,
|
4216 |
+
"logits/rejected": 0.24162235856056213,
|
4217 |
+
"logps/chosen": -49.96304702758789,
|
4218 |
+
"logps/rejected": -587.9956665039062,
|
4219 |
+
"loss": 15952.2594,
|
4220 |
+
"rewards/accuracies": 1.0,
|
4221 |
+
"rewards/chosen": 0.1883043497800827,
|
4222 |
+
"rewards/margins": 0.5334208607673645,
|
4223 |
+
"rewards/rejected": -0.345116525888443,
|
4224 |
+
"step": 2800
|
4225 |
+
},
|
4226 |
+
{
|
4227 |
+
"epoch": 14.227848101265822,
|
4228 |
+
"grad_norm": 352832.2810093542,
|
4229 |
+
"learning_rate": 1.1532434973362581e-07,
|
4230 |
+
"logits/chosen": -0.9270970225334167,
|
4231 |
+
"logits/rejected": -0.8106321096420288,
|
4232 |
+
"logps/chosen": -50.61150360107422,
|
4233 |
+
"logps/rejected": -579.4258422851562,
|
4234 |
+
"loss": 15482.3031,
|
4235 |
+
"rewards/accuracies": 1.0,
|
4236 |
+
"rewards/chosen": 0.18148374557495117,
|
4237 |
+
"rewards/margins": 0.5241626501083374,
|
4238 |
+
"rewards/rejected": -0.34267887473106384,
|
4239 |
+
"step": 2810
|
4240 |
+
},
|
4241 |
+
{
|
4242 |
+
"epoch": 14.278481012658228,
|
4243 |
+
"grad_norm": 518734.4787371263,
|
4244 |
+
"learning_rate": 1.137574428078972e-07,
|
4245 |
+
"logits/chosen": 2.115744113922119,
|
4246 |
+
"logits/rejected": 2.9750027656555176,
|
4247 |
+
"logps/chosen": -41.601097106933594,
|
4248 |
+
"logps/rejected": -573.6159057617188,
|
4249 |
+
"loss": 15787.4719,
|
4250 |
+
"rewards/accuracies": 0.987500011920929,
|
4251 |
+
"rewards/chosen": 0.18657180666923523,
|
4252 |
+
"rewards/margins": 0.5376033186912537,
|
4253 |
+
"rewards/rejected": -0.35103151202201843,
|
4254 |
+
"step": 2820
|
4255 |
+
},
|
4256 |
+
{
|
4257 |
+
"epoch": 14.329113924050633,
|
4258 |
+
"grad_norm": 637771.2756103254,
|
4259 |
+
"learning_rate": 1.1219053588216858e-07,
|
4260 |
+
"logits/chosen": -0.09557388722896576,
|
4261 |
+
"logits/rejected": -0.5708149671554565,
|
4262 |
+
"logps/chosen": -44.071807861328125,
|
4263 |
+
"logps/rejected": -585.6417236328125,
|
4264 |
+
"loss": 15660.4813,
|
4265 |
+
"rewards/accuracies": 1.0,
|
4266 |
+
"rewards/chosen": 0.18580812215805054,
|
4267 |
+
"rewards/margins": 0.5347784757614136,
|
4268 |
+
"rewards/rejected": -0.34897032380104065,
|
4269 |
+
"step": 2830
|
4270 |
+
},
|
4271 |
+
{
|
4272 |
+
"epoch": 14.379746835443038,
|
4273 |
+
"grad_norm": 469592.5817335632,
|
4274 |
+
"learning_rate": 1.1062362895643998e-07,
|
4275 |
+
"logits/chosen": 0.14405778050422668,
|
4276 |
+
"logits/rejected": 0.6720622181892395,
|
4277 |
+
"logps/chosen": -45.77620315551758,
|
4278 |
+
"logps/rejected": -562.7021484375,
|
4279 |
+
"loss": 15265.0797,
|
4280 |
+
"rewards/accuracies": 0.987500011920929,
|
4281 |
+
"rewards/chosen": 0.18696969747543335,
|
4282 |
+
"rewards/margins": 0.5200961828231812,
|
4283 |
+
"rewards/rejected": -0.3331265151500702,
|
4284 |
+
"step": 2840
|
4285 |
+
},
|
4286 |
+
{
|
4287 |
+
"epoch": 14.430379746835444,
|
4288 |
+
"grad_norm": 381405.89470487926,
|
4289 |
+
"learning_rate": 1.0905672203071137e-07,
|
4290 |
+
"logits/chosen": -0.46474942564964294,
|
4291 |
+
"logits/rejected": -0.6803582906723022,
|
4292 |
+
"logps/chosen": -43.475257873535156,
|
4293 |
+
"logps/rejected": -578.9302978515625,
|
4294 |
+
"loss": 15502.7,
|
4295 |
+
"rewards/accuracies": 1.0,
|
4296 |
+
"rewards/chosen": 0.18612739443778992,
|
4297 |
+
"rewards/margins": 0.5332227945327759,
|
4298 |
+
"rewards/rejected": -0.34709542989730835,
|
4299 |
+
"step": 2850
|
4300 |
+
},
|
4301 |
+
{
|
4302 |
+
"epoch": 14.481012658227849,
|
4303 |
+
"grad_norm": 389034.05049605225,
|
4304 |
+
"learning_rate": 1.0748981510498275e-07,
|
4305 |
+
"logits/chosen": 0.192867711186409,
|
4306 |
+
"logits/rejected": 0.04235720634460449,
|
4307 |
+
"logps/chosen": -45.57283020019531,
|
4308 |
+
"logps/rejected": -573.7398071289062,
|
4309 |
+
"loss": 16059.1625,
|
4310 |
+
"rewards/accuracies": 1.0,
|
4311 |
+
"rewards/chosen": 0.18987932801246643,
|
4312 |
+
"rewards/margins": 0.5239830613136292,
|
4313 |
+
"rewards/rejected": -0.33410370349884033,
|
4314 |
+
"step": 2860
|
4315 |
+
},
|
4316 |
+
{
|
4317 |
+
"epoch": 14.531645569620252,
|
4318 |
+
"grad_norm": 1027736.0673764712,
|
4319 |
+
"learning_rate": 1.0592290817925414e-07,
|
4320 |
+
"logits/chosen": -0.14229407906532288,
|
4321 |
+
"logits/rejected": 0.4352554380893707,
|
4322 |
+
"logps/chosen": -52.69159698486328,
|
4323 |
+
"logps/rejected": -584.4544067382812,
|
4324 |
+
"loss": 15405.6859,
|
4325 |
+
"rewards/accuracies": 1.0,
|
4326 |
+
"rewards/chosen": 0.19550864398479462,
|
4327 |
+
"rewards/margins": 0.5430904626846313,
|
4328 |
+
"rewards/rejected": -0.34758180379867554,
|
4329 |
+
"step": 2870
|
4330 |
+
},
|
4331 |
+
{
|
4332 |
+
"epoch": 14.582278481012658,
|
4333 |
+
"grad_norm": 384385.74028987245,
|
4334 |
+
"learning_rate": 1.0435600125352554e-07,
|
4335 |
+
"logits/chosen": -2.178337335586548,
|
4336 |
+
"logits/rejected": -0.7508569955825806,
|
4337 |
+
"logps/chosen": -59.098426818847656,
|
4338 |
+
"logps/rejected": -576.6027221679688,
|
4339 |
+
"loss": 14664.3844,
|
4340 |
+
"rewards/accuracies": 1.0,
|
4341 |
+
"rewards/chosen": 0.18826426565647125,
|
4342 |
+
"rewards/margins": 0.5217211842536926,
|
4343 |
+
"rewards/rejected": -0.33345693349838257,
|
4344 |
+
"step": 2880
|
4345 |
+
},
|
4346 |
+
{
|
4347 |
+
"epoch": 14.632911392405063,
|
4348 |
+
"grad_norm": 329341.72262227273,
|
4349 |
+
"learning_rate": 1.0278909432779692e-07,
|
4350 |
+
"logits/chosen": -0.5238679647445679,
|
4351 |
+
"logits/rejected": 0.5422592163085938,
|
4352 |
+
"logps/chosen": -45.037288665771484,
|
4353 |
+
"logps/rejected": -568.7276000976562,
|
4354 |
+
"loss": 15557.125,
|
4355 |
+
"rewards/accuracies": 0.987500011920929,
|
4356 |
+
"rewards/chosen": 0.18480226397514343,
|
4357 |
+
"rewards/margins": 0.5315712094306946,
|
4358 |
+
"rewards/rejected": -0.34676894545555115,
|
4359 |
+
"step": 2890
|
4360 |
+
},
|
4361 |
+
{
|
4362 |
+
"epoch": 14.683544303797468,
|
4363 |
+
"grad_norm": 543441.6169659087,
|
4364 |
+
"learning_rate": 1.0122218740206831e-07,
|
4365 |
+
"logits/chosen": -1.954636812210083,
|
4366 |
+
"logits/rejected": -1.2880172729492188,
|
4367 |
+
"logps/chosen": -42.44208908081055,
|
4368 |
+
"logps/rejected": -553.3023681640625,
|
4369 |
+
"loss": 15342.95,
|
4370 |
+
"rewards/accuracies": 0.9750000238418579,
|
4371 |
+
"rewards/chosen": 0.17853178083896637,
|
4372 |
+
"rewards/margins": 0.5080317258834839,
|
4373 |
+
"rewards/rejected": -0.32949990034103394,
|
4374 |
+
"step": 2900
|
4375 |
+
},
|
4376 |
+
{
|
4377 |
+
"epoch": 14.734177215189874,
|
4378 |
+
"grad_norm": 485286.8133606422,
|
4379 |
+
"learning_rate": 9.96552804763397e-08,
|
4380 |
+
"logits/chosen": -0.10534539073705673,
|
4381 |
+
"logits/rejected": -0.22817449271678925,
|
4382 |
+
"logps/chosen": -58.41508102416992,
|
4383 |
+
"logps/rejected": -589.82861328125,
|
4384 |
+
"loss": 14829.6719,
|
4385 |
+
"rewards/accuracies": 1.0,
|
4386 |
+
"rewards/chosen": 0.19155274331569672,
|
4387 |
+
"rewards/margins": 0.5371214747428894,
|
4388 |
+
"rewards/rejected": -0.3455687165260315,
|
4389 |
+
"step": 2910
|
4390 |
+
},
|
4391 |
+
{
|
4392 |
+
"epoch": 14.784810126582279,
|
4393 |
+
"grad_norm": 443260.47292018944,
|
4394 |
+
"learning_rate": 9.808837355061108e-08,
|
4395 |
+
"logits/chosen": 0.06932596862316132,
|
4396 |
+
"logits/rejected": -0.2167021781206131,
|
4397 |
+
"logps/chosen": -47.265785217285156,
|
4398 |
+
"logps/rejected": -564.3973388671875,
|
4399 |
+
"loss": 15330.0641,
|
4400 |
+
"rewards/accuracies": 0.987500011920929,
|
4401 |
+
"rewards/chosen": 0.1784828007221222,
|
4402 |
+
"rewards/margins": 0.5103118419647217,
|
4403 |
+
"rewards/rejected": -0.3318290710449219,
|
4404 |
+
"step": 2920
|
4405 |
+
},
|
4406 |
+
{
|
4407 |
+
"epoch": 14.835443037974684,
|
4408 |
+
"grad_norm": 483368.1079372665,
|
4409 |
+
"learning_rate": 9.652146662488248e-08,
|
4410 |
+
"logits/chosen": -0.06790392100811005,
|
4411 |
+
"logits/rejected": 0.29011401534080505,
|
4412 |
+
"logps/chosen": -54.78580856323242,
|
4413 |
+
"logps/rejected": -574.620361328125,
|
4414 |
+
"loss": 15093.3531,
|
4415 |
+
"rewards/accuracies": 1.0,
|
4416 |
+
"rewards/chosen": 0.18937523663043976,
|
4417 |
+
"rewards/margins": 0.5257763862609863,
|
4418 |
+
"rewards/rejected": -0.33640116453170776,
|
4419 |
+
"step": 2930
|
4420 |
+
},
|
4421 |
+
{
|
4422 |
+
"epoch": 14.886075949367088,
|
4423 |
+
"grad_norm": 955906.0887958824,
|
4424 |
+
"learning_rate": 9.495455969915387e-08,
|
4425 |
+
"logits/chosen": 1.4835760593414307,
|
4426 |
+
"logits/rejected": 1.6735947132110596,
|
4427 |
+
"logps/chosen": -46.26830291748047,
|
4428 |
+
"logps/rejected": -551.6137084960938,
|
4429 |
+
"loss": 15061.7437,
|
4430 |
+
"rewards/accuracies": 0.9750000238418579,
|
4431 |
+
"rewards/chosen": 0.1782112419605255,
|
4432 |
+
"rewards/margins": 0.5047247409820557,
|
4433 |
+
"rewards/rejected": -0.32651349902153015,
|
4434 |
+
"step": 2940
|
4435 |
+
},
|
4436 |
+
{
|
4437 |
+
"epoch": 14.936708860759493,
|
4438 |
+
"grad_norm": 389874.4777367002,
|
4439 |
+
"learning_rate": 9.338765277342525e-08,
|
4440 |
+
"logits/chosen": -0.45253458619117737,
|
4441 |
+
"logits/rejected": 0.04955162853002548,
|
4442 |
+
"logps/chosen": -44.50522994995117,
|
4443 |
+
"logps/rejected": -556.4650268554688,
|
4444 |
+
"loss": 15850.6094,
|
4445 |
+
"rewards/accuracies": 0.987500011920929,
|
4446 |
+
"rewards/chosen": 0.1812363862991333,
|
4447 |
+
"rewards/margins": 0.5129731893539429,
|
4448 |
+
"rewards/rejected": -0.33173683285713196,
|
4449 |
+
"step": 2950
|
4450 |
+
},
|
4451 |
+
{
|
4452 |
+
"epoch": 14.987341772151899,
|
4453 |
+
"grad_norm": 880494.2982969056,
|
4454 |
+
"learning_rate": 9.182074584769664e-08,
|
4455 |
+
"logits/chosen": -1.3114904165267944,
|
4456 |
+
"logits/rejected": -0.3469497859477997,
|
4457 |
+
"logps/chosen": -48.75851821899414,
|
4458 |
+
"logps/rejected": -542.8458862304688,
|
4459 |
+
"loss": 14465.8125,
|
4460 |
+
"rewards/accuracies": 0.949999988079071,
|
4461 |
+
"rewards/chosen": 0.1728508621454239,
|
4462 |
+
"rewards/margins": 0.49641647934913635,
|
4463 |
+
"rewards/rejected": -0.32356563210487366,
|
4464 |
+
"step": 2960
|
4465 |
}
|
4466 |
],
|
4467 |
"logging_steps": 10,
|