Training in progress, epoch 20, checkpoint
Browse files- last-checkpoint/global_step3941/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3941/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3941/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3941/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3941/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3941/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3941/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3941/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3941/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3941/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3941/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3941/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3941/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3941/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3941/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3941/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +889 -4
- last-checkpoint/training_args.bin +1 -1
last-checkpoint/global_step3941/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94b4dcfd71cab30d4f84d893b6fd0d1a6e2aa4b3a61d6c79e5479ce39b939d94
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3941/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f954c542f98537a9ed834facf341309119d8d1fba9b2ce996edb91caa0185864
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3941/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b2c3b968bcad4a7393ddf5c5f119cdaac63191903276c1eb81d6c451c5d6752
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3941/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bee82ebc9e3eee25d3b0df80e977d29d2fb8d31a2c15e34a592ca2e3b23b894
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3941/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb059eb071e2c280b3205e611e8d35da6f4b0074daa865e2753febf692019be7
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3941/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88b041f26328228bde99957a111e977af41d76464988508861df2fe943631d98
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3941/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d86002b3f908ebc201f5fbcb089f747f46dc062a490b815c3292beeddbb2eba
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3941/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eff104ff6a3804962fc8e81a4a25a68e1ddd23153106604a90d90db75ed7cc23
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3941/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cac99f40ef6a29819b6bfee413e07a661faee0937b8d1c61c805da871e0945af
|
3 |
+
size 85570
|
last-checkpoint/global_step3941/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5701cbcfc2c6647d84113e3d13ee7cc172b6847210fec719fae584f61e7c3a0
|
3 |
+
size 85506
|
last-checkpoint/global_step3941/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24bf30e27f127c8845ba54ae9788e2ca7ed1ccbcbba555d599dab83769417d07
|
3 |
+
size 85506
|
last-checkpoint/global_step3941/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45c29e366a2f7623d367aac2611b78199725dcfee1510c37b39fb5eff91c9287
|
3 |
+
size 85506
|
last-checkpoint/global_step3941/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14f446b32558ac815c52ce36a2ece940f8c9860c1b6666d3cf16bcfedebda918
|
3 |
+
size 85506
|
last-checkpoint/global_step3941/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f63d5c8898ac42433f7add352d994d40bab319ea7c6840b8756db83a707506f3
|
3 |
+
size 85506
|
last-checkpoint/global_step3941/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c9e1729073f017d91bafd979b84d7c37c6570c15926abbd20aeeb138871a251
|
3 |
+
size 85506
|
last-checkpoint/global_step3941/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9d37159e66416476690c422874e2523ac0312d3e34aa742482bd47edb58aebc
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step3941
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acdc933f849726cf9f6626987c11cea6a7678ccfd80dc2d0483bec3e83e0c3a7
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b099839b0f1c5f0d5fb18759ba02999fd787c83dda2d02d2ba36941c44a246b
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f8f81f81a21cdf7d6a9d642f67427b821e87802eb5a4ca4a5c038480e9a673f
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c967add958d82a4ef6285ec19e0e5a560f82b1ea3488260d2539d200cb5f199b
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a5e30f320a5f3e4a4760af275c4a2bb9fdcfc6c661a3757da3b20b4c29a87fc
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3411b849d13fdd9e668c8cc5fafa8402de13fd4a2d5761eaf33f2791780721e4
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1765109deada93bbc4f9e38b5c3c6bbd9afe9a12839bbbd72227ca7ff46467b1
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6147f79a889f2906a81eea5c06f72acf722f674546fd6be8432d3d70a04392a8
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9f5815a19b60dc1bb303c451b515a4ec523fa0faf1c87c69b03ad4032c4baa2
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:395da160bbc37f8049700e0d82d501cbd1effc732719599c0cd143db7892910e
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dd8e459c5f044a7182784611ab9373a71c50f4a912290b4cfdcb531e69b1b46
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5047,12 +5047,897 @@
|
|
5047 |
"rewards/margins": 0.5159622430801392,
|
5048 |
"rewards/rejected": -0.33039581775665283,
|
5049 |
"step": 3350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5050 |
}
|
5051 |
],
|
5052 |
"logging_steps": 10,
|
5053 |
-
"max_steps":
|
5054 |
"num_input_tokens_seen": 0,
|
5055 |
-
"num_train_epochs":
|
5056 |
"save_steps": 500,
|
5057 |
"stateful_callbacks": {
|
5058 |
"TrainerControl": {
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 20.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 3941,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5047 |
"rewards/margins": 0.5159622430801392,
|
5048 |
"rewards/rejected": -0.33039581775665283,
|
5049 |
"step": 3350
|
5050 |
+
},
|
5051 |
+
{
|
5052 |
+
"epoch": 17.0126582278481,
|
5053 |
+
"grad_norm": 218651.56901322177,
|
5054 |
+
"learning_rate": 2.9144468818552176e-08,
|
5055 |
+
"logits/chosen": 0.41573429107666016,
|
5056 |
+
"logits/rejected": 1.103547215461731,
|
5057 |
+
"logps/chosen": -37.6799201965332,
|
5058 |
+
"logps/rejected": -569.5391235351562,
|
5059 |
+
"loss": 14029.3563,
|
5060 |
+
"rewards/accuracies": 0.987500011920929,
|
5061 |
+
"rewards/chosen": 0.18778078258037567,
|
5062 |
+
"rewards/margins": 0.5316546559333801,
|
5063 |
+
"rewards/rejected": -0.34387388825416565,
|
5064 |
+
"step": 3360
|
5065 |
+
},
|
5066 |
+
{
|
5067 |
+
"epoch": 17.063291139240505,
|
5068 |
+
"grad_norm": 236719.0916690887,
|
5069 |
+
"learning_rate": 2.7577561892823564e-08,
|
5070 |
+
"logits/chosen": -0.09267449378967285,
|
5071 |
+
"logits/rejected": 0.3535307049751282,
|
5072 |
+
"logps/chosen": -43.02147674560547,
|
5073 |
+
"logps/rejected": -571.3306884765625,
|
5074 |
+
"loss": 14216.225,
|
5075 |
+
"rewards/accuracies": 0.987500011920929,
|
5076 |
+
"rewards/chosen": 0.18815621733665466,
|
5077 |
+
"rewards/margins": 0.52230304479599,
|
5078 |
+
"rewards/rejected": -0.33414679765701294,
|
5079 |
+
"step": 3370
|
5080 |
+
},
|
5081 |
+
{
|
5082 |
+
"epoch": 17.11392405063291,
|
5083 |
+
"grad_norm": 151995.97062770248,
|
5084 |
+
"learning_rate": 2.6010654967094953e-08,
|
5085 |
+
"logits/chosen": 1.3600900173187256,
|
5086 |
+
"logits/rejected": 0.45606088638305664,
|
5087 |
+
"logps/chosen": -33.07421112060547,
|
5088 |
+
"logps/rejected": -574.2869262695312,
|
5089 |
+
"loss": 14569.3875,
|
5090 |
+
"rewards/accuracies": 1.0,
|
5091 |
+
"rewards/chosen": 0.1843741536140442,
|
5092 |
+
"rewards/margins": 0.533474862575531,
|
5093 |
+
"rewards/rejected": -0.3491007089614868,
|
5094 |
+
"step": 3380
|
5095 |
+
},
|
5096 |
+
{
|
5097 |
+
"epoch": 17.164556962025316,
|
5098 |
+
"grad_norm": 229039.39535517112,
|
5099 |
+
"learning_rate": 2.4443748041366342e-08,
|
5100 |
+
"logits/chosen": 0.012326288037002087,
|
5101 |
+
"logits/rejected": -0.24337856471538544,
|
5102 |
+
"logps/chosen": -48.85834503173828,
|
5103 |
+
"logps/rejected": -591.9976806640625,
|
5104 |
+
"loss": 15141.0031,
|
5105 |
+
"rewards/accuracies": 1.0,
|
5106 |
+
"rewards/chosen": 0.19536466896533966,
|
5107 |
+
"rewards/margins": 0.5443064570426941,
|
5108 |
+
"rewards/rejected": -0.34894177317619324,
|
5109 |
+
"step": 3390
|
5110 |
+
},
|
5111 |
+
{
|
5112 |
+
"epoch": 17.21518987341772,
|
5113 |
+
"grad_norm": 224579.22425486994,
|
5114 |
+
"learning_rate": 2.2876841115637728e-08,
|
5115 |
+
"logits/chosen": -0.07731113582849503,
|
5116 |
+
"logits/rejected": 0.8038260340690613,
|
5117 |
+
"logps/chosen": -42.96089172363281,
|
5118 |
+
"logps/rejected": -587.9930419921875,
|
5119 |
+
"loss": 13962.9406,
|
5120 |
+
"rewards/accuracies": 0.987500011920929,
|
5121 |
+
"rewards/chosen": 0.18969421088695526,
|
5122 |
+
"rewards/margins": 0.5471119284629822,
|
5123 |
+
"rewards/rejected": -0.3574177622795105,
|
5124 |
+
"step": 3400
|
5125 |
+
},
|
5126 |
+
{
|
5127 |
+
"epoch": 17.265822784810126,
|
5128 |
+
"grad_norm": 194108.45632178357,
|
5129 |
+
"learning_rate": 2.1309934189909117e-08,
|
5130 |
+
"logits/chosen": -1.735790491104126,
|
5131 |
+
"logits/rejected": -0.8417277336120605,
|
5132 |
+
"logps/chosen": -40.28795623779297,
|
5133 |
+
"logps/rejected": -577.9163208007812,
|
5134 |
+
"loss": 14457.5328,
|
5135 |
+
"rewards/accuracies": 0.987500011920929,
|
5136 |
+
"rewards/chosen": 0.19212636351585388,
|
5137 |
+
"rewards/margins": 0.54021155834198,
|
5138 |
+
"rewards/rejected": -0.3480851650238037,
|
5139 |
+
"step": 3410
|
5140 |
+
},
|
5141 |
+
{
|
5142 |
+
"epoch": 17.31645569620253,
|
5143 |
+
"grad_norm": 323871.0912725565,
|
5144 |
+
"learning_rate": 1.9743027264180506e-08,
|
5145 |
+
"logits/chosen": 1.0423898696899414,
|
5146 |
+
"logits/rejected": 1.1823880672454834,
|
5147 |
+
"logps/chosen": -50.077327728271484,
|
5148 |
+
"logps/rejected": -565.8704223632812,
|
5149 |
+
"loss": 14191.8531,
|
5150 |
+
"rewards/accuracies": 0.987500011920929,
|
5151 |
+
"rewards/chosen": 0.18371161818504333,
|
5152 |
+
"rewards/margins": 0.5181502103805542,
|
5153 |
+
"rewards/rejected": -0.3344385623931885,
|
5154 |
+
"step": 3420
|
5155 |
+
},
|
5156 |
+
{
|
5157 |
+
"epoch": 17.367088607594937,
|
5158 |
+
"grad_norm": 207973.13380554292,
|
5159 |
+
"learning_rate": 1.8176120338451895e-08,
|
5160 |
+
"logits/chosen": -0.8037737011909485,
|
5161 |
+
"logits/rejected": -0.8005819320678711,
|
5162 |
+
"logps/chosen": -45.626670837402344,
|
5163 |
+
"logps/rejected": -544.7116088867188,
|
5164 |
+
"loss": 14114.0906,
|
5165 |
+
"rewards/accuracies": 0.9750000238418579,
|
5166 |
+
"rewards/chosen": 0.1760983169078827,
|
5167 |
+
"rewards/margins": 0.5017568469047546,
|
5168 |
+
"rewards/rejected": -0.32565850019454956,
|
5169 |
+
"step": 3430
|
5170 |
+
},
|
5171 |
+
{
|
5172 |
+
"epoch": 17.417721518987342,
|
5173 |
+
"grad_norm": 191156.31750064602,
|
5174 |
+
"learning_rate": 1.6609213412723284e-08,
|
5175 |
+
"logits/chosen": 1.2277637720108032,
|
5176 |
+
"logits/rejected": 0.573845386505127,
|
5177 |
+
"logps/chosen": -50.492279052734375,
|
5178 |
+
"logps/rejected": -586.3282470703125,
|
5179 |
+
"loss": 13957.7594,
|
5180 |
+
"rewards/accuracies": 0.9750000238418579,
|
5181 |
+
"rewards/chosen": 0.19206462800502777,
|
5182 |
+
"rewards/margins": 0.5328875184059143,
|
5183 |
+
"rewards/rejected": -0.34082287549972534,
|
5184 |
+
"step": 3440
|
5185 |
+
},
|
5186 |
+
{
|
5187 |
+
"epoch": 17.468354430379748,
|
5188 |
+
"grad_norm": 262791.84599779843,
|
5189 |
+
"learning_rate": 1.5042306486994673e-08,
|
5190 |
+
"logits/chosen": 0.29228338599205017,
|
5191 |
+
"logits/rejected": 0.9747223854064941,
|
5192 |
+
"logps/chosen": -37.640201568603516,
|
5193 |
+
"logps/rejected": -557.47119140625,
|
5194 |
+
"loss": 14478.4906,
|
5195 |
+
"rewards/accuracies": 1.0,
|
5196 |
+
"rewards/chosen": 0.18818514049053192,
|
5197 |
+
"rewards/margins": 0.5214470624923706,
|
5198 |
+
"rewards/rejected": -0.3332619369029999,
|
5199 |
+
"step": 3450
|
5200 |
+
},
|
5201 |
+
{
|
5202 |
+
"epoch": 17.518987341772153,
|
5203 |
+
"grad_norm": 227441.5548714142,
|
5204 |
+
"learning_rate": 1.347539956126606e-08,
|
5205 |
+
"logits/chosen": -0.060483645647764206,
|
5206 |
+
"logits/rejected": 0.41309136152267456,
|
5207 |
+
"logps/chosen": -46.32054138183594,
|
5208 |
+
"logps/rejected": -588.6563720703125,
|
5209 |
+
"loss": 14804.9047,
|
5210 |
+
"rewards/accuracies": 0.987500011920929,
|
5211 |
+
"rewards/chosen": 0.19712397456169128,
|
5212 |
+
"rewards/margins": 0.5437620878219604,
|
5213 |
+
"rewards/rejected": -0.34663814306259155,
|
5214 |
+
"step": 3460
|
5215 |
+
},
|
5216 |
+
{
|
5217 |
+
"epoch": 17.569620253164558,
|
5218 |
+
"grad_norm": 378558.8588589865,
|
5219 |
+
"learning_rate": 1.1908492635537449e-08,
|
5220 |
+
"logits/chosen": 2.0075535774230957,
|
5221 |
+
"logits/rejected": 2.772726058959961,
|
5222 |
+
"logps/chosen": -46.09113693237305,
|
5223 |
+
"logps/rejected": -582.0597534179688,
|
5224 |
+
"loss": 14645.9562,
|
5225 |
+
"rewards/accuracies": 0.987500011920929,
|
5226 |
+
"rewards/chosen": 0.18593838810920715,
|
5227 |
+
"rewards/margins": 0.5331605076789856,
|
5228 |
+
"rewards/rejected": -0.34722214937210083,
|
5229 |
+
"step": 3470
|
5230 |
+
},
|
5231 |
+
{
|
5232 |
+
"epoch": 17.620253164556964,
|
5233 |
+
"grad_norm": 263891.6462573049,
|
5234 |
+
"learning_rate": 1.0341585709808836e-08,
|
5235 |
+
"logits/chosen": 0.273967444896698,
|
5236 |
+
"logits/rejected": 1.9021276235580444,
|
5237 |
+
"logps/chosen": -34.29851531982422,
|
5238 |
+
"logps/rejected": -567.2389526367188,
|
5239 |
+
"loss": 15085.2313,
|
5240 |
+
"rewards/accuracies": 1.0,
|
5241 |
+
"rewards/chosen": 0.18956169486045837,
|
5242 |
+
"rewards/margins": 0.5355597734451294,
|
5243 |
+
"rewards/rejected": -0.3459981083869934,
|
5244 |
+
"step": 3480
|
5245 |
+
},
|
5246 |
+
{
|
5247 |
+
"epoch": 17.67088607594937,
|
5248 |
+
"grad_norm": 276267.9285814808,
|
5249 |
+
"learning_rate": 8.774678784080225e-09,
|
5250 |
+
"logits/chosen": -0.02632077969610691,
|
5251 |
+
"logits/rejected": 0.4594387114048004,
|
5252 |
+
"logps/chosen": -45.098960876464844,
|
5253 |
+
"logps/rejected": -568.720947265625,
|
5254 |
+
"loss": 13750.4469,
|
5255 |
+
"rewards/accuracies": 0.987500011920929,
|
5256 |
+
"rewards/chosen": 0.19129987061023712,
|
5257 |
+
"rewards/margins": 0.5272942781448364,
|
5258 |
+
"rewards/rejected": -0.33599433302879333,
|
5259 |
+
"step": 3490
|
5260 |
+
},
|
5261 |
+
{
|
5262 |
+
"epoch": 17.72151898734177,
|
5263 |
+
"grad_norm": 156087.69298121333,
|
5264 |
+
"learning_rate": 7.207771858351613e-09,
|
5265 |
+
"logits/chosen": 0.04748225212097168,
|
5266 |
+
"logits/rejected": 0.4610685408115387,
|
5267 |
+
"logps/chosen": -49.872169494628906,
|
5268 |
+
"logps/rejected": -603.3367919921875,
|
5269 |
+
"loss": 13778.1469,
|
5270 |
+
"rewards/accuracies": 0.987500011920929,
|
5271 |
+
"rewards/chosen": 0.19287212193012238,
|
5272 |
+
"rewards/margins": 0.5504390001296997,
|
5273 |
+
"rewards/rejected": -0.3575669229030609,
|
5274 |
+
"step": 3500
|
5275 |
+
},
|
5276 |
+
{
|
5277 |
+
"epoch": 17.772151898734176,
|
5278 |
+
"grad_norm": 209667.9634643516,
|
5279 |
+
"learning_rate": 5.6408649326230014e-09,
|
5280 |
+
"logits/chosen": 1.4883615970611572,
|
5281 |
+
"logits/rejected": 2.2038960456848145,
|
5282 |
+
"logps/chosen": -46.18961715698242,
|
5283 |
+
"logps/rejected": -575.4703369140625,
|
5284 |
+
"loss": 13653.9672,
|
5285 |
+
"rewards/accuracies": 1.0,
|
5286 |
+
"rewards/chosen": 0.18806029856204987,
|
5287 |
+
"rewards/margins": 0.5310976505279541,
|
5288 |
+
"rewards/rejected": -0.34303733706474304,
|
5289 |
+
"step": 3510
|
5290 |
+
},
|
5291 |
+
{
|
5292 |
+
"epoch": 17.82278481012658,
|
5293 |
+
"grad_norm": 222056.5820151951,
|
5294 |
+
"learning_rate": 4.07395800689439e-09,
|
5295 |
+
"logits/chosen": -0.582931637763977,
|
5296 |
+
"logits/rejected": -0.23906604945659637,
|
5297 |
+
"logps/chosen": -60.795921325683594,
|
5298 |
+
"logps/rejected": -590.2525024414062,
|
5299 |
+
"loss": 14149.5938,
|
5300 |
+
"rewards/accuracies": 0.949999988079071,
|
5301 |
+
"rewards/chosen": 0.19504059851169586,
|
5302 |
+
"rewards/margins": 0.5360020399093628,
|
5303 |
+
"rewards/rejected": -0.34096142649650574,
|
5304 |
+
"step": 3520
|
5305 |
+
},
|
5306 |
+
{
|
5307 |
+
"epoch": 17.873417721518987,
|
5308 |
+
"grad_norm": 213324.38139465638,
|
5309 |
+
"learning_rate": 2.5070510811657785e-09,
|
5310 |
+
"logits/chosen": -0.3791787028312683,
|
5311 |
+
"logits/rejected": 0.26259681582450867,
|
5312 |
+
"logps/chosen": -48.315147399902344,
|
5313 |
+
"logps/rejected": -579.376220703125,
|
5314 |
+
"loss": 14028.4,
|
5315 |
+
"rewards/accuracies": 1.0,
|
5316 |
+
"rewards/chosen": 0.19789119064807892,
|
5317 |
+
"rewards/margins": 0.5311988592147827,
|
5318 |
+
"rewards/rejected": -0.333307683467865,
|
5319 |
+
"step": 3530
|
5320 |
+
},
|
5321 |
+
{
|
5322 |
+
"epoch": 17.924050632911392,
|
5323 |
+
"grad_norm": 207695.40556695752,
|
5324 |
+
"learning_rate": 9.40144155437167e-10,
|
5325 |
+
"logits/chosen": 2.0871522426605225,
|
5326 |
+
"logits/rejected": 2.378633975982666,
|
5327 |
+
"logps/chosen": -36.07915115356445,
|
5328 |
+
"logps/rejected": -560.524169921875,
|
5329 |
+
"loss": 13942.7234,
|
5330 |
+
"rewards/accuracies": 0.987500011920929,
|
5331 |
+
"rewards/chosen": 0.18208447098731995,
|
5332 |
+
"rewards/margins": 0.5249064564704895,
|
5333 |
+
"rewards/rejected": -0.34282201528549194,
|
5334 |
+
"step": 3540
|
5335 |
+
},
|
5336 |
+
{
|
5337 |
+
"epoch": 18.020253164556962,
|
5338 |
+
"grad_norm": 633377.3531549113,
|
5339 |
+
"learning_rate": 2.774992165465371e-07,
|
5340 |
+
"logits/chosen": 0.778042197227478,
|
5341 |
+
"logits/rejected": 0.4570779800415039,
|
5342 |
+
"logps/chosen": -30.87795639038086,
|
5343 |
+
"logps/rejected": -562.3123168945312,
|
5344 |
+
"loss": 14823.5117,
|
5345 |
+
"rewards/accuracies": 1.0,
|
5346 |
+
"rewards/chosen": 0.19000156223773956,
|
5347 |
+
"rewards/margins": 0.5307614803314209,
|
5348 |
+
"rewards/rejected": -0.34075987339019775,
|
5349 |
+
"step": 3550
|
5350 |
+
},
|
5351 |
+
{
|
5352 |
+
"epoch": 18.070886075949367,
|
5353 |
+
"grad_norm": 536501.4949459385,
|
5354 |
+
"learning_rate": 2.767157630836728e-07,
|
5355 |
+
"logits/chosen": -1.453107476234436,
|
5356 |
+
"logits/rejected": -1.1603299379348755,
|
5357 |
+
"logps/chosen": -48.59767532348633,
|
5358 |
+
"logps/rejected": -607.5595703125,
|
5359 |
+
"loss": 14268.2156,
|
5360 |
+
"rewards/accuracies": 1.0,
|
5361 |
+
"rewards/chosen": 0.19779345393180847,
|
5362 |
+
"rewards/margins": 0.5535213351249695,
|
5363 |
+
"rewards/rejected": -0.3557279109954834,
|
5364 |
+
"step": 3560
|
5365 |
+
},
|
5366 |
+
{
|
5367 |
+
"epoch": 18.121518987341773,
|
5368 |
+
"grad_norm": 613929.4964505757,
|
5369 |
+
"learning_rate": 2.7593230962080847e-07,
|
5370 |
+
"logits/chosen": -1.0204923152923584,
|
5371 |
+
"logits/rejected": -1.006306529045105,
|
5372 |
+
"logps/chosen": -40.379703521728516,
|
5373 |
+
"logps/rejected": -586.8853759765625,
|
5374 |
+
"loss": 14124.1406,
|
5375 |
+
"rewards/accuracies": 1.0,
|
5376 |
+
"rewards/chosen": 0.18828515708446503,
|
5377 |
+
"rewards/margins": 0.5416163206100464,
|
5378 |
+
"rewards/rejected": -0.35333114862442017,
|
5379 |
+
"step": 3570
|
5380 |
+
},
|
5381 |
+
{
|
5382 |
+
"epoch": 18.172151898734178,
|
5383 |
+
"grad_norm": 453188.0208924516,
|
5384 |
+
"learning_rate": 2.751488561579442e-07,
|
5385 |
+
"logits/chosen": 0.978573203086853,
|
5386 |
+
"logits/rejected": 1.6422239542007446,
|
5387 |
+
"logps/chosen": -40.75902557373047,
|
5388 |
+
"logps/rejected": -571.6940307617188,
|
5389 |
+
"loss": 14028.8266,
|
5390 |
+
"rewards/accuracies": 1.0,
|
5391 |
+
"rewards/chosen": 0.19008655846118927,
|
5392 |
+
"rewards/margins": 0.5349593758583069,
|
5393 |
+
"rewards/rejected": -0.34487277269363403,
|
5394 |
+
"step": 3580
|
5395 |
+
},
|
5396 |
+
{
|
5397 |
+
"epoch": 18.222784810126583,
|
5398 |
+
"grad_norm": 470617.1864493106,
|
5399 |
+
"learning_rate": 2.743654026950799e-07,
|
5400 |
+
"logits/chosen": 0.612755298614502,
|
5401 |
+
"logits/rejected": 1.586531639099121,
|
5402 |
+
"logps/chosen": -47.43413162231445,
|
5403 |
+
"logps/rejected": -567.2514038085938,
|
5404 |
+
"loss": 14305.0953,
|
5405 |
+
"rewards/accuracies": 0.987500011920929,
|
5406 |
+
"rewards/chosen": 0.18671520054340363,
|
5407 |
+
"rewards/margins": 0.5188931226730347,
|
5408 |
+
"rewards/rejected": -0.33217787742614746,
|
5409 |
+
"step": 3590
|
5410 |
+
},
|
5411 |
+
{
|
5412 |
+
"epoch": 18.27341772151899,
|
5413 |
+
"grad_norm": 568328.2123455897,
|
5414 |
+
"learning_rate": 2.7358194923221564e-07,
|
5415 |
+
"logits/chosen": 2.5831315517425537,
|
5416 |
+
"logits/rejected": 2.3743977546691895,
|
5417 |
+
"logps/chosen": -36.72047805786133,
|
5418 |
+
"logps/rejected": -561.4580688476562,
|
5419 |
+
"loss": 14931.7812,
|
5420 |
+
"rewards/accuracies": 0.987500011920929,
|
5421 |
+
"rewards/chosen": 0.18578791618347168,
|
5422 |
+
"rewards/margins": 0.5219975113868713,
|
5423 |
+
"rewards/rejected": -0.33620959520339966,
|
5424 |
+
"step": 3600
|
5425 |
+
},
|
5426 |
+
{
|
5427 |
+
"epoch": 18.324050632911394,
|
5428 |
+
"grad_norm": 258649.85824251673,
|
5429 |
+
"learning_rate": 2.727984957693513e-07,
|
5430 |
+
"logits/chosen": -0.6456964612007141,
|
5431 |
+
"logits/rejected": 0.10119187831878662,
|
5432 |
+
"logps/chosen": -45.66813659667969,
|
5433 |
+
"logps/rejected": -584.33984375,
|
5434 |
+
"loss": 13962.2891,
|
5435 |
+
"rewards/accuracies": 1.0,
|
5436 |
+
"rewards/chosen": 0.19075247645378113,
|
5437 |
+
"rewards/margins": 0.5430020093917847,
|
5438 |
+
"rewards/rejected": -0.35224950313568115,
|
5439 |
+
"step": 3610
|
5440 |
+
},
|
5441 |
+
{
|
5442 |
+
"epoch": 18.374683544303796,
|
5443 |
+
"grad_norm": 523823.39531677734,
|
5444 |
+
"learning_rate": 2.72015042306487e-07,
|
5445 |
+
"logits/chosen": -0.1337634027004242,
|
5446 |
+
"logits/rejected": 0.3194190561771393,
|
5447 |
+
"logps/chosen": -43.2452278137207,
|
5448 |
+
"logps/rejected": -576.6324462890625,
|
5449 |
+
"loss": 14478.6656,
|
5450 |
+
"rewards/accuracies": 0.987500011920929,
|
5451 |
+
"rewards/chosen": 0.19283099472522736,
|
5452 |
+
"rewards/margins": 0.5422399640083313,
|
5453 |
+
"rewards/rejected": -0.34940892457962036,
|
5454 |
+
"step": 3620
|
5455 |
+
},
|
5456 |
+
{
|
5457 |
+
"epoch": 18.4253164556962,
|
5458 |
+
"grad_norm": 369527.7483340646,
|
5459 |
+
"learning_rate": 2.712315888436227e-07,
|
5460 |
+
"logits/chosen": -0.5704905390739441,
|
5461 |
+
"logits/rejected": -0.24132680892944336,
|
5462 |
+
"logps/chosen": -39.81604766845703,
|
5463 |
+
"logps/rejected": -579.3060302734375,
|
5464 |
+
"loss": 14853.9188,
|
5465 |
+
"rewards/accuracies": 1.0,
|
5466 |
+
"rewards/chosen": 0.1893097311258316,
|
5467 |
+
"rewards/margins": 0.5385677218437195,
|
5468 |
+
"rewards/rejected": -0.3492580056190491,
|
5469 |
+
"step": 3630
|
5470 |
+
},
|
5471 |
+
{
|
5472 |
+
"epoch": 18.475949367088607,
|
5473 |
+
"grad_norm": 487722.91173438437,
|
5474 |
+
"learning_rate": 2.704481353807584e-07,
|
5475 |
+
"logits/chosen": 0.30203062295913696,
|
5476 |
+
"logits/rejected": 1.367623209953308,
|
5477 |
+
"logps/chosen": -43.79780578613281,
|
5478 |
+
"logps/rejected": -575.3096313476562,
|
5479 |
+
"loss": 14337.7125,
|
5480 |
+
"rewards/accuracies": 0.9750000238418579,
|
5481 |
+
"rewards/chosen": 0.18831488490104675,
|
5482 |
+
"rewards/margins": 0.5326144099235535,
|
5483 |
+
"rewards/rejected": -0.3442995548248291,
|
5484 |
+
"step": 3640
|
5485 |
+
},
|
5486 |
+
{
|
5487 |
+
"epoch": 18.526582278481012,
|
5488 |
+
"grad_norm": 769147.1132735502,
|
5489 |
+
"learning_rate": 2.6966468191789406e-07,
|
5490 |
+
"logits/chosen": 0.5818338990211487,
|
5491 |
+
"logits/rejected": 0.8189504742622375,
|
5492 |
+
"logps/chosen": -40.80295944213867,
|
5493 |
+
"logps/rejected": -569.6201171875,
|
5494 |
+
"loss": 14414.5,
|
5495 |
+
"rewards/accuracies": 1.0,
|
5496 |
+
"rewards/chosen": 0.19092252850532532,
|
5497 |
+
"rewards/margins": 0.5284001231193542,
|
5498 |
+
"rewards/rejected": -0.3374776244163513,
|
5499 |
+
"step": 3650
|
5500 |
+
},
|
5501 |
+
{
|
5502 |
+
"epoch": 18.577215189873417,
|
5503 |
+
"grad_norm": 423741.6615039136,
|
5504 |
+
"learning_rate": 2.6888122845502977e-07,
|
5505 |
+
"logits/chosen": -2.1757419109344482,
|
5506 |
+
"logits/rejected": -1.7465986013412476,
|
5507 |
+
"logps/chosen": -33.543739318847656,
|
5508 |
+
"logps/rejected": -566.9044189453125,
|
5509 |
+
"loss": 13990.4406,
|
5510 |
+
"rewards/accuracies": 0.987500011920929,
|
5511 |
+
"rewards/chosen": 0.18767623603343964,
|
5512 |
+
"rewards/margins": 0.5356841683387756,
|
5513 |
+
"rewards/rejected": -0.3480078876018524,
|
5514 |
+
"step": 3660
|
5515 |
+
},
|
5516 |
+
{
|
5517 |
+
"epoch": 18.627848101265823,
|
5518 |
+
"grad_norm": 405282.2937016151,
|
5519 |
+
"learning_rate": 2.680977749921655e-07,
|
5520 |
+
"logits/chosen": -0.054244786500930786,
|
5521 |
+
"logits/rejected": 0.9029023051261902,
|
5522 |
+
"logps/chosen": -49.31962966918945,
|
5523 |
+
"logps/rejected": -585.48779296875,
|
5524 |
+
"loss": 14779.0125,
|
5525 |
+
"rewards/accuracies": 1.0,
|
5526 |
+
"rewards/chosen": 0.19979842007160187,
|
5527 |
+
"rewards/margins": 0.5441454648971558,
|
5528 |
+
"rewards/rejected": -0.3443470597267151,
|
5529 |
+
"step": 3670
|
5530 |
+
},
|
5531 |
+
{
|
5532 |
+
"epoch": 18.678481012658228,
|
5533 |
+
"grad_norm": 468937.7683958159,
|
5534 |
+
"learning_rate": 2.673143215293012e-07,
|
5535 |
+
"logits/chosen": -0.046643782407045364,
|
5536 |
+
"logits/rejected": -0.1421128809452057,
|
5537 |
+
"logps/chosen": -40.85643768310547,
|
5538 |
+
"logps/rejected": -577.6583862304688,
|
5539 |
+
"loss": 14531.35,
|
5540 |
+
"rewards/accuracies": 0.987500011920929,
|
5541 |
+
"rewards/chosen": 0.195206418633461,
|
5542 |
+
"rewards/margins": 0.5361508131027222,
|
5543 |
+
"rewards/rejected": -0.3409443199634552,
|
5544 |
+
"step": 3680
|
5545 |
+
},
|
5546 |
+
{
|
5547 |
+
"epoch": 18.729113924050633,
|
5548 |
+
"grad_norm": 627917.5959141933,
|
5549 |
+
"learning_rate": 2.6653086806643683e-07,
|
5550 |
+
"logits/chosen": 1.5284700393676758,
|
5551 |
+
"logits/rejected": 1.2886362075805664,
|
5552 |
+
"logps/chosen": -48.694664001464844,
|
5553 |
+
"logps/rejected": -579.7990112304688,
|
5554 |
+
"loss": 15195.4844,
|
5555 |
+
"rewards/accuracies": 0.987500011920929,
|
5556 |
+
"rewards/chosen": 0.1851346641778946,
|
5557 |
+
"rewards/margins": 0.531388521194458,
|
5558 |
+
"rewards/rejected": -0.346253901720047,
|
5559 |
+
"step": 3690
|
5560 |
+
},
|
5561 |
+
{
|
5562 |
+
"epoch": 18.77974683544304,
|
5563 |
+
"grad_norm": 511207.857422736,
|
5564 |
+
"learning_rate": 2.6574741460357254e-07,
|
5565 |
+
"logits/chosen": 0.03928997367620468,
|
5566 |
+
"logits/rejected": 0.5418666005134583,
|
5567 |
+
"logps/chosen": -50.31745529174805,
|
5568 |
+
"logps/rejected": -593.0169677734375,
|
5569 |
+
"loss": 14929.1469,
|
5570 |
+
"rewards/accuracies": 0.987500011920929,
|
5571 |
+
"rewards/chosen": 0.19712677597999573,
|
5572 |
+
"rewards/margins": 0.5447811484336853,
|
5573 |
+
"rewards/rejected": -0.3476543724536896,
|
5574 |
+
"step": 3700
|
5575 |
+
},
|
5576 |
+
{
|
5577 |
+
"epoch": 18.830379746835444,
|
5578 |
+
"grad_norm": 568133.4282182837,
|
5579 |
+
"learning_rate": 2.6496396114070825e-07,
|
5580 |
+
"logits/chosen": -0.7848063707351685,
|
5581 |
+
"logits/rejected": -0.8312255144119263,
|
5582 |
+
"logps/chosen": -39.726234436035156,
|
5583 |
+
"logps/rejected": -566.0286254882812,
|
5584 |
+
"loss": 14112.2844,
|
5585 |
+
"rewards/accuracies": 0.987500011920929,
|
5586 |
+
"rewards/chosen": 0.18878208100795746,
|
5587 |
+
"rewards/margins": 0.5250921249389648,
|
5588 |
+
"rewards/rejected": -0.3363099992275238,
|
5589 |
+
"step": 3710
|
5590 |
+
},
|
5591 |
+
{
|
5592 |
+
"epoch": 18.88101265822785,
|
5593 |
+
"grad_norm": 293062.3175283677,
|
5594 |
+
"learning_rate": 2.6418050767784395e-07,
|
5595 |
+
"logits/chosen": -0.22776488959789276,
|
5596 |
+
"logits/rejected": -0.043119143694639206,
|
5597 |
+
"logps/chosen": -47.83971405029297,
|
5598 |
+
"logps/rejected": -575.6166381835938,
|
5599 |
+
"loss": 14345.3813,
|
5600 |
+
"rewards/accuracies": 0.987500011920929,
|
5601 |
+
"rewards/chosen": 0.19344884157180786,
|
5602 |
+
"rewards/margins": 0.5261351466178894,
|
5603 |
+
"rewards/rejected": -0.33268633484840393,
|
5604 |
+
"step": 3720
|
5605 |
+
},
|
5606 |
+
{
|
5607 |
+
"epoch": 18.931645569620255,
|
5608 |
+
"grad_norm": 369584.46121245134,
|
5609 |
+
"learning_rate": 2.633970542149796e-07,
|
5610 |
+
"logits/chosen": 0.6460098028182983,
|
5611 |
+
"logits/rejected": 0.6165057420730591,
|
5612 |
+
"logps/chosen": -53.0880126953125,
|
5613 |
+
"logps/rejected": -602.0147705078125,
|
5614 |
+
"loss": 14143.9609,
|
5615 |
+
"rewards/accuracies": 1.0,
|
5616 |
+
"rewards/chosen": 0.19915179908275604,
|
5617 |
+
"rewards/margins": 0.5483053922653198,
|
5618 |
+
"rewards/rejected": -0.3491537272930145,
|
5619 |
+
"step": 3730
|
5620 |
+
},
|
5621 |
+
{
|
5622 |
+
"epoch": 18.98227848101266,
|
5623 |
+
"grad_norm": 328959.5337312854,
|
5624 |
+
"learning_rate": 2.626136007521153e-07,
|
5625 |
+
"logits/chosen": 0.25958794355392456,
|
5626 |
+
"logits/rejected": 0.5823850631713867,
|
5627 |
+
"logps/chosen": -49.16436004638672,
|
5628 |
+
"logps/rejected": -584.5070190429688,
|
5629 |
+
"loss": 14187.4844,
|
5630 |
+
"rewards/accuracies": 0.987500011920929,
|
5631 |
+
"rewards/chosen": 0.19965310394763947,
|
5632 |
+
"rewards/margins": 0.5432143211364746,
|
5633 |
+
"rewards/rejected": -0.34356123208999634,
|
5634 |
+
"step": 3740
|
5635 |
+
},
|
5636 |
+
{
|
5637 |
+
"epoch": 19.03291139240506,
|
5638 |
+
"grad_norm": 1626740.8696455131,
|
5639 |
+
"learning_rate": 2.61830147289251e-07,
|
5640 |
+
"logits/chosen": -0.6601130366325378,
|
5641 |
+
"logits/rejected": -0.8405634164810181,
|
5642 |
+
"logps/chosen": -46.10778045654297,
|
5643 |
+
"logps/rejected": -587.3377685546875,
|
5644 |
+
"loss": 14051.6469,
|
5645 |
+
"rewards/accuracies": 0.987500011920929,
|
5646 |
+
"rewards/chosen": 0.1946493685245514,
|
5647 |
+
"rewards/margins": 0.54271399974823,
|
5648 |
+
"rewards/rejected": -0.34806469082832336,
|
5649 |
+
"step": 3750
|
5650 |
+
},
|
5651 |
+
{
|
5652 |
+
"epoch": 19.083544303797467,
|
5653 |
+
"grad_norm": 786920.4959477714,
|
5654 |
+
"learning_rate": 2.610466938263867e-07,
|
5655 |
+
"logits/chosen": 0.280475914478302,
|
5656 |
+
"logits/rejected": 1.5355632305145264,
|
5657 |
+
"logps/chosen": -40.83550262451172,
|
5658 |
+
"logps/rejected": -576.2233276367188,
|
5659 |
+
"loss": 14524.6172,
|
5660 |
+
"rewards/accuracies": 0.987500011920929,
|
5661 |
+
"rewards/chosen": 0.19496676325798035,
|
5662 |
+
"rewards/margins": 0.5368971228599548,
|
5663 |
+
"rewards/rejected": -0.3419303297996521,
|
5664 |
+
"step": 3760
|
5665 |
+
},
|
5666 |
+
{
|
5667 |
+
"epoch": 19.134177215189872,
|
5668 |
+
"grad_norm": 670222.9584254185,
|
5669 |
+
"learning_rate": 2.602632403635224e-07,
|
5670 |
+
"logits/chosen": 1.6073856353759766,
|
5671 |
+
"logits/rejected": 2.1679255962371826,
|
5672 |
+
"logps/chosen": -48.07741928100586,
|
5673 |
+
"logps/rejected": -568.386962890625,
|
5674 |
+
"loss": 16064.1922,
|
5675 |
+
"rewards/accuracies": 0.987500011920929,
|
5676 |
+
"rewards/chosen": 0.19028018414974213,
|
5677 |
+
"rewards/margins": 0.5232519507408142,
|
5678 |
+
"rewards/rejected": -0.3329717516899109,
|
5679 |
+
"step": 3770
|
5680 |
+
},
|
5681 |
+
{
|
5682 |
+
"epoch": 19.184810126582278,
|
5683 |
+
"grad_norm": 779401.4265683588,
|
5684 |
+
"learning_rate": 2.594797869006581e-07,
|
5685 |
+
"logits/chosen": -1.2690767049789429,
|
5686 |
+
"logits/rejected": -0.7741214036941528,
|
5687 |
+
"logps/chosen": -35.147666931152344,
|
5688 |
+
"logps/rejected": -588.05810546875,
|
5689 |
+
"loss": 14594.675,
|
5690 |
+
"rewards/accuracies": 1.0,
|
5691 |
+
"rewards/chosen": 0.20034465193748474,
|
5692 |
+
"rewards/margins": 0.5537833571434021,
|
5693 |
+
"rewards/rejected": -0.3534386456012726,
|
5694 |
+
"step": 3780
|
5695 |
+
},
|
5696 |
+
{
|
5697 |
+
"epoch": 19.235443037974683,
|
5698 |
+
"grad_norm": 677896.0436831466,
|
5699 |
+
"learning_rate": 2.586963334377938e-07,
|
5700 |
+
"logits/chosen": 0.381600558757782,
|
5701 |
+
"logits/rejected": 0.3627360761165619,
|
5702 |
+
"logps/chosen": -47.129329681396484,
|
5703 |
+
"logps/rejected": -583.4297485351562,
|
5704 |
+
"loss": 14673.1125,
|
5705 |
+
"rewards/accuracies": 1.0,
|
5706 |
+
"rewards/chosen": 0.19634023308753967,
|
5707 |
+
"rewards/margins": 0.5409786105155945,
|
5708 |
+
"rewards/rejected": -0.3446383774280548,
|
5709 |
+
"step": 3790
|
5710 |
+
},
|
5711 |
+
{
|
5712 |
+
"epoch": 19.28607594936709,
|
5713 |
+
"grad_norm": 1708590.8406628803,
|
5714 |
+
"learning_rate": 2.579128799749295e-07,
|
5715 |
+
"logits/chosen": -0.6463128924369812,
|
5716 |
+
"logits/rejected": -0.1966671198606491,
|
5717 |
+
"logps/chosen": -51.58148193359375,
|
5718 |
+
"logps/rejected": -571.8802490234375,
|
5719 |
+
"loss": 14855.8094,
|
5720 |
+
"rewards/accuracies": 0.987500011920929,
|
5721 |
+
"rewards/chosen": 0.1947019398212433,
|
5722 |
+
"rewards/margins": 0.5256696343421936,
|
5723 |
+
"rewards/rejected": -0.3309677243232727,
|
5724 |
+
"step": 3800
|
5725 |
+
},
|
5726 |
+
{
|
5727 |
+
"epoch": 19.336708860759494,
|
5728 |
+
"grad_norm": 906394.5199246205,
|
5729 |
+
"learning_rate": 2.5712942651206515e-07,
|
5730 |
+
"logits/chosen": 0.6537224054336548,
|
5731 |
+
"logits/rejected": 1.356911301612854,
|
5732 |
+
"logps/chosen": -37.791786193847656,
|
5733 |
+
"logps/rejected": -541.84912109375,
|
5734 |
+
"loss": 14494.675,
|
5735 |
+
"rewards/accuracies": 0.9750000238418579,
|
5736 |
+
"rewards/chosen": 0.18946874141693115,
|
5737 |
+
"rewards/margins": 0.5098165273666382,
|
5738 |
+
"rewards/rejected": -0.3203478455543518,
|
5739 |
+
"step": 3810
|
5740 |
+
},
|
5741 |
+
{
|
5742 |
+
"epoch": 19.3873417721519,
|
5743 |
+
"grad_norm": 1248788.3894635146,
|
5744 |
+
"learning_rate": 2.5634597304920085e-07,
|
5745 |
+
"logits/chosen": -1.4148962497711182,
|
5746 |
+
"logits/rejected": -0.616938591003418,
|
5747 |
+
"logps/chosen": -39.15003204345703,
|
5748 |
+
"logps/rejected": -567.9779052734375,
|
5749 |
+
"loss": 14511.9828,
|
5750 |
+
"rewards/accuracies": 1.0,
|
5751 |
+
"rewards/chosen": 0.19468382000923157,
|
5752 |
+
"rewards/margins": 0.5306459665298462,
|
5753 |
+
"rewards/rejected": -0.33596211671829224,
|
5754 |
+
"step": 3820
|
5755 |
+
},
|
5756 |
+
{
|
5757 |
+
"epoch": 19.437974683544304,
|
5758 |
+
"grad_norm": 699507.4776687805,
|
5759 |
+
"learning_rate": 2.5556251958633656e-07,
|
5760 |
+
"logits/chosen": -0.786666214466095,
|
5761 |
+
"logits/rejected": -0.8524150848388672,
|
5762 |
+
"logps/chosen": -37.31165313720703,
|
5763 |
+
"logps/rejected": -559.5811767578125,
|
5764 |
+
"loss": 15226.8953,
|
5765 |
+
"rewards/accuracies": 0.987500011920929,
|
5766 |
+
"rewards/chosen": 0.18610945343971252,
|
5767 |
+
"rewards/margins": 0.5259476900100708,
|
5768 |
+
"rewards/rejected": -0.3398382067680359,
|
5769 |
+
"step": 3830
|
5770 |
+
},
|
5771 |
+
{
|
5772 |
+
"epoch": 19.48860759493671,
|
5773 |
+
"grad_norm": 750946.845865734,
|
5774 |
+
"learning_rate": 2.5477906612347227e-07,
|
5775 |
+
"logits/chosen": -0.5914249420166016,
|
5776 |
+
"logits/rejected": -0.1790940761566162,
|
5777 |
+
"logps/chosen": -41.875919342041016,
|
5778 |
+
"logps/rejected": -580.2433471679688,
|
5779 |
+
"loss": 15077.0078,
|
5780 |
+
"rewards/accuracies": 1.0,
|
5781 |
+
"rewards/chosen": 0.1914350688457489,
|
5782 |
+
"rewards/margins": 0.5402361154556274,
|
5783 |
+
"rewards/rejected": -0.34880098700523376,
|
5784 |
+
"step": 3840
|
5785 |
+
},
|
5786 |
+
{
|
5787 |
+
"epoch": 19.539240506329115,
|
5788 |
+
"grad_norm": 1438213.362152031,
|
5789 |
+
"learning_rate": 2.539956126606079e-07,
|
5790 |
+
"logits/chosen": -1.4764426946640015,
|
5791 |
+
"logits/rejected": -1.0852867364883423,
|
5792 |
+
"logps/chosen": -45.64619064331055,
|
5793 |
+
"logps/rejected": -574.2334594726562,
|
5794 |
+
"loss": 15001.6922,
|
5795 |
+
"rewards/accuracies": 0.9750000238418579,
|
5796 |
+
"rewards/chosen": 0.19227740168571472,
|
5797 |
+
"rewards/margins": 0.527544379234314,
|
5798 |
+
"rewards/rejected": -0.33526697754859924,
|
5799 |
+
"step": 3850
|
5800 |
+
},
|
5801 |
+
{
|
5802 |
+
"epoch": 19.58987341772152,
|
5803 |
+
"grad_norm": 1015656.6585732017,
|
5804 |
+
"learning_rate": 2.532121591977436e-07,
|
5805 |
+
"logits/chosen": 0.0265532024204731,
|
5806 |
+
"logits/rejected": 0.4305901527404785,
|
5807 |
+
"logps/chosen": -40.221275329589844,
|
5808 |
+
"logps/rejected": -580.7332763671875,
|
5809 |
+
"loss": 15005.4062,
|
5810 |
+
"rewards/accuracies": 0.987500011920929,
|
5811 |
+
"rewards/chosen": 0.1900371015071869,
|
5812 |
+
"rewards/margins": 0.5353468656539917,
|
5813 |
+
"rewards/rejected": -0.3453097939491272,
|
5814 |
+
"step": 3860
|
5815 |
+
},
|
5816 |
+
{
|
5817 |
+
"epoch": 19.640506329113926,
|
5818 |
+
"grad_norm": 1480021.6334817603,
|
5819 |
+
"learning_rate": 2.5242870573487933e-07,
|
5820 |
+
"logits/chosen": -2.3115265369415283,
|
5821 |
+
"logits/rejected": -1.9450628757476807,
|
5822 |
+
"logps/chosen": -43.31880187988281,
|
5823 |
+
"logps/rejected": -592.5906372070312,
|
5824 |
+
"loss": 14681.3906,
|
5825 |
+
"rewards/accuracies": 1.0,
|
5826 |
+
"rewards/chosen": 0.20574085414409637,
|
5827 |
+
"rewards/margins": 0.5489095449447632,
|
5828 |
+
"rewards/rejected": -0.3431686758995056,
|
5829 |
+
"step": 3870
|
5830 |
+
},
|
5831 |
+
{
|
5832 |
+
"epoch": 19.691139240506327,
|
5833 |
+
"grad_norm": 652464.7916313735,
|
5834 |
+
"learning_rate": 2.5164525227201504e-07,
|
5835 |
+
"logits/chosen": 0.63951176404953,
|
5836 |
+
"logits/rejected": 1.3804535865783691,
|
5837 |
+
"logps/chosen": -33.52408981323242,
|
5838 |
+
"logps/rejected": -556.9231567382812,
|
5839 |
+
"loss": 15124.1328,
|
5840 |
+
"rewards/accuracies": 1.0,
|
5841 |
+
"rewards/chosen": 0.1876874566078186,
|
5842 |
+
"rewards/margins": 0.5311328172683716,
|
5843 |
+
"rewards/rejected": -0.343445360660553,
|
5844 |
+
"step": 3880
|
5845 |
+
},
|
5846 |
+
{
|
5847 |
+
"epoch": 19.741772151898733,
|
5848 |
+
"grad_norm": 697435.0328174214,
|
5849 |
+
"learning_rate": 2.508617988091507e-07,
|
5850 |
+
"logits/chosen": -1.7422069311141968,
|
5851 |
+
"logits/rejected": -1.3413903713226318,
|
5852 |
+
"logps/chosen": -42.224788665771484,
|
5853 |
+
"logps/rejected": -584.3877563476562,
|
5854 |
+
"loss": 15307.875,
|
5855 |
+
"rewards/accuracies": 1.0,
|
5856 |
+
"rewards/chosen": 0.2013184279203415,
|
5857 |
+
"rewards/margins": 0.5388418436050415,
|
5858 |
+
"rewards/rejected": -0.3375234305858612,
|
5859 |
+
"step": 3890
|
5860 |
+
},
|
5861 |
+
{
|
5862 |
+
"epoch": 19.792405063291138,
|
5863 |
+
"grad_norm": 680395.3386900029,
|
5864 |
+
"learning_rate": 2.500783453462864e-07,
|
5865 |
+
"logits/chosen": 0.6034026741981506,
|
5866 |
+
"logits/rejected": 1.1066893339157104,
|
5867 |
+
"logps/chosen": -39.37774658203125,
|
5868 |
+
"logps/rejected": -585.9308471679688,
|
5869 |
+
"loss": 15434.6031,
|
5870 |
+
"rewards/accuracies": 1.0,
|
5871 |
+
"rewards/chosen": 0.20041151344776154,
|
5872 |
+
"rewards/margins": 0.545585036277771,
|
5873 |
+
"rewards/rejected": -0.34517353773117065,
|
5874 |
+
"step": 3900
|
5875 |
+
},
|
5876 |
+
{
|
5877 |
+
"epoch": 19.843037974683543,
|
5878 |
+
"grad_norm": 1036480.9072027011,
|
5879 |
+
"learning_rate": 2.492948918834221e-07,
|
5880 |
+
"logits/chosen": -0.37202078104019165,
|
5881 |
+
"logits/rejected": -0.6633853316307068,
|
5882 |
+
"logps/chosen": -50.845218658447266,
|
5883 |
+
"logps/rejected": -565.1788330078125,
|
5884 |
+
"loss": 14732.9813,
|
5885 |
+
"rewards/accuracies": 0.949999988079071,
|
5886 |
+
"rewards/chosen": 0.1860923022031784,
|
5887 |
+
"rewards/margins": 0.5141801834106445,
|
5888 |
+
"rewards/rejected": -0.32808783650398254,
|
5889 |
+
"step": 3910
|
5890 |
+
},
|
5891 |
+
{
|
5892 |
+
"epoch": 19.89367088607595,
|
5893 |
+
"grad_norm": 960769.0916438915,
|
5894 |
+
"learning_rate": 2.485114384205578e-07,
|
5895 |
+
"logits/chosen": -2.4451048374176025,
|
5896 |
+
"logits/rejected": -1.8602224588394165,
|
5897 |
+
"logps/chosen": -49.44445037841797,
|
5898 |
+
"logps/rejected": -588.972900390625,
|
5899 |
+
"loss": 14954.0281,
|
5900 |
+
"rewards/accuracies": 0.987500011920929,
|
5901 |
+
"rewards/chosen": 0.20443923771381378,
|
5902 |
+
"rewards/margins": 0.5393208265304565,
|
5903 |
+
"rewards/rejected": -0.33488157391548157,
|
5904 |
+
"step": 3920
|
5905 |
+
},
|
5906 |
+
{
|
5907 |
+
"epoch": 19.944303797468354,
|
5908 |
+
"grad_norm": 637831.6626185304,
|
5909 |
+
"learning_rate": 2.477279849576935e-07,
|
5910 |
+
"logits/chosen": -1.1525195837020874,
|
5911 |
+
"logits/rejected": -0.6883751153945923,
|
5912 |
+
"logps/chosen": -37.11662673950195,
|
5913 |
+
"logps/rejected": -576.5172729492188,
|
5914 |
+
"loss": 14910.0938,
|
5915 |
+
"rewards/accuracies": 0.987500011920929,
|
5916 |
+
"rewards/chosen": 0.20432814955711365,
|
5917 |
+
"rewards/margins": 0.5393826961517334,
|
5918 |
+
"rewards/rejected": -0.33505457639694214,
|
5919 |
+
"step": 3930
|
5920 |
+
},
|
5921 |
+
{
|
5922 |
+
"epoch": 19.99493670886076,
|
5923 |
+
"grad_norm": 926025.3002487151,
|
5924 |
+
"learning_rate": 2.4694453149482917e-07,
|
5925 |
+
"logits/chosen": 0.10631950944662094,
|
5926 |
+
"logits/rejected": 0.8977824449539185,
|
5927 |
+
"logps/chosen": -43.12347412109375,
|
5928 |
+
"logps/rejected": -561.4703369140625,
|
5929 |
+
"loss": 15041.1219,
|
5930 |
+
"rewards/accuracies": 0.9624999761581421,
|
5931 |
+
"rewards/chosen": 0.18227019906044006,
|
5932 |
+
"rewards/margins": 0.5218333005905151,
|
5933 |
+
"rewards/rejected": -0.3395631015300751,
|
5934 |
+
"step": 3940
|
5935 |
}
|
5936 |
],
|
5937 |
"logging_steps": 10,
|
5938 |
+
"max_steps": 7092,
|
5939 |
"num_input_tokens_seen": 0,
|
5940 |
+
"num_train_epochs": 36,
|
5941 |
"save_steps": 500,
|
5942 |
"stateful_callbacks": {
|
5943 |
"TrainerControl": {
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32798bab9af49bccdac87f37019ce9013d05d2970814716fe899a3a90e4f5fcb
|
3 |
size 6584
|