cat-searcher
commited on
Training in progress, epoch 8, checkpoint
Browse files- last-checkpoint/global_step1777/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1777/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1777/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1777/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1777/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1777/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1777/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1777/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1777/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1777/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1777/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1777/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1777/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1777/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1777/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1777/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step1777/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8396550322b4e5b4b248b3598bbd612faf1fa0c5ec2263b91351b58d2ba6952a
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1777/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6758aacbace36eabc0e4345305942e2e83f6723c45f46b208d8cd0ee5af70eda
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1777/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:563d893311f93f922464265e39a4485a399038fc2e0efcb1c1d66325071fb85b
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1777/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea1cc6ab202a983deb94fa9cb07564d7a48cf8206762ab91af259355e0edfe53
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1777/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fe1dae4d2a047cd117a16e7741262e6b1f2c690ec0bc3ed4b2a053ec1b2806b
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1777/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90f7c18ec1fef487b9877248fac51b28d61d68af11a1fc503a4618502cfb4351
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1777/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:064b00e29d06a308d96db35ae527cdea3075421480545e44a2d07675a1115fa0
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1777/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9711cb2e001c562b5182d857e9e4f2bd198074c71345145a59801f3faac5a30
|
3 |
+
size 2506176112
|
last-checkpoint/global_step1777/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66a17f2748e0bf3a44e1eab236c57b741dfee963ce1997c03a9689ee0b8108ae
|
3 |
+
size 85570
|
last-checkpoint/global_step1777/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af2eb59000d01d77dae6afc2546735ca79ac1991df6a3a6f884f898192208e53
|
3 |
+
size 85506
|
last-checkpoint/global_step1777/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:376a8bbbe87d667a215c3706defabaf7ebe278616df6331e686b97721c8c3419
|
3 |
+
size 85506
|
last-checkpoint/global_step1777/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58007c191c01ab6ebb4123e973b76acca0b2e62aea4d42bc9e4f4b51b617cb0f
|
3 |
+
size 85506
|
last-checkpoint/global_step1777/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c27c952002e84236152e3f78661d14b975ebf305403311acad0ce73378b70aa2
|
3 |
+
size 85506
|
last-checkpoint/global_step1777/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f62c6ac1739dfb92af138a4fc3a2f3c6715a75287f68e525d1754abbba8e1a57
|
3 |
+
size 85506
|
last-checkpoint/global_step1777/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a45608a3414ad4258d01c2326f2238358551e58f5db9632e92aeb80934a65627
|
3 |
+
size 85506
|
last-checkpoint/global_step1777/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0306d65d9d07722ebfb6b2eb4d41ef08e69a70e64d0d995f29f8611a6959b1fb
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1777
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09b755aaa0de9a9d7be5dd7cc1cf82ccedd0ac145120aec2032a624323902370
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef4f1316aa95047dba9bf5e25ec454561d74e6768f37069512a5b410ac5fb8ad
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae9162e03c562553a5d9d13120f544d3c47ea71bb39aa44e18253675e17ed4a4
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4809456871b3a40c8db7e0926a9db11b01149a1d483fb29b16fc69dabaf36c6f
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bb6bcf25ff148b74eea7dd4895fc42e9433538fff5d75f0d2ae6cb0c2fdadf0
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f00ea04cd1a52c539d9cc948ac8a04676d6b99702acd09149565f781806f63f
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5571fb2fc1b413792b01ac691c759786855573992bab1d14875faccdaf8c881e
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59019ba23ead9c15851cb4349397254458ce50ea3c2987090404f4f3842c6d8f
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45fdffda57fda4a555da7a5de6fc6ec7324e0dae048b92519af6c4f6a1bc7412
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62fb2c13e63aba83c4505fae1639f79a33853d8f1bebe20cecb73bf53c8e7c46
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e16985aaf4cce287f446385c2d8f7c8409907ca0803309b7f28917440fa9de11
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2392,6 +2392,291 @@
|
|
2392 |
"rewards/margins": 0.42148295044898987,
|
2393 |
"rewards/rejected": -0.2844696640968323,
|
2394 |
"step": 1580
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2395 |
}
|
2396 |
],
|
2397 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.99746835443038,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 1777,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2392 |
"rewards/margins": 0.42148295044898987,
|
2393 |
"rewards/rejected": -0.2844696640968323,
|
2394 |
"step": 1580
|
2395 |
+
},
|
2396 |
+
{
|
2397 |
+
"epoch": 8.050632911392405,
|
2398 |
+
"grad_norm": 1502727.0577222395,
|
2399 |
+
"learning_rate": 3.064869946725164e-07,
|
2400 |
+
"logits/chosen": -2.0656542778015137,
|
2401 |
+
"logits/rejected": -1.5985521078109741,
|
2402 |
+
"logps/chosen": -84.60444641113281,
|
2403 |
+
"logps/rejected": -520.1857299804688,
|
2404 |
+
"loss": 24723.275,
|
2405 |
+
"rewards/accuracies": 0.9750000238418579,
|
2406 |
+
"rewards/chosen": 0.1492975652217865,
|
2407 |
+
"rewards/margins": 0.4404692053794861,
|
2408 |
+
"rewards/rejected": -0.2911716103553772,
|
2409 |
+
"step": 1590
|
2410 |
+
},
|
2411 |
+
{
|
2412 |
+
"epoch": 8.10126582278481,
|
2413 |
+
"grad_norm": 838369.9468876831,
|
2414 |
+
"learning_rate": 3.049200877467878e-07,
|
2415 |
+
"logits/chosen": -1.758178949356079,
|
2416 |
+
"logits/rejected": -0.7727742791175842,
|
2417 |
+
"logps/chosen": -83.45867919921875,
|
2418 |
+
"logps/rejected": -530.3883666992188,
|
2419 |
+
"loss": 25817.0203,
|
2420 |
+
"rewards/accuracies": 1.0,
|
2421 |
+
"rewards/chosen": 0.14538443088531494,
|
2422 |
+
"rewards/margins": 0.45367687940597534,
|
2423 |
+
"rewards/rejected": -0.3082924485206604,
|
2424 |
+
"step": 1600
|
2425 |
+
},
|
2426 |
+
{
|
2427 |
+
"epoch": 8.151898734177216,
|
2428 |
+
"grad_norm": 1012852.54550217,
|
2429 |
+
"learning_rate": 3.0335318082105923e-07,
|
2430 |
+
"logits/chosen": -2.217496156692505,
|
2431 |
+
"logits/rejected": -2.0143866539001465,
|
2432 |
+
"logps/chosen": -100.38580322265625,
|
2433 |
+
"logps/rejected": -549.8438720703125,
|
2434 |
+
"loss": 25090.8891,
|
2435 |
+
"rewards/accuracies": 0.987500011920929,
|
2436 |
+
"rewards/chosen": 0.13634233176708221,
|
2437 |
+
"rewards/margins": 0.44348135590553284,
|
2438 |
+
"rewards/rejected": -0.30713900923728943,
|
2439 |
+
"step": 1610
|
2440 |
+
},
|
2441 |
+
{
|
2442 |
+
"epoch": 8.20253164556962,
|
2443 |
+
"grad_norm": 1056784.1797241461,
|
2444 |
+
"learning_rate": 3.0178627389533064e-07,
|
2445 |
+
"logits/chosen": -1.1953948736190796,
|
2446 |
+
"logits/rejected": -0.2751680910587311,
|
2447 |
+
"logps/chosen": -89.64523315429688,
|
2448 |
+
"logps/rejected": -510.4059143066406,
|
2449 |
+
"loss": 24456.725,
|
2450 |
+
"rewards/accuracies": 0.9750000238418579,
|
2451 |
+
"rewards/chosen": 0.14029642939567566,
|
2452 |
+
"rewards/margins": 0.4281511902809143,
|
2453 |
+
"rewards/rejected": -0.28785476088523865,
|
2454 |
+
"step": 1620
|
2455 |
+
},
|
2456 |
+
{
|
2457 |
+
"epoch": 8.253164556962025,
|
2458 |
+
"grad_norm": 1147595.1251004518,
|
2459 |
+
"learning_rate": 3.00219366969602e-07,
|
2460 |
+
"logits/chosen": -2.550518035888672,
|
2461 |
+
"logits/rejected": -2.5027434825897217,
|
2462 |
+
"logps/chosen": -76.6513442993164,
|
2463 |
+
"logps/rejected": -524.4201049804688,
|
2464 |
+
"loss": 23486.5594,
|
2465 |
+
"rewards/accuracies": 0.987500011920929,
|
2466 |
+
"rewards/chosen": 0.15493164956569672,
|
2467 |
+
"rewards/margins": 0.44891220331192017,
|
2468 |
+
"rewards/rejected": -0.29398053884506226,
|
2469 |
+
"step": 1630
|
2470 |
+
},
|
2471 |
+
{
|
2472 |
+
"epoch": 8.30379746835443,
|
2473 |
+
"grad_norm": 1390175.0732444616,
|
2474 |
+
"learning_rate": 2.986524600438734e-07,
|
2475 |
+
"logits/chosen": -0.059876419603824615,
|
2476 |
+
"logits/rejected": 0.00422248849645257,
|
2477 |
+
"logps/chosen": -74.77996063232422,
|
2478 |
+
"logps/rejected": -544.7862548828125,
|
2479 |
+
"loss": 24176.6094,
|
2480 |
+
"rewards/accuracies": 1.0,
|
2481 |
+
"rewards/chosen": 0.151381716132164,
|
2482 |
+
"rewards/margins": 0.4694734215736389,
|
2483 |
+
"rewards/rejected": -0.3180916905403137,
|
2484 |
+
"step": 1640
|
2485 |
+
},
|
2486 |
+
{
|
2487 |
+
"epoch": 8.354430379746836,
|
2488 |
+
"grad_norm": 1846159.1203677754,
|
2489 |
+
"learning_rate": 2.970855531181448e-07,
|
2490 |
+
"logits/chosen": -3.206434726715088,
|
2491 |
+
"logits/rejected": -2.6545357704162598,
|
2492 |
+
"logps/chosen": -79.13458251953125,
|
2493 |
+
"logps/rejected": -529.1912841796875,
|
2494 |
+
"loss": 25560.5344,
|
2495 |
+
"rewards/accuracies": 0.987500011920929,
|
2496 |
+
"rewards/chosen": 0.14862783253192902,
|
2497 |
+
"rewards/margins": 0.4489147663116455,
|
2498 |
+
"rewards/rejected": -0.3002868890762329,
|
2499 |
+
"step": 1650
|
2500 |
+
},
|
2501 |
+
{
|
2502 |
+
"epoch": 8.405063291139241,
|
2503 |
+
"grad_norm": 1294602.7153889702,
|
2504 |
+
"learning_rate": 2.955186461924162e-07,
|
2505 |
+
"logits/chosen": -1.0581172704696655,
|
2506 |
+
"logits/rejected": -0.6744507551193237,
|
2507 |
+
"logps/chosen": -78.69017028808594,
|
2508 |
+
"logps/rejected": -526.4840087890625,
|
2509 |
+
"loss": 25549.9125,
|
2510 |
+
"rewards/accuracies": 0.9750000238418579,
|
2511 |
+
"rewards/chosen": 0.14595063030719757,
|
2512 |
+
"rewards/margins": 0.44837069511413574,
|
2513 |
+
"rewards/rejected": -0.302420049905777,
|
2514 |
+
"step": 1660
|
2515 |
+
},
|
2516 |
+
{
|
2517 |
+
"epoch": 8.455696202531646,
|
2518 |
+
"grad_norm": 1653521.5239311927,
|
2519 |
+
"learning_rate": 2.9395173926668755e-07,
|
2520 |
+
"logits/chosen": -0.9036309123039246,
|
2521 |
+
"logits/rejected": -0.16554176807403564,
|
2522 |
+
"logps/chosen": -83.71012878417969,
|
2523 |
+
"logps/rejected": -525.7719116210938,
|
2524 |
+
"loss": 25089.5516,
|
2525 |
+
"rewards/accuracies": 1.0,
|
2526 |
+
"rewards/chosen": 0.14826878905296326,
|
2527 |
+
"rewards/margins": 0.4438709616661072,
|
2528 |
+
"rewards/rejected": -0.2956022024154663,
|
2529 |
+
"step": 1670
|
2530 |
+
},
|
2531 |
+
{
|
2532 |
+
"epoch": 8.50632911392405,
|
2533 |
+
"grad_norm": 1371497.4089594388,
|
2534 |
+
"learning_rate": 2.9238483234095896e-07,
|
2535 |
+
"logits/chosen": -1.423182725906372,
|
2536 |
+
"logits/rejected": -1.0717556476593018,
|
2537 |
+
"logps/chosen": -89.4638671875,
|
2538 |
+
"logps/rejected": -577.1199340820312,
|
2539 |
+
"loss": 24558.0953,
|
2540 |
+
"rewards/accuracies": 1.0,
|
2541 |
+
"rewards/chosen": 0.15898647904396057,
|
2542 |
+
"rewards/margins": 0.48913446068763733,
|
2543 |
+
"rewards/rejected": -0.330147922039032,
|
2544 |
+
"step": 1680
|
2545 |
+
},
|
2546 |
+
{
|
2547 |
+
"epoch": 8.556962025316455,
|
2548 |
+
"grad_norm": 1476867.0955964676,
|
2549 |
+
"learning_rate": 2.908179254152303e-07,
|
2550 |
+
"logits/chosen": -3.2004425525665283,
|
2551 |
+
"logits/rejected": -2.7161200046539307,
|
2552 |
+
"logps/chosen": -86.7264633178711,
|
2553 |
+
"logps/rejected": -543.3889770507812,
|
2554 |
+
"loss": 26642.4781,
|
2555 |
+
"rewards/accuracies": 1.0,
|
2556 |
+
"rewards/chosen": 0.1485292911529541,
|
2557 |
+
"rewards/margins": 0.4551934599876404,
|
2558 |
+
"rewards/rejected": -0.3066641688346863,
|
2559 |
+
"step": 1690
|
2560 |
+
},
|
2561 |
+
{
|
2562 |
+
"epoch": 8.60759493670886,
|
2563 |
+
"grad_norm": 1134090.4892000444,
|
2564 |
+
"learning_rate": 2.8925101848950173e-07,
|
2565 |
+
"logits/chosen": -0.274528443813324,
|
2566 |
+
"logits/rejected": 0.4862538278102875,
|
2567 |
+
"logps/chosen": -79.16570281982422,
|
2568 |
+
"logps/rejected": -513.53173828125,
|
2569 |
+
"loss": 23741.9938,
|
2570 |
+
"rewards/accuracies": 0.9624999761581421,
|
2571 |
+
"rewards/chosen": 0.15034614503383636,
|
2572 |
+
"rewards/margins": 0.43597039580345154,
|
2573 |
+
"rewards/rejected": -0.28562426567077637,
|
2574 |
+
"step": 1700
|
2575 |
+
},
|
2576 |
+
{
|
2577 |
+
"epoch": 8.658227848101266,
|
2578 |
+
"grad_norm": 1314089.2981008843,
|
2579 |
+
"learning_rate": 2.876841115637731e-07,
|
2580 |
+
"logits/chosen": 0.6013806462287903,
|
2581 |
+
"logits/rejected": 1.2335985898971558,
|
2582 |
+
"logps/chosen": -90.46197509765625,
|
2583 |
+
"logps/rejected": -551.8345947265625,
|
2584 |
+
"loss": 24216.4281,
|
2585 |
+
"rewards/accuracies": 1.0,
|
2586 |
+
"rewards/chosen": 0.1541350781917572,
|
2587 |
+
"rewards/margins": 0.47102633118629456,
|
2588 |
+
"rewards/rejected": -0.3168913424015045,
|
2589 |
+
"step": 1710
|
2590 |
+
},
|
2591 |
+
{
|
2592 |
+
"epoch": 8.708860759493671,
|
2593 |
+
"grad_norm": 1622019.967143891,
|
2594 |
+
"learning_rate": 2.861172046380445e-07,
|
2595 |
+
"logits/chosen": 0.2407432496547699,
|
2596 |
+
"logits/rejected": 0.4264713227748871,
|
2597 |
+
"logps/chosen": -93.0431900024414,
|
2598 |
+
"logps/rejected": -564.0677490234375,
|
2599 |
+
"loss": 23649.3016,
|
2600 |
+
"rewards/accuracies": 1.0,
|
2601 |
+
"rewards/chosen": 0.147947758436203,
|
2602 |
+
"rewards/margins": 0.4662678837776184,
|
2603 |
+
"rewards/rejected": -0.3183201253414154,
|
2604 |
+
"step": 1720
|
2605 |
+
},
|
2606 |
+
{
|
2607 |
+
"epoch": 8.759493670886076,
|
2608 |
+
"grad_norm": 1520791.345848389,
|
2609 |
+
"learning_rate": 2.8455029771231586e-07,
|
2610 |
+
"logits/chosen": 0.6626393795013428,
|
2611 |
+
"logits/rejected": 0.7864507436752319,
|
2612 |
+
"logps/chosen": -94.95128631591797,
|
2613 |
+
"logps/rejected": -540.1358642578125,
|
2614 |
+
"loss": 25224.3125,
|
2615 |
+
"rewards/accuracies": 0.987500011920929,
|
2616 |
+
"rewards/chosen": 0.14551883935928345,
|
2617 |
+
"rewards/margins": 0.4529417157173157,
|
2618 |
+
"rewards/rejected": -0.3074227571487427,
|
2619 |
+
"step": 1730
|
2620 |
+
},
|
2621 |
+
{
|
2622 |
+
"epoch": 8.810126582278482,
|
2623 |
+
"grad_norm": 1625465.2135884068,
|
2624 |
+
"learning_rate": 2.8298339078658727e-07,
|
2625 |
+
"logits/chosen": -0.07786345481872559,
|
2626 |
+
"logits/rejected": -0.031427524983882904,
|
2627 |
+
"logps/chosen": -90.72882843017578,
|
2628 |
+
"logps/rejected": -539.1676025390625,
|
2629 |
+
"loss": 24133.7531,
|
2630 |
+
"rewards/accuracies": 0.987500011920929,
|
2631 |
+
"rewards/chosen": 0.15023007988929749,
|
2632 |
+
"rewards/margins": 0.4491490423679352,
|
2633 |
+
"rewards/rejected": -0.2989189624786377,
|
2634 |
+
"step": 1740
|
2635 |
+
},
|
2636 |
+
{
|
2637 |
+
"epoch": 8.860759493670885,
|
2638 |
+
"grad_norm": 1330490.8036484018,
|
2639 |
+
"learning_rate": 2.8141648386085863e-07,
|
2640 |
+
"logits/chosen": 0.1896178424358368,
|
2641 |
+
"logits/rejected": 1.3701179027557373,
|
2642 |
+
"logps/chosen": -78.11041259765625,
|
2643 |
+
"logps/rejected": -545.9954833984375,
|
2644 |
+
"loss": 24713.5375,
|
2645 |
+
"rewards/accuracies": 0.9750000238418579,
|
2646 |
+
"rewards/chosen": 0.15004639327526093,
|
2647 |
+
"rewards/margins": 0.4731353223323822,
|
2648 |
+
"rewards/rejected": -0.32308894395828247,
|
2649 |
+
"step": 1750
|
2650 |
+
},
|
2651 |
+
{
|
2652 |
+
"epoch": 8.91139240506329,
|
2653 |
+
"grad_norm": 1240332.5244059283,
|
2654 |
+
"learning_rate": 2.7984957693513004e-07,
|
2655 |
+
"logits/chosen": 0.09949211776256561,
|
2656 |
+
"logits/rejected": 0.6086061596870422,
|
2657 |
+
"logps/chosen": -84.04310607910156,
|
2658 |
+
"logps/rejected": -550.8171997070312,
|
2659 |
+
"loss": 24452.55,
|
2660 |
+
"rewards/accuracies": 0.987500011920929,
|
2661 |
+
"rewards/chosen": 0.14817103743553162,
|
2662 |
+
"rewards/margins": 0.47146469354629517,
|
2663 |
+
"rewards/rejected": -0.32329362630844116,
|
2664 |
+
"step": 1760
|
2665 |
+
},
|
2666 |
+
{
|
2667 |
+
"epoch": 8.962025316455696,
|
2668 |
+
"grad_norm": 1279998.0524960216,
|
2669 |
+
"learning_rate": 2.782826700094014e-07,
|
2670 |
+
"logits/chosen": -1.9250777959823608,
|
2671 |
+
"logits/rejected": -1.7448539733886719,
|
2672 |
+
"logps/chosen": -92.84037780761719,
|
2673 |
+
"logps/rejected": -539.1063232421875,
|
2674 |
+
"loss": 25664.2531,
|
2675 |
+
"rewards/accuracies": 0.9750000238418579,
|
2676 |
+
"rewards/chosen": 0.1440330594778061,
|
2677 |
+
"rewards/margins": 0.45180240273475647,
|
2678 |
+
"rewards/rejected": -0.3077693581581116,
|
2679 |
+
"step": 1770
|
2680 |
}
|
2681 |
],
|
2682 |
"logging_steps": 10,
|