cat-searcher commited on
Commit
1a8f4ed
·
verified ·
1 Parent(s): c52f2b9

Training in progress, epoch 8, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step1777/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step1777/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step1777/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step1777/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step1777/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step1777/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step1777/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step1777/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step1777/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step1777/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step1777/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step1777/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step1777/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step1777/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step1777/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step1777/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step1777/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8396550322b4e5b4b248b3598bbd612faf1fa0c5ec2263b91351b58d2ba6952a
3
+ size 2506176112
last-checkpoint/global_step1777/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6758aacbace36eabc0e4345305942e2e83f6723c45f46b208d8cd0ee5af70eda
3
+ size 2506176112
last-checkpoint/global_step1777/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:563d893311f93f922464265e39a4485a399038fc2e0efcb1c1d66325071fb85b
3
+ size 2506176112
last-checkpoint/global_step1777/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea1cc6ab202a983deb94fa9cb07564d7a48cf8206762ab91af259355e0edfe53
3
+ size 2506176112
last-checkpoint/global_step1777/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fe1dae4d2a047cd117a16e7741262e6b1f2c690ec0bc3ed4b2a053ec1b2806b
3
+ size 2506176112
last-checkpoint/global_step1777/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90f7c18ec1fef487b9877248fac51b28d61d68af11a1fc503a4618502cfb4351
3
+ size 2506176112
last-checkpoint/global_step1777/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:064b00e29d06a308d96db35ae527cdea3075421480545e44a2d07675a1115fa0
3
+ size 2506176112
last-checkpoint/global_step1777/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9711cb2e001c562b5182d857e9e4f2bd198074c71345145a59801f3faac5a30
3
+ size 2506176112
last-checkpoint/global_step1777/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a17f2748e0bf3a44e1eab236c57b741dfee963ce1997c03a9689ee0b8108ae
3
+ size 85570
last-checkpoint/global_step1777/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2eb59000d01d77dae6afc2546735ca79ac1991df6a3a6f884f898192208e53
3
+ size 85506
last-checkpoint/global_step1777/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:376a8bbbe87d667a215c3706defabaf7ebe278616df6331e686b97721c8c3419
3
+ size 85506
last-checkpoint/global_step1777/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58007c191c01ab6ebb4123e973b76acca0b2e62aea4d42bc9e4f4b51b617cb0f
3
+ size 85506
last-checkpoint/global_step1777/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c27c952002e84236152e3f78661d14b975ebf305403311acad0ce73378b70aa2
3
+ size 85506
last-checkpoint/global_step1777/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f62c6ac1739dfb92af138a4fc3a2f3c6715a75287f68e525d1754abbba8e1a57
3
+ size 85506
last-checkpoint/global_step1777/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a45608a3414ad4258d01c2326f2238358551e58f5db9632e92aeb80934a65627
3
+ size 85506
last-checkpoint/global_step1777/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0306d65d9d07722ebfb6b2eb4d41ef08e69a70e64d0d995f29f8611a6959b1fb
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1580
 
1
+ global_step1777
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d4c8b92632023613bca2cbaab82aff2bfc0f1b7c62aab671b9cfd3d8f06b448
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09b755aaa0de9a9d7be5dd7cc1cf82ccedd0ac145120aec2032a624323902370
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12c27030d235bb5b6be6f9fb6111ad0d2904a9ae1dbba3911f671ef6abf0b238
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef4f1316aa95047dba9bf5e25ec454561d74e6768f37069512a5b410ac5fb8ad
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ee195ebde9bf012f945f068f133e7fe22fef5450c496607e3ef11cc2034a186
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae9162e03c562553a5d9d13120f544d3c47ea71bb39aa44e18253675e17ed4a4
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf0fe1a3315d60b197207c5cb249d0ce4f9ce6d7585e696276d9ffbcb5379893
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4809456871b3a40c8db7e0926a9db11b01149a1d483fb29b16fc69dabaf36c6f
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01c5bd6eae04542162b3e94245555bd81312524066bc01d0ebbfc4fd8554240e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bb6bcf25ff148b74eea7dd4895fc42e9433538fff5d75f0d2ae6cb0c2fdadf0
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45b74942c68b00d657cfce186b0eeb4aa8f52efa04b114803b605fee8de45972
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f00ea04cd1a52c539d9cc948ac8a04676d6b99702acd09149565f781806f63f
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cd66dd2ba958fc9929441817d8154abbd929c0aa9cd66ff3171965bdaaf5d78
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5571fb2fc1b413792b01ac691c759786855573992bab1d14875faccdaf8c881e
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89eeedefdd62514d0130acc330a5c08e9774c95d38c60997905cfd65fc54b710
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59019ba23ead9c15851cb4349397254458ce50ea3c2987090404f4f3842c6d8f
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f43ced939100082608f57561a10e1888e69210c80675068db530c5815889910e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45fdffda57fda4a555da7a5de6fc6ec7324e0dae048b92519af6c4f6a1bc7412
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d8d6ee244d99525e7004ae3f02d44ae63082d81fbbab7306f641ac6aeeb736f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62fb2c13e63aba83c4505fae1639f79a33853d8f1bebe20cecb73bf53c8e7c46
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4222d0b9fadaea1c2825a6be6146f638b45462a966591dbc095e76b291c3b43f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e16985aaf4cce287f446385c2d8f7c8409907ca0803309b7f28917440fa9de11
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.0,
5
  "eval_steps": 100,
6
- "global_step": 1580,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2392,6 +2392,291 @@
2392
  "rewards/margins": 0.42148295044898987,
2393
  "rewards/rejected": -0.2844696640968323,
2394
  "step": 1580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2395
  }
2396
  ],
2397
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.99746835443038,
5
  "eval_steps": 100,
6
+ "global_step": 1777,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2392
  "rewards/margins": 0.42148295044898987,
2393
  "rewards/rejected": -0.2844696640968323,
2394
  "step": 1580
2395
+ },
2396
+ {
2397
+ "epoch": 8.050632911392405,
2398
+ "grad_norm": 1502727.0577222395,
2399
+ "learning_rate": 3.064869946725164e-07,
2400
+ "logits/chosen": -2.0656542778015137,
2401
+ "logits/rejected": -1.5985521078109741,
2402
+ "logps/chosen": -84.60444641113281,
2403
+ "logps/rejected": -520.1857299804688,
2404
+ "loss": 24723.275,
2405
+ "rewards/accuracies": 0.9750000238418579,
2406
+ "rewards/chosen": 0.1492975652217865,
2407
+ "rewards/margins": 0.4404692053794861,
2408
+ "rewards/rejected": -0.2911716103553772,
2409
+ "step": 1590
2410
+ },
2411
+ {
2412
+ "epoch": 8.10126582278481,
2413
+ "grad_norm": 838369.9468876831,
2414
+ "learning_rate": 3.049200877467878e-07,
2415
+ "logits/chosen": -1.758178949356079,
2416
+ "logits/rejected": -0.7727742791175842,
2417
+ "logps/chosen": -83.45867919921875,
2418
+ "logps/rejected": -530.3883666992188,
2419
+ "loss": 25817.0203,
2420
+ "rewards/accuracies": 1.0,
2421
+ "rewards/chosen": 0.14538443088531494,
2422
+ "rewards/margins": 0.45367687940597534,
2423
+ "rewards/rejected": -0.3082924485206604,
2424
+ "step": 1600
2425
+ },
2426
+ {
2427
+ "epoch": 8.151898734177216,
2428
+ "grad_norm": 1012852.54550217,
2429
+ "learning_rate": 3.0335318082105923e-07,
2430
+ "logits/chosen": -2.217496156692505,
2431
+ "logits/rejected": -2.0143866539001465,
2432
+ "logps/chosen": -100.38580322265625,
2433
+ "logps/rejected": -549.8438720703125,
2434
+ "loss": 25090.8891,
2435
+ "rewards/accuracies": 0.987500011920929,
2436
+ "rewards/chosen": 0.13634233176708221,
2437
+ "rewards/margins": 0.44348135590553284,
2438
+ "rewards/rejected": -0.30713900923728943,
2439
+ "step": 1610
2440
+ },
2441
+ {
2442
+ "epoch": 8.20253164556962,
2443
+ "grad_norm": 1056784.1797241461,
2444
+ "learning_rate": 3.0178627389533064e-07,
2445
+ "logits/chosen": -1.1953948736190796,
2446
+ "logits/rejected": -0.2751680910587311,
2447
+ "logps/chosen": -89.64523315429688,
2448
+ "logps/rejected": -510.4059143066406,
2449
+ "loss": 24456.725,
2450
+ "rewards/accuracies": 0.9750000238418579,
2451
+ "rewards/chosen": 0.14029642939567566,
2452
+ "rewards/margins": 0.4281511902809143,
2453
+ "rewards/rejected": -0.28785476088523865,
2454
+ "step": 1620
2455
+ },
2456
+ {
2457
+ "epoch": 8.253164556962025,
2458
+ "grad_norm": 1147595.1251004518,
2459
+ "learning_rate": 3.00219366969602e-07,
2460
+ "logits/chosen": -2.550518035888672,
2461
+ "logits/rejected": -2.5027434825897217,
2462
+ "logps/chosen": -76.6513442993164,
2463
+ "logps/rejected": -524.4201049804688,
2464
+ "loss": 23486.5594,
2465
+ "rewards/accuracies": 0.987500011920929,
2466
+ "rewards/chosen": 0.15493164956569672,
2467
+ "rewards/margins": 0.44891220331192017,
2468
+ "rewards/rejected": -0.29398053884506226,
2469
+ "step": 1630
2470
+ },
2471
+ {
2472
+ "epoch": 8.30379746835443,
2473
+ "grad_norm": 1390175.0732444616,
2474
+ "learning_rate": 2.986524600438734e-07,
2475
+ "logits/chosen": -0.059876419603824615,
2476
+ "logits/rejected": 0.00422248849645257,
2477
+ "logps/chosen": -74.77996063232422,
2478
+ "logps/rejected": -544.7862548828125,
2479
+ "loss": 24176.6094,
2480
+ "rewards/accuracies": 1.0,
2481
+ "rewards/chosen": 0.151381716132164,
2482
+ "rewards/margins": 0.4694734215736389,
2483
+ "rewards/rejected": -0.3180916905403137,
2484
+ "step": 1640
2485
+ },
2486
+ {
2487
+ "epoch": 8.354430379746836,
2488
+ "grad_norm": 1846159.1203677754,
2489
+ "learning_rate": 2.970855531181448e-07,
2490
+ "logits/chosen": -3.206434726715088,
2491
+ "logits/rejected": -2.6545357704162598,
2492
+ "logps/chosen": -79.13458251953125,
2493
+ "logps/rejected": -529.1912841796875,
2494
+ "loss": 25560.5344,
2495
+ "rewards/accuracies": 0.987500011920929,
2496
+ "rewards/chosen": 0.14862783253192902,
2497
+ "rewards/margins": 0.4489147663116455,
2498
+ "rewards/rejected": -0.3002868890762329,
2499
+ "step": 1650
2500
+ },
2501
+ {
2502
+ "epoch": 8.405063291139241,
2503
+ "grad_norm": 1294602.7153889702,
2504
+ "learning_rate": 2.955186461924162e-07,
2505
+ "logits/chosen": -1.0581172704696655,
2506
+ "logits/rejected": -0.6744507551193237,
2507
+ "logps/chosen": -78.69017028808594,
2508
+ "logps/rejected": -526.4840087890625,
2509
+ "loss": 25549.9125,
2510
+ "rewards/accuracies": 0.9750000238418579,
2511
+ "rewards/chosen": 0.14595063030719757,
2512
+ "rewards/margins": 0.44837069511413574,
2513
+ "rewards/rejected": -0.302420049905777,
2514
+ "step": 1660
2515
+ },
2516
+ {
2517
+ "epoch": 8.455696202531646,
2518
+ "grad_norm": 1653521.5239311927,
2519
+ "learning_rate": 2.9395173926668755e-07,
2520
+ "logits/chosen": -0.9036309123039246,
2521
+ "logits/rejected": -0.16554176807403564,
2522
+ "logps/chosen": -83.71012878417969,
2523
+ "logps/rejected": -525.7719116210938,
2524
+ "loss": 25089.5516,
2525
+ "rewards/accuracies": 1.0,
2526
+ "rewards/chosen": 0.14826878905296326,
2527
+ "rewards/margins": 0.4438709616661072,
2528
+ "rewards/rejected": -0.2956022024154663,
2529
+ "step": 1670
2530
+ },
2531
+ {
2532
+ "epoch": 8.50632911392405,
2533
+ "grad_norm": 1371497.4089594388,
2534
+ "learning_rate": 2.9238483234095896e-07,
2535
+ "logits/chosen": -1.423182725906372,
2536
+ "logits/rejected": -1.0717556476593018,
2537
+ "logps/chosen": -89.4638671875,
2538
+ "logps/rejected": -577.1199340820312,
2539
+ "loss": 24558.0953,
2540
+ "rewards/accuracies": 1.0,
2541
+ "rewards/chosen": 0.15898647904396057,
2542
+ "rewards/margins": 0.48913446068763733,
2543
+ "rewards/rejected": -0.330147922039032,
2544
+ "step": 1680
2545
+ },
2546
+ {
2547
+ "epoch": 8.556962025316455,
2548
+ "grad_norm": 1476867.0955964676,
2549
+ "learning_rate": 2.908179254152303e-07,
2550
+ "logits/chosen": -3.2004425525665283,
2551
+ "logits/rejected": -2.7161200046539307,
2552
+ "logps/chosen": -86.7264633178711,
2553
+ "logps/rejected": -543.3889770507812,
2554
+ "loss": 26642.4781,
2555
+ "rewards/accuracies": 1.0,
2556
+ "rewards/chosen": 0.1485292911529541,
2557
+ "rewards/margins": 0.4551934599876404,
2558
+ "rewards/rejected": -0.3066641688346863,
2559
+ "step": 1690
2560
+ },
2561
+ {
2562
+ "epoch": 8.60759493670886,
2563
+ "grad_norm": 1134090.4892000444,
2564
+ "learning_rate": 2.8925101848950173e-07,
2565
+ "logits/chosen": -0.274528443813324,
2566
+ "logits/rejected": 0.4862538278102875,
2567
+ "logps/chosen": -79.16570281982422,
2568
+ "logps/rejected": -513.53173828125,
2569
+ "loss": 23741.9938,
2570
+ "rewards/accuracies": 0.9624999761581421,
2571
+ "rewards/chosen": 0.15034614503383636,
2572
+ "rewards/margins": 0.43597039580345154,
2573
+ "rewards/rejected": -0.28562426567077637,
2574
+ "step": 1700
2575
+ },
2576
+ {
2577
+ "epoch": 8.658227848101266,
2578
+ "grad_norm": 1314089.2981008843,
2579
+ "learning_rate": 2.876841115637731e-07,
2580
+ "logits/chosen": 0.6013806462287903,
2581
+ "logits/rejected": 1.2335985898971558,
2582
+ "logps/chosen": -90.46197509765625,
2583
+ "logps/rejected": -551.8345947265625,
2584
+ "loss": 24216.4281,
2585
+ "rewards/accuracies": 1.0,
2586
+ "rewards/chosen": 0.1541350781917572,
2587
+ "rewards/margins": 0.47102633118629456,
2588
+ "rewards/rejected": -0.3168913424015045,
2589
+ "step": 1710
2590
+ },
2591
+ {
2592
+ "epoch": 8.708860759493671,
2593
+ "grad_norm": 1622019.967143891,
2594
+ "learning_rate": 2.861172046380445e-07,
2595
+ "logits/chosen": 0.2407432496547699,
2596
+ "logits/rejected": 0.4264713227748871,
2597
+ "logps/chosen": -93.0431900024414,
2598
+ "logps/rejected": -564.0677490234375,
2599
+ "loss": 23649.3016,
2600
+ "rewards/accuracies": 1.0,
2601
+ "rewards/chosen": 0.147947758436203,
2602
+ "rewards/margins": 0.4662678837776184,
2603
+ "rewards/rejected": -0.3183201253414154,
2604
+ "step": 1720
2605
+ },
2606
+ {
2607
+ "epoch": 8.759493670886076,
2608
+ "grad_norm": 1520791.345848389,
2609
+ "learning_rate": 2.8455029771231586e-07,
2610
+ "logits/chosen": 0.6626393795013428,
2611
+ "logits/rejected": 0.7864507436752319,
2612
+ "logps/chosen": -94.95128631591797,
2613
+ "logps/rejected": -540.1358642578125,
2614
+ "loss": 25224.3125,
2615
+ "rewards/accuracies": 0.987500011920929,
2616
+ "rewards/chosen": 0.14551883935928345,
2617
+ "rewards/margins": 0.4529417157173157,
2618
+ "rewards/rejected": -0.3074227571487427,
2619
+ "step": 1730
2620
+ },
2621
+ {
2622
+ "epoch": 8.810126582278482,
2623
+ "grad_norm": 1625465.2135884068,
2624
+ "learning_rate": 2.8298339078658727e-07,
2625
+ "logits/chosen": -0.07786345481872559,
2626
+ "logits/rejected": -0.031427524983882904,
2627
+ "logps/chosen": -90.72882843017578,
2628
+ "logps/rejected": -539.1676025390625,
2629
+ "loss": 24133.7531,
2630
+ "rewards/accuracies": 0.987500011920929,
2631
+ "rewards/chosen": 0.15023007988929749,
2632
+ "rewards/margins": 0.4491490423679352,
2633
+ "rewards/rejected": -0.2989189624786377,
2634
+ "step": 1740
2635
+ },
2636
+ {
2637
+ "epoch": 8.860759493670885,
2638
+ "grad_norm": 1330490.8036484018,
2639
+ "learning_rate": 2.8141648386085863e-07,
2640
+ "logits/chosen": 0.1896178424358368,
2641
+ "logits/rejected": 1.3701179027557373,
2642
+ "logps/chosen": -78.11041259765625,
2643
+ "logps/rejected": -545.9954833984375,
2644
+ "loss": 24713.5375,
2645
+ "rewards/accuracies": 0.9750000238418579,
2646
+ "rewards/chosen": 0.15004639327526093,
2647
+ "rewards/margins": 0.4731353223323822,
2648
+ "rewards/rejected": -0.32308894395828247,
2649
+ "step": 1750
2650
+ },
2651
+ {
2652
+ "epoch": 8.91139240506329,
2653
+ "grad_norm": 1240332.5244059283,
2654
+ "learning_rate": 2.7984957693513004e-07,
2655
+ "logits/chosen": 0.09949211776256561,
2656
+ "logits/rejected": 0.6086061596870422,
2657
+ "logps/chosen": -84.04310607910156,
2658
+ "logps/rejected": -550.8171997070312,
2659
+ "loss": 24452.55,
2660
+ "rewards/accuracies": 0.987500011920929,
2661
+ "rewards/chosen": 0.14817103743553162,
2662
+ "rewards/margins": 0.47146469354629517,
2663
+ "rewards/rejected": -0.32329362630844116,
2664
+ "step": 1760
2665
+ },
2666
+ {
2667
+ "epoch": 8.962025316455696,
2668
+ "grad_norm": 1279998.0524960216,
2669
+ "learning_rate": 2.782826700094014e-07,
2670
+ "logits/chosen": -1.9250777959823608,
2671
+ "logits/rejected": -1.7448539733886719,
2672
+ "logps/chosen": -92.84037780761719,
2673
+ "logps/rejected": -539.1063232421875,
2674
+ "loss": 25664.2531,
2675
+ "rewards/accuracies": 0.9750000238418579,
2676
+ "rewards/chosen": 0.1440330594778061,
2677
+ "rewards/margins": 0.45180240273475647,
2678
+ "rewards/rejected": -0.3077693581581116,
2679
+ "step": 1770
2680
  }
2681
  ],
2682
  "logging_steps": 10,