cat-searcher commited on
Commit
1ddaac7
·
verified ·
1 Parent(s): a1151a6

Training in progress, epoch 12, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step2370/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step2370/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step2370/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step2370/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step2370/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step2370/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step2370/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step2370/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step2370/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step2370/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step2370/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step2370/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step2370/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step2370/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step2370/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step2370/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step2370/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:539b878ac428efe179a9375e8b771e2bbe9959772f656a49d248e6c21219607e
3
+ size 2506176112
last-checkpoint/global_step2370/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19da0539a63298d7f1955a78fcaf7a604fcad4711a600d78009f59c133c389ac
3
+ size 2506176112
last-checkpoint/global_step2370/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc663754bb20fa5b5fde7512f1fd6852c63dd93b8632058b00f46439ec37ebe5
3
+ size 2506176112
last-checkpoint/global_step2370/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f21f8c0cf635bb9dda9a9fde6692374f03cdec0db6fde80b9759d17dd0883e01
3
+ size 2506176112
last-checkpoint/global_step2370/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb76669b49a0a5796f4b28891fa464c5214f5dea1974df32eafd9088b754c68c
3
+ size 2506176112
last-checkpoint/global_step2370/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88a25cae859fbf4e0314c2387d5bce4af8d0f6fb7afff106b574200c2f21a2a9
3
+ size 2506176112
last-checkpoint/global_step2370/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091a2d886d23f7b7b11c575b96df3e036faee52f2a7be10747eb98191e548a3e
3
+ size 2506176112
last-checkpoint/global_step2370/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7474f03f70f02ec545471985af7899d5803c748676ad98efc15902b0a9f59521
3
+ size 2506176112
last-checkpoint/global_step2370/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8547ba8eff0308b0e571bd9e50e88b0cfc15f4d6be3778ecad8040612a04adc
3
+ size 85570
last-checkpoint/global_step2370/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc231122d4c07d64f4a593043fb64d590896f1ad7dd839020d0cbab2ff092571
3
+ size 85506
last-checkpoint/global_step2370/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7069360f0bcf13c37ea51d6b4e220a839816597f887ed5d2071d7bdca870d0dc
3
+ size 85506
last-checkpoint/global_step2370/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a56bf198409faf60746637d9bb3cce97bb0a9234fe5b1b53a73493dda801298
3
+ size 85506
last-checkpoint/global_step2370/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c930481b99d54823a71618a492aa499fee19fa8a46038c05c3ada2955784e7ec
3
+ size 85506
last-checkpoint/global_step2370/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8db9c13190d5d46ec451b202094fd847a2aa3b4ceb0e87506dcb32bb5f3ed3
3
+ size 85506
last-checkpoint/global_step2370/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88990d974f323a1ad602fb79ac849fba4eb11c4e06a6896eee838f3e06bddc1b
3
+ size 85506
last-checkpoint/global_step2370/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3933146a75f8eb8995b85d55978af32d948210229f919e0a6a99c609e4ed4d1a
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2172
 
1
+ global_step2370
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34c2fdc6dc356fbd1457bd44a4aaf9ba031e2601ef060eb917fb58507b625392
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb90761b8d1100caed65f46e62bc543938eea85fd3c409acef5287472762cc31
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2855531bbf77a7757bdcaa0136c3f7ea44a90f0b68feedc3569b88783065fd99
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:933b765e67fd27d0106fb7378964b3b6e3a143eaa550740f03ab1d3a10ff3bea
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba4c26c615bd5830d41566fab54dc69174be292761b34514b27fbe82b45b630b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feb6462d333dbc5bb5e497ea9b0adb960f7616f79e6eea63222de6d5bd559516
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60c761d7f9b90c29c2d348a1133fd39be52c65e6bee4c2d179f6a6e564eb3a40
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b045e1bfa728f51c8b51ab0faa20b128a4fbd350da006b9b39a19e24abdf5a74
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccab847cc956e055fd3f9dcce06898826d065211e945b83576c8d487f87c5469
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f76a3d058d2628a61848c2441d313f251278bd8f74ce43dc44d8cd8ad3e619a8
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e5f1dbdcf6ec820c22fd1e4258fcd7af2a2bce65c480988d3f111aa574c9c06
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7f72fc498e6eaa671cdc0e8a627a668b8ef607063a22ddb4edbc05e791be830
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a23184c3e806d2649776427d1da2c0c9137f9b23a84468f3bdd5bbc75f696c9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12889af98e175b734a788f4c5b8c4da91dd61ff3a05aaf61b9d4c66aa3dd8ad6
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:382fc01b809542bf6f5e26742e3e19e80a1f189ac5de24cf8cd822e303916b83
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe21a86abfceeac2cf2f48afd61a9a506cf61a287f3403f1adf391bb2ffa5a83
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b178265c7d2ae07bff10b7312e5e49b9f5b4914c38969d2f64a6ca006296bca
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73488bec91f9dee6d8105d06f99edaf4d27b6b064250d4c7023f33285b2f3132
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:668825a859126c4cf32afb883895c91004130b6aee02178736ca2840e5429ad0
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edf6ee1cc2e1325b428a21172ec4e61b7220c5489751ea11c06bb66c77a0cd08
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:450ea4feab6edb2c0a6bc824ba5e7d3864d6d5f94f04f20b34071a93ed9c22f2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80293d1d3039c03cadd9a7663af0dceb761b51cb1e901c839618d66f90e7f384
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.99746835443038,
5
  "eval_steps": 100,
6
- "global_step": 2172,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3277,6 +3277,306 @@
3277
  "rewards/margins": 0.5280236601829529,
3278
  "rewards/rejected": -0.35101914405822754,
3279
  "step": 2170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3280
  }
3281
  ],
3282
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.0,
5
  "eval_steps": 100,
6
+ "global_step": 2370,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3277
  "rewards/margins": 0.5280236601829529,
3278
  "rewards/rejected": -0.35101914405822754,
3279
  "step": 2170
3280
+ },
3281
+ {
3282
+ "epoch": 11.037974683544304,
3283
+ "grad_norm": 852169.287356795,
3284
+ "learning_rate": 2.1403948605452835e-07,
3285
+ "logits/chosen": -1.0383515357971191,
3286
+ "logits/rejected": 0.3044077157974243,
3287
+ "logps/chosen": -60.7518196105957,
3288
+ "logps/rejected": -550.4581909179688,
3289
+ "loss": 18261.975,
3290
+ "rewards/accuracies": 0.9624999761581421,
3291
+ "rewards/chosen": 0.16871869564056396,
3292
+ "rewards/margins": 0.49391689896583557,
3293
+ "rewards/rejected": -0.32519814372062683,
3294
+ "step": 2180
3295
+ },
3296
+ {
3297
+ "epoch": 11.08860759493671,
3298
+ "grad_norm": 850664.061578799,
3299
+ "learning_rate": 2.1247257912879973e-07,
3300
+ "logits/chosen": -0.5247487425804138,
3301
+ "logits/rejected": -0.718704342842102,
3302
+ "logps/chosen": -48.23347473144531,
3303
+ "logps/rejected": -571.79296875,
3304
+ "loss": 17780.6719,
3305
+ "rewards/accuracies": 1.0,
3306
+ "rewards/chosen": 0.17942146956920624,
3307
+ "rewards/margins": 0.5196394920349121,
3308
+ "rewards/rejected": -0.34021803736686707,
3309
+ "step": 2190
3310
+ },
3311
+ {
3312
+ "epoch": 11.139240506329115,
3313
+ "grad_norm": 795813.8223153341,
3314
+ "learning_rate": 2.1090567220307112e-07,
3315
+ "logits/chosen": 0.2913626730442047,
3316
+ "logits/rejected": 0.3964959681034088,
3317
+ "logps/chosen": -57.057777404785156,
3318
+ "logps/rejected": -553.8439331054688,
3319
+ "loss": 19198.0062,
3320
+ "rewards/accuracies": 1.0,
3321
+ "rewards/chosen": 0.1739949882030487,
3322
+ "rewards/margins": 0.49791765213012695,
3323
+ "rewards/rejected": -0.32392266392707825,
3324
+ "step": 2200
3325
+ },
3326
+ {
3327
+ "epoch": 11.189873417721518,
3328
+ "grad_norm": 1113023.3688515616,
3329
+ "learning_rate": 2.093387652773425e-07,
3330
+ "logits/chosen": 1.5053379535675049,
3331
+ "logits/rejected": 2.2073726654052734,
3332
+ "logps/chosen": -52.245140075683594,
3333
+ "logps/rejected": -549.0379028320312,
3334
+ "loss": 18112.9031,
3335
+ "rewards/accuracies": 1.0,
3336
+ "rewards/chosen": 0.1701221615076065,
3337
+ "rewards/margins": 0.49869513511657715,
3338
+ "rewards/rejected": -0.32857298851013184,
3339
+ "step": 2210
3340
+ },
3341
+ {
3342
+ "epoch": 11.240506329113924,
3343
+ "grad_norm": 1112437.2131689412,
3344
+ "learning_rate": 2.077718583516139e-07,
3345
+ "logits/chosen": -0.7113906741142273,
3346
+ "logits/rejected": -0.593052089214325,
3347
+ "logps/chosen": -56.02216720581055,
3348
+ "logps/rejected": -588.62255859375,
3349
+ "loss": 18765.7359,
3350
+ "rewards/accuracies": 0.987500011920929,
3351
+ "rewards/chosen": 0.18194417655467987,
3352
+ "rewards/margins": 0.529647707939148,
3353
+ "rewards/rejected": -0.3477035462856293,
3354
+ "step": 2220
3355
+ },
3356
+ {
3357
+ "epoch": 11.291139240506329,
3358
+ "grad_norm": 735799.2580717172,
3359
+ "learning_rate": 2.0620495142588527e-07,
3360
+ "logits/chosen": -0.9520748257637024,
3361
+ "logits/rejected": -0.6387659907341003,
3362
+ "logps/chosen": -58.523109436035156,
3363
+ "logps/rejected": -582.5303344726562,
3364
+ "loss": 17604.2656,
3365
+ "rewards/accuracies": 1.0,
3366
+ "rewards/chosen": 0.17585232853889465,
3367
+ "rewards/margins": 0.522950291633606,
3368
+ "rewards/rejected": -0.3470980226993561,
3369
+ "step": 2230
3370
+ },
3371
+ {
3372
+ "epoch": 11.341772151898734,
3373
+ "grad_norm": 716407.5247360148,
3374
+ "learning_rate": 2.0463804450015669e-07,
3375
+ "logits/chosen": 1.4925919771194458,
3376
+ "logits/rejected": 1.6499805450439453,
3377
+ "logps/chosen": -63.138038635253906,
3378
+ "logps/rejected": -546.4395751953125,
3379
+ "loss": 18588.6406,
3380
+ "rewards/accuracies": 0.9750000238418579,
3381
+ "rewards/chosen": 0.1618063747882843,
3382
+ "rewards/margins": 0.48370370268821716,
3383
+ "rewards/rejected": -0.3218972980976105,
3384
+ "step": 2240
3385
+ },
3386
+ {
3387
+ "epoch": 11.39240506329114,
3388
+ "grad_norm": 598500.3265676593,
3389
+ "learning_rate": 2.0307113757442807e-07,
3390
+ "logits/chosen": 0.6475615501403809,
3391
+ "logits/rejected": 1.338098406791687,
3392
+ "logps/chosen": -58.75787353515625,
3393
+ "logps/rejected": -563.3907470703125,
3394
+ "loss": 18119.6031,
3395
+ "rewards/accuracies": 0.9750000238418579,
3396
+ "rewards/chosen": 0.17143133282661438,
3397
+ "rewards/margins": 0.5086871981620789,
3398
+ "rewards/rejected": -0.3372558653354645,
3399
+ "step": 2250
3400
+ },
3401
+ {
3402
+ "epoch": 11.443037974683545,
3403
+ "grad_norm": 1221314.1531539639,
3404
+ "learning_rate": 2.0150423064869946e-07,
3405
+ "logits/chosen": -0.327157199382782,
3406
+ "logits/rejected": 0.03896377235651016,
3407
+ "logps/chosen": -58.68574905395508,
3408
+ "logps/rejected": -558.2637329101562,
3409
+ "loss": 17534.2281,
3410
+ "rewards/accuracies": 0.9750000238418579,
3411
+ "rewards/chosen": 0.17224976420402527,
3412
+ "rewards/margins": 0.49942049384117126,
3413
+ "rewards/rejected": -0.327170729637146,
3414
+ "step": 2260
3415
+ },
3416
+ {
3417
+ "epoch": 11.49367088607595,
3418
+ "grad_norm": 456316.6263000263,
3419
+ "learning_rate": 1.9993732372297084e-07,
3420
+ "logits/chosen": -0.07340321689844131,
3421
+ "logits/rejected": 0.9581168293952942,
3422
+ "logps/chosen": -56.39067459106445,
3423
+ "logps/rejected": -567.6375732421875,
3424
+ "loss": 17502.8781,
3425
+ "rewards/accuracies": 1.0,
3426
+ "rewards/chosen": 0.1778116524219513,
3427
+ "rewards/margins": 0.5131680965423584,
3428
+ "rewards/rejected": -0.3353564143180847,
3429
+ "step": 2270
3430
+ },
3431
+ {
3432
+ "epoch": 11.544303797468354,
3433
+ "grad_norm": 711686.0768962563,
3434
+ "learning_rate": 1.9837041679724223e-07,
3435
+ "logits/chosen": -0.8106869459152222,
3436
+ "logits/rejected": -0.6330159902572632,
3437
+ "logps/chosen": -61.687591552734375,
3438
+ "logps/rejected": -573.0241088867188,
3439
+ "loss": 17796.2391,
3440
+ "rewards/accuracies": 0.987500011920929,
3441
+ "rewards/chosen": 0.18241460621356964,
3442
+ "rewards/margins": 0.5145494937896729,
3443
+ "rewards/rejected": -0.3321349024772644,
3444
+ "step": 2280
3445
+ },
3446
+ {
3447
+ "epoch": 11.594936708860759,
3448
+ "grad_norm": 1355769.5974116765,
3449
+ "learning_rate": 1.9680350987151361e-07,
3450
+ "logits/chosen": 2.7271580696105957,
3451
+ "logits/rejected": 3.408385753631592,
3452
+ "logps/chosen": -53.9175910949707,
3453
+ "logps/rejected": -532.6714477539062,
3454
+ "loss": 18442.0969,
3455
+ "rewards/accuracies": 0.9624999761581421,
3456
+ "rewards/chosen": 0.16783255338668823,
3457
+ "rewards/margins": 0.4785786271095276,
3458
+ "rewards/rejected": -0.31074607372283936,
3459
+ "step": 2290
3460
+ },
3461
+ {
3462
+ "epoch": 11.645569620253164,
3463
+ "grad_norm": 1885360.6056858273,
3464
+ "learning_rate": 1.95236602945785e-07,
3465
+ "logits/chosen": -0.4679819941520691,
3466
+ "logits/rejected": 0.16113388538360596,
3467
+ "logps/chosen": -63.9486198425293,
3468
+ "logps/rejected": -550.3961181640625,
3469
+ "loss": 17411.3969,
3470
+ "rewards/accuracies": 0.9624999761581421,
3471
+ "rewards/chosen": 0.17148110270500183,
3472
+ "rewards/margins": 0.4901048243045807,
3473
+ "rewards/rejected": -0.3186236619949341,
3474
+ "step": 2300
3475
+ },
3476
+ {
3477
+ "epoch": 11.69620253164557,
3478
+ "grad_norm": 758901.4037823884,
3479
+ "learning_rate": 1.9366969602005639e-07,
3480
+ "logits/chosen": 0.85181725025177,
3481
+ "logits/rejected": 1.3077051639556885,
3482
+ "logps/chosen": -73.22114562988281,
3483
+ "logps/rejected": -575.5013427734375,
3484
+ "loss": 17968.0844,
3485
+ "rewards/accuracies": 1.0,
3486
+ "rewards/chosen": 0.1745305359363556,
3487
+ "rewards/margins": 0.5058612823486328,
3488
+ "rewards/rejected": -0.33133071660995483,
3489
+ "step": 2310
3490
+ },
3491
+ {
3492
+ "epoch": 11.746835443037975,
3493
+ "grad_norm": 520118.42882549425,
3494
+ "learning_rate": 1.9210278909432777e-07,
3495
+ "logits/chosen": -0.6327224969863892,
3496
+ "logits/rejected": 0.7259325385093689,
3497
+ "logps/chosen": -60.48676681518555,
3498
+ "logps/rejected": -574.37939453125,
3499
+ "loss": 18215.2938,
3500
+ "rewards/accuracies": 1.0,
3501
+ "rewards/chosen": 0.18099671602249146,
3502
+ "rewards/margins": 0.5182011127471924,
3503
+ "rewards/rejected": -0.33720433712005615,
3504
+ "step": 2320
3505
+ },
3506
+ {
3507
+ "epoch": 11.79746835443038,
3508
+ "grad_norm": 743117.6330674689,
3509
+ "learning_rate": 1.9053588216859918e-07,
3510
+ "logits/chosen": 1.2280547618865967,
3511
+ "logits/rejected": 1.3038314580917358,
3512
+ "logps/chosen": -59.2470817565918,
3513
+ "logps/rejected": -559.13916015625,
3514
+ "loss": 17567.2906,
3515
+ "rewards/accuracies": 0.987500011920929,
3516
+ "rewards/chosen": 0.17356745898723602,
3517
+ "rewards/margins": 0.49933862686157227,
3518
+ "rewards/rejected": -0.32577118277549744,
3519
+ "step": 2330
3520
+ },
3521
+ {
3522
+ "epoch": 11.848101265822784,
3523
+ "grad_norm": 730673.5249396141,
3524
+ "learning_rate": 1.8896897524287057e-07,
3525
+ "logits/chosen": 1.2314859628677368,
3526
+ "logits/rejected": 1.3703396320343018,
3527
+ "logps/chosen": -58.14827346801758,
3528
+ "logps/rejected": -552.53759765625,
3529
+ "loss": 17758.8719,
3530
+ "rewards/accuracies": 0.987500011920929,
3531
+ "rewards/chosen": 0.1748059093952179,
3532
+ "rewards/margins": 0.4981175363063812,
3533
+ "rewards/rejected": -0.3233116567134857,
3534
+ "step": 2340
3535
+ },
3536
+ {
3537
+ "epoch": 11.89873417721519,
3538
+ "grad_norm": 597117.4885736415,
3539
+ "learning_rate": 1.8740206831714195e-07,
3540
+ "logits/chosen": -0.7092142105102539,
3541
+ "logits/rejected": -0.0756240040063858,
3542
+ "logps/chosen": -62.97068405151367,
3543
+ "logps/rejected": -567.6489868164062,
3544
+ "loss": 18044.8,
3545
+ "rewards/accuracies": 0.9750000238418579,
3546
+ "rewards/chosen": 0.17830543220043182,
3547
+ "rewards/margins": 0.5064790844917297,
3548
+ "rewards/rejected": -0.3281736969947815,
3549
+ "step": 2350
3550
+ },
3551
+ {
3552
+ "epoch": 11.949367088607595,
3553
+ "grad_norm": 687586.0618323467,
3554
+ "learning_rate": 1.8583516139141334e-07,
3555
+ "logits/chosen": -1.2183369398117065,
3556
+ "logits/rejected": -1.056317925453186,
3557
+ "logps/chosen": -65.71519470214844,
3558
+ "logps/rejected": -578.7620239257812,
3559
+ "loss": 18082.8625,
3560
+ "rewards/accuracies": 0.987500011920929,
3561
+ "rewards/chosen": 0.18341727554798126,
3562
+ "rewards/margins": 0.5148480534553528,
3563
+ "rewards/rejected": -0.33143073320388794,
3564
+ "step": 2360
3565
+ },
3566
+ {
3567
+ "epoch": 12.0,
3568
+ "grad_norm": 748926.1941504646,
3569
+ "learning_rate": 1.8426825446568473e-07,
3570
+ "logits/chosen": -0.35043638944625854,
3571
+ "logits/rejected": -1.1868419647216797,
3572
+ "logps/chosen": -59.269996643066406,
3573
+ "logps/rejected": -581.2828369140625,
3574
+ "loss": 17352.5563,
3575
+ "rewards/accuracies": 1.0,
3576
+ "rewards/chosen": 0.16442957520484924,
3577
+ "rewards/margins": 0.5158518552780151,
3578
+ "rewards/rejected": -0.3514222800731659,
3579
+ "step": 2370
3580
  }
3581
  ],
3582
  "logging_steps": 10,