narekvslife committed
Commit 8904848 • 1 Parent(s): 0e3ef6e

dpo_5wiothfs 5.9

Browse files:
- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +452 -2
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9527902bd6aeaf0355fc706a007b7e21ee1a936860b8d1b9bd19824385fc4972
 size 18900240
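(Note: the binary files in this commit are stored through Git LFS, so the diff above only shows the updated pointer text; the `oid sha256:` value is the SHA-256 digest of the actual 18.9 MB adapter blob. Below is a minimal sketch, not part of the commit, of checking a locally pulled copy of the file against the pointer recorded here; the local path is an assumption.)

```python
# Illustrative check only: verify a locally pulled adapter_model.safetensors
# against the Git LFS pointer oid shown in the diff above.
import hashlib

EXPECTED_OID = "9527902bd6aeaf0355fc706a007b7e21ee1a936860b8d1b9bd19824385fc4972"  # from the pointer diff

digest = hashlib.sha256()
with open("adapter_model.safetensors", "rb") as f:  # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

print("matches LFS pointer:", digest.hexdigest() == EXPECTED_OID)
```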
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e7dfe12af6d9861c0b41c1b9ca0e7b6f45d90828d79882df00f0e054a2f011d0
 size 37910458
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a0b908a911ffc3dc212618df71c6aa766b5d758bf18eb427c2dcfb767a1b2cba
 size 1064
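(For context: `adapter_model.safetensors` is the filename PEFT uses for a LoRA-style adapter, while `optimizer.pt` and `scheduler.pt` carry the optimizer and learning-rate-scheduler state needed to resume training. The commit itself does not identify the base model or repository id, so the sketch below uses placeholders for both; it only illustrates how such an adapter is typically loaded.)

```python
# Illustrative sketch only: load the updated adapter on top of its base model.
# Both identifiers below are placeholders, not taken from this commit.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("BASE_MODEL_NAME")          # placeholder base model
model = PeftModel.from_pretrained(base, "narekvslife/ADAPTER_REPO_ID")  # placeholder repo id
model.eval()
```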
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.85819455926932,
   "eval_steps": 2000,
-  "global_step":
+  "global_step": 5900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8439,6 +8439,456 @@
       "rewards/margins": 0.27365249395370483,
       "rewards/rejected": 0.5168629884719849,
       "step": 5600
+    },
+    {
+      "epoch": 1.7668595724577774,
+      "grad_norm": 2.46875,
+      "learning_rate": 3.4943208086663183e-06,
+      "logits/chosen": -0.4847659170627594,
+      "logits/rejected": -0.33793026208877563,
+      "logps/chosen": -197.34933471679688,
+      "logps/rejected": -174.9829559326172,
+      "loss": 0.646,
+      "rewards/accuracies": 0.6000000238418579,
+      "rewards/chosen": 0.7014733552932739,
+      "rewards/margins": 0.12839707732200623,
+      "rewards/rejected": 0.5730762481689453,
+      "step": 5610
+    },
+    {
+      "epoch": 1.7700090547616236,
+      "grad_norm": 3.578125,
+      "learning_rate": 3.4894823245512986e-06,
+      "logits/chosen": -0.506749153137207,
+      "logits/rejected": -0.45556968450546265,
+      "logps/chosen": -197.71902465820312,
+      "logps/rejected": -186.50241088867188,
+      "loss": 0.6803,
+      "rewards/accuracies": 0.550000011920929,
+      "rewards/chosen": 0.7117626070976257,
+      "rewards/margins": 0.06737571209669113,
+      "rewards/rejected": 0.644386887550354,
+      "step": 5620
+    },
+    {
+      "epoch": 1.7731585370654699,
+      "grad_norm": 2.578125,
+      "learning_rate": 3.484639441627448e-06,
+      "logits/chosen": -0.5070594549179077,
+      "logits/rejected": -0.3329693078994751,
+      "logps/chosen": -220.60986328125,
+      "logps/rejected": -183.98416137695312,
+      "loss": 0.6042,
+      "rewards/accuracies": 0.737500011920929,
+      "rewards/chosen": 0.7875211834907532,
+      "rewards/margins": 0.2286391705274582,
+      "rewards/rejected": 0.5588821172714233,
+      "step": 5630
+    },
+    {
+      "epoch": 1.7763080193693161,
+      "grad_norm": 2.546875,
+      "learning_rate": 3.4797921814241196e-06,
+      "logits/chosen": -0.48938584327697754,
+      "logits/rejected": -0.37643399834632874,
+      "logps/chosen": -194.7692413330078,
+      "logps/rejected": -171.0836944580078,
+      "loss": 0.6345,
+      "rewards/accuracies": 0.637499988079071,
+      "rewards/chosen": 0.7208179235458374,
+      "rewards/margins": 0.17952939867973328,
+      "rewards/rejected": 0.5412884950637817,
+      "step": 5640
+    },
+    {
+      "epoch": 1.7794575016731624,
+      "grad_norm": 2.71875,
+      "learning_rate": 3.4749405654901297e-06,
+      "logits/chosen": -0.5021311044692993,
+      "logits/rejected": -0.3592470586299896,
+      "logps/chosen": -203.04798889160156,
+      "logps/rejected": -170.28916931152344,
+      "loss": 0.6468,
+      "rewards/accuracies": 0.5874999761581421,
+      "rewards/chosen": 0.7304830551147461,
+      "rewards/margins": 0.14200101792812347,
+      "rewards/rejected": 0.5884820222854614,
+      "step": 5650
+    },
+    {
+      "epoch": 1.7826069839770087,
+      "grad_norm": 1.8125,
+      "learning_rate": 3.470084615393655e-06,
+      "logits/chosen": -0.5099314451217651,
+      "logits/rejected": -0.36777496337890625,
+      "logps/chosen": -188.96286010742188,
+      "logps/rejected": -158.13487243652344,
+      "loss": 0.5854,
+      "rewards/accuracies": 0.8374999761581421,
+      "rewards/chosen": 0.7638787031173706,
+      "rewards/margins": 0.25533777475357056,
+      "rewards/rejected": 0.5085408687591553,
+      "step": 5660
+    },
+    {
+      "epoch": 1.785756466280855,
+      "grad_norm": 2.71875,
+      "learning_rate": 3.4652243527221423e-06,
+      "logits/chosen": -0.4756031632423401,
+      "logits/rejected": -0.44920986890792847,
+      "logps/chosen": -185.1388397216797,
+      "logps/rejected": -172.55137634277344,
+      "loss": 0.6583,
+      "rewards/accuracies": 0.5375000238418579,
+      "rewards/chosen": 0.7094627618789673,
+      "rewards/margins": 0.13025884330272675,
+      "rewards/rejected": 0.5792039036750793,
+      "step": 5670
+    },
+    {
+      "epoch": 1.7889059485847014,
+      "grad_norm": 3.171875,
+      "learning_rate": 3.460359799082209e-06,
+      "logits/chosen": -0.47689515352249146,
+      "logits/rejected": -0.34241801500320435,
+      "logps/chosen": -204.8109588623047,
+      "logps/rejected": -166.13514709472656,
+      "loss": 0.615,
+      "rewards/accuracies": 0.7250000238418579,
+      "rewards/chosen": 0.7595565915107727,
+      "rewards/margins": 0.21238622069358826,
+      "rewards/rejected": 0.5471702814102173,
+      "step": 5680
+    },
+    {
+      "epoch": 1.7920554308885477,
+      "grad_norm": 3.765625,
+      "learning_rate": 3.4554909760995485e-06,
+      "logits/chosen": -0.5418170094490051,
+      "logits/rejected": -0.41362690925598145,
+      "logps/chosen": -187.98043823242188,
+      "logps/rejected": -167.5854034423828,
+      "loss": 0.6338,
+      "rewards/accuracies": 0.699999988079071,
+      "rewards/chosen": 0.7204712629318237,
+      "rewards/margins": 0.1737706959247589,
+      "rewards/rejected": 0.5467005968093872,
+      "step": 5690
+    },
+    {
+      "epoch": 1.795204913192394,
+      "grad_norm": 3.3125,
+      "learning_rate": 3.450617905418834e-06,
+      "logits/chosen": -0.442087322473526,
+      "logits/rejected": -0.3480719029903412,
+      "logps/chosen": -205.0787353515625,
+      "logps/rejected": -176.585693359375,
+      "loss": 0.6078,
+      "rewards/accuracies": 0.699999988079071,
+      "rewards/chosen": 0.7968889474868774,
+      "rewards/margins": 0.2236328423023224,
+      "rewards/rejected": 0.5732561349868774,
+      "step": 5700
+    },
+    {
+      "epoch": 1.7983543954962404,
+      "grad_norm": 3.125,
+      "learning_rate": 3.4457406087036233e-06,
+      "logits/chosen": -0.4669428765773773,
+      "logits/rejected": -0.379183828830719,
+      "logps/chosen": -183.84532165527344,
+      "logps/rejected": -169.44937133789062,
+      "loss": 0.6755,
+      "rewards/accuracies": 0.637499988079071,
+      "rewards/chosen": 0.6309347748756409,
+      "rewards/margins": 0.07157482206821442,
+      "rewards/rejected": 0.5593599081039429,
+      "step": 5710
+    },
+    {
+      "epoch": 1.8015038778000867,
+      "grad_norm": 2.984375,
+      "learning_rate": 3.4408591076362585e-06,
+      "logits/chosen": -0.5323187112808228,
+      "logits/rejected": -0.45780545473098755,
+      "logps/chosen": -205.9134521484375,
+      "logps/rejected": -180.65916442871094,
+      "loss": 0.6566,
+      "rewards/accuracies": 0.612500011920929,
+      "rewards/chosen": 0.7317408323287964,
+      "rewards/margins": 0.11702696233987808,
+      "rewards/rejected": 0.6147138476371765,
+      "step": 5720
+    },
+    {
+      "epoch": 1.804653360103933,
+      "grad_norm": 2.859375,
+      "learning_rate": 3.435973423917774e-06,
+      "logits/chosen": -0.48551005125045776,
+      "logits/rejected": -0.40477806329727173,
+      "logps/chosen": -195.50228881835938,
+      "logps/rejected": -173.91912841796875,
+      "loss": 0.6842,
+      "rewards/accuracies": 0.5375000238418579,
+      "rewards/chosen": 0.7036144137382507,
+      "rewards/margins": 0.06239970773458481,
+      "rewards/rejected": 0.6412147283554077,
+      "step": 5730
+    },
+    {
+      "epoch": 1.8078028424077792,
+      "grad_norm": 2.40625,
+      "learning_rate": 3.4310835792677995e-06,
+      "logits/chosen": -0.4431411623954773,
+      "logits/rejected": -0.3337770104408264,
+      "logps/chosen": -198.4442138671875,
+      "logps/rejected": -162.93258666992188,
+      "loss": 0.6348,
+      "rewards/accuracies": 0.637499988079071,
+      "rewards/chosen": 0.6712988018989563,
+      "rewards/margins": 0.1662341058254242,
+      "rewards/rejected": 0.5050647854804993,
+      "step": 5740
+    },
+    {
+      "epoch": 1.8109523247116255,
+      "grad_norm": 3.015625,
+      "learning_rate": 3.4261895954244613e-06,
+      "logits/chosen": -0.4226387143135071,
+      "logits/rejected": -0.3787776827812195,
+      "logps/chosen": -173.4969024658203,
+      "logps/rejected": -161.3011932373047,
+      "loss": 0.6435,
+      "rewards/accuracies": 0.6499999761581421,
+      "rewards/chosen": 0.6374837160110474,
+      "rewards/margins": 0.13156263530254364,
+      "rewards/rejected": 0.5059210658073425,
+      "step": 5750
+    },
+    {
+      "epoch": 1.8141018070154717,
+      "grad_norm": 3.09375,
+      "learning_rate": 3.4212914941442866e-06,
+      "logits/chosen": -0.48183003067970276,
+      "logits/rejected": -0.3869970142841339,
+      "logps/chosen": -199.9102020263672,
+      "logps/rejected": -183.46273803710938,
+      "loss": 0.6739,
+      "rewards/accuracies": 0.512499988079071,
+      "rewards/chosen": 0.7144922614097595,
+      "rewards/margins": 0.07754186540842056,
+      "rewards/rejected": 0.6369503736495972,
+      "step": 5760
+    },
+    {
+      "epoch": 1.817251289319318,
+      "grad_norm": 2.796875,
+      "learning_rate": 3.416389297202107e-06,
+      "logits/chosen": -0.435200035572052,
+      "logits/rejected": -0.273305743932724,
+      "logps/chosen": -200.13018798828125,
+      "logps/rejected": -172.42526245117188,
+      "loss": 0.6273,
+      "rewards/accuracies": 0.6499999761581421,
+      "rewards/chosen": 0.7225381135940552,
+      "rewards/margins": 0.19170936942100525,
+      "rewards/rejected": 0.5308286547660828,
+      "step": 5770
+    },
+    {
+      "epoch": 1.8204007716231645,
+      "grad_norm": 3.203125,
+      "learning_rate": 3.4114830263909615e-06,
+      "logits/chosen": -0.488565593957901,
+      "logits/rejected": -0.3196925222873688,
+      "logps/chosen": -203.71237182617188,
+      "logps/rejected": -175.8201141357422,
+      "loss": 0.6425,
+      "rewards/accuracies": 0.6625000238418579,
+      "rewards/chosen": 0.6835566759109497,
+      "rewards/margins": 0.13755542039871216,
+      "rewards/rejected": 0.5460013151168823,
+      "step": 5780
+    },
+    {
+      "epoch": 1.8235502539270108,
+      "grad_norm": 2.84375,
+      "learning_rate": 3.4065727035220013e-06,
+      "logits/chosen": -0.48802971839904785,
+      "logits/rejected": -0.401599645614624,
+      "logps/chosen": -203.4430694580078,
+      "logps/rejected": -178.24978637695312,
+      "loss": 0.6509,
+      "rewards/accuracies": 0.574999988079071,
+      "rewards/chosen": 0.7013251185417175,
+      "rewards/margins": 0.12661480903625488,
+      "rewards/rejected": 0.5747103095054626,
+      "step": 5790
+    },
+    {
+      "epoch": 1.826699736230857,
+      "grad_norm": 3.15625,
+      "learning_rate": 3.4016583504243892e-06,
+      "logits/chosen": -0.39509814977645874,
+      "logits/rejected": -0.3049541115760803,
+      "logps/chosen": -193.34628295898438,
+      "logps/rejected": -168.88990783691406,
+      "loss": 0.6467,
+      "rewards/accuracies": 0.625,
+      "rewards/chosen": 0.6939215660095215,
+      "rewards/margins": 0.13865116238594055,
+      "rewards/rejected": 0.5552703738212585,
+      "step": 5800
+    },
+    {
+      "epoch": 1.8298492185347035,
+      "grad_norm": 2.609375,
+      "learning_rate": 3.3967399889452056e-06,
+      "logits/chosen": -0.5302572250366211,
+      "logits/rejected": -0.42114171385765076,
+      "logps/chosen": -187.310791015625,
+      "logps/rejected": -158.18551635742188,
+      "loss": 0.62,
+      "rewards/accuracies": 0.699999988079071,
+      "rewards/chosen": 0.6706022024154663,
+      "rewards/margins": 0.18203167617321014,
+      "rewards/rejected": 0.48857051134109497,
+      "step": 5810
+    },
+    {
+      "epoch": 1.8329987008385498,
+      "grad_norm": 2.359375,
+      "learning_rate": 3.3918176409493498e-06,
+      "logits/chosen": -0.4302283227443695,
+      "logits/rejected": -0.3126838207244873,
+      "logps/chosen": -207.9413604736328,
+      "logps/rejected": -186.14862060546875,
+      "loss": 0.6106,
+      "rewards/accuracies": 0.737500011920929,
+      "rewards/chosen": 0.8021620512008667,
+      "rewards/margins": 0.22595825791358948,
+      "rewards/rejected": 0.5762038826942444,
+      "step": 5820
+    },
+    {
+      "epoch": 1.836148183142396,
+      "grad_norm": 3.15625,
+      "learning_rate": 3.3868913283194445e-06,
+      "logits/chosen": -0.4245404303073883,
+      "logits/rejected": -0.3099447190761566,
+      "logps/chosen": -215.6573486328125,
+      "logps/rejected": -180.88473510742188,
+      "loss": 0.6243,
+      "rewards/accuracies": 0.6499999761581421,
+      "rewards/chosen": 0.8092790842056274,
+      "rewards/margins": 0.21156442165374756,
+      "rewards/rejected": 0.5977145433425903,
+      "step": 5830
+    },
+    {
+      "epoch": 1.8392976654462423,
+      "grad_norm": 2.203125,
+      "learning_rate": 3.381961072955737e-06,
+      "logits/chosen": -0.4956479072570801,
+      "logits/rejected": -0.4022194743156433,
+      "logps/chosen": -181.72386169433594,
+      "logps/rejected": -157.3038330078125,
+      "loss": 0.6444,
+      "rewards/accuracies": 0.699999988079071,
+      "rewards/chosen": 0.6013648509979248,
+      "rewards/margins": 0.13577811419963837,
+      "rewards/rejected": 0.46558675169944763,
+      "step": 5840
+    },
+    {
+      "epoch": 1.8424471477500886,
+      "grad_norm": 2.828125,
+      "learning_rate": 3.3770268967760026e-06,
+      "logits/chosen": -0.4699929356575012,
+      "logits/rejected": -0.38960105180740356,
+      "logps/chosen": -190.84512329101562,
+      "logps/rejected": -165.31561279296875,
+      "loss": 0.6521,
+      "rewards/accuracies": 0.6000000238418579,
+      "rewards/chosen": 0.7368890047073364,
+      "rewards/margins": 0.1246052160859108,
+      "rewards/rejected": 0.6122837662696838,
+      "step": 5850
+    },
+    {
+      "epoch": 1.8455966300539348,
+      "grad_norm": 3.640625,
+      "learning_rate": 3.372088821715446e-06,
+      "logits/chosen": -0.5164574384689331,
+      "logits/rejected": -0.40460482239723206,
+      "logps/chosen": -215.09130859375,
+      "logps/rejected": -181.18551635742188,
+      "loss": 0.6583,
+      "rewards/accuracies": 0.6000000238418579,
+      "rewards/chosen": 0.7538167238235474,
+      "rewards/margins": 0.11776645481586456,
+      "rewards/rejected": 0.636050283908844,
+      "step": 5860
+    },
+    {
+      "epoch": 1.848746112357781,
+      "grad_norm": 2.65625,
+      "learning_rate": 3.3671468697266048e-06,
+      "logits/chosen": -0.486356645822525,
+      "logits/rejected": -0.45697417855262756,
+      "logps/chosen": -189.52955627441406,
+      "logps/rejected": -172.86190795898438,
+      "loss": 0.6822,
+      "rewards/accuracies": 0.5375000238418579,
+      "rewards/chosen": 0.6347873210906982,
+      "rewards/margins": 0.054320335388183594,
+      "rewards/rejected": 0.5804670453071594,
+      "step": 5870
+    },
+    {
+      "epoch": 1.8518955946616273,
+      "grad_norm": 3.375,
+      "learning_rate": 3.3622010627792513e-06,
+      "logits/chosen": -0.5492820143699646,
+      "logits/rejected": -0.38086193799972534,
+      "logps/chosen": -194.9511260986328,
+      "logps/rejected": -161.57528686523438,
+      "loss": 0.6699,
+      "rewards/accuracies": 0.550000011920929,
+      "rewards/chosen": 0.6926398873329163,
+      "rewards/margins": 0.09596933424472809,
+      "rewards/rejected": 0.5966705083847046,
+      "step": 5880
+    },
+    {
+      "epoch": 1.8550450769654738,
+      "grad_norm": 2.84375,
+      "learning_rate": 3.3572514228602977e-06,
+      "logits/chosen": -0.4424726366996765,
+      "logits/rejected": -0.35579612851142883,
+      "logps/chosen": -196.1681671142578,
+      "logps/rejected": -165.40811157226562,
+      "loss": 0.6129,
+      "rewards/accuracies": 0.6875,
+      "rewards/chosen": 0.7343538999557495,
+      "rewards/margins": 0.21051523089408875,
+      "rewards/rejected": 0.5238386392593384,
+      "step": 5890
+    },
+    {
+      "epoch": 1.85819455926932,
+      "grad_norm": 2.96875,
+      "learning_rate": 3.3522979719736923e-06,
+      "logits/chosen": -0.4300655722618103,
+      "logits/rejected": -0.23585304617881775,
+      "logps/chosen": -209.92355346679688,
+      "logps/rejected": -173.3553924560547,
+      "loss": 0.639,
+      "rewards/accuracies": 0.5874999761581421,
+      "rewards/chosen": 0.7228736877441406,
+      "rewards/margins": 0.16360947489738464,
+      "rewards/rejected": 0.5592643022537231,
+      "step": 5900
     }
   ],
   "logging_steps": 10,
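(The trainer_state.json update above appends 30 new log_history entries, carrying the run from step 5600 to step 5900 at epoch ≈ 1.86, with DPO metrics logged every 10 steps per "logging_steps": 10. Below is a minimal sketch, using only keys visible in the diff, of summarizing those entries from a local copy of the file; the path is an assumption.)

```python
# Illustrative sketch only: read the DPO metrics recorded in trainer_state.json.
# Keys ("step", "loss", "rewards/margins", ...) are the ones shown in the diff;
# the local file path is an assumption.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print("epoch:", state["epoch"], "global_step:", state["global_step"])

# log_history holds one dict per logging step (every 10 steps in this run).
for entry in state["log_history"]:
    if entry.get("step", 0) < 5610 or "rewards/margins" not in entry:
        continue  # keep only the training entries added in this commit
    print(f"step {entry['step']}: loss={entry['loss']:.4f} "
          f"margin={entry['rewards/margins']:.4f} "
          f"acc={entry['rewards/accuracies']:.3f}")
```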