Training in progress, step 3072, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e2d30406d7c467767499ef4fa93e0814e4b9c839e83cc877c2f8eaaf710781d1
 size 891644712
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d95e37a4b2b1e8065e39772f0ad6f60340e21cd105dce5b06f199029f9a8d550
 size 1783444794
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0d2f5ef411fc40d8d9b3f53029f0d2bde94e51e311c130b07e4428069fee892d
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3543c156e7d71b14c19e0a0a6a897c5b126e8bc6938f4ff38dab3dadeb331bb4
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.7038269550748752,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 3072,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -9647,6 +9647,1126 @@
       "learning_rate": 2.801472809339294e-05,
       "loss": 0.3601,
       "step": 2752
+    },
+    {
+      "epoch": 1.5274542429284526,
+      "grad_norm": 0.28103169798851013,
+      "learning_rate": 2.7890128096392477e-05,
+      "loss": 0.3864,
+      "step": 2754
+    },
+    {
+      "epoch": 1.528563505268996,
+      "grad_norm": 0.3099244236946106,
+      "learning_rate": 2.7765760888726855e-05,
+      "loss": 0.3917,
+      "step": 2756
+    },
+    {
+      "epoch": 1.5296727676095396,
+      "grad_norm": 0.23606978356838226,
+      "learning_rate": 2.7641626871885596e-05,
+      "loss": 0.3313,
+      "step": 2758
+    },
+    {
+      "epoch": 1.5307820299500832,
+      "grad_norm": 0.36333397030830383,
+      "learning_rate": 2.7517726446605406e-05,
+      "loss": 0.3982,
+      "step": 2760
+    },
+    {
+      "epoch": 1.5318912922906267,
+      "grad_norm": 0.27382388710975647,
+      "learning_rate": 2.7394060012868995e-05,
+      "loss": 0.2713,
+      "step": 2762
+    },
+    {
+      "epoch": 1.5330005546311702,
+      "grad_norm": 0.2855754494667053,
+      "learning_rate": 2.7270627969903608e-05,
+      "loss": 0.4792,
+      "step": 2764
+    },
+    {
+      "epoch": 1.5341098169717138,
+      "grad_norm": 0.35454511642456055,
+      "learning_rate": 2.714743071617979e-05,
+      "loss": 0.3661,
+      "step": 2766
+    },
+    {
+      "epoch": 1.5352190793122573,
+      "grad_norm": 0.22000765800476074,
+      "learning_rate": 2.7024468649410228e-05,
+      "loss": 0.3621,
+      "step": 2768
+    },
+    {
+      "epoch": 1.5363283416528009,
+      "grad_norm": 0.28072547912597656,
+      "learning_rate": 2.6901742166548262e-05,
+      "loss": 0.3846,
+      "step": 2770
+    },
+    {
+      "epoch": 1.5374376039933444,
+      "grad_norm": 0.2560584545135498,
+      "learning_rate": 2.6779251663786797e-05,
+      "loss": 0.4105,
+      "step": 2772
+    },
+    {
+      "epoch": 1.538546866333888,
+      "grad_norm": 0.33404773473739624,
+      "learning_rate": 2.665699753655684e-05,
+      "loss": 0.3561,
+      "step": 2774
+    },
+    {
+      "epoch": 1.5396561286744315,
+      "grad_norm": 0.33240342140197754,
+      "learning_rate": 2.6534980179526415e-05,
+      "loss": 0.3972,
+      "step": 2776
+    },
+    {
+      "epoch": 1.540765391014975,
+      "grad_norm": 0.26221776008605957,
+      "learning_rate": 2.6413199986599112e-05,
+      "loss": 0.3542,
+      "step": 2778
+    },
+    {
+      "epoch": 1.5418746533555185,
+      "grad_norm": 0.2851394712924957,
+      "learning_rate": 2.6291657350912923e-05,
+      "loss": 0.3402,
+      "step": 2780
+    },
+    {
+      "epoch": 1.542983915696062,
+      "grad_norm": 0.27777722477912903,
+      "learning_rate": 2.6170352664838903e-05,
+      "loss": 0.4094,
+      "step": 2782
+    },
+    {
+      "epoch": 1.5440931780366056,
+      "grad_norm": 0.32692790031433105,
+      "learning_rate": 2.6049286319980014e-05,
+      "loss": 0.4145,
+      "step": 2784
+    },
+    {
+      "epoch": 1.5452024403771492,
+      "grad_norm": 0.37069231271743774,
+      "learning_rate": 2.5928458707169813e-05,
+      "loss": 0.4012,
+      "step": 2786
+    },
+    {
+      "epoch": 1.5463117027176927,
+      "grad_norm": 0.28681105375289917,
+      "learning_rate": 2.5807870216471052e-05,
+      "loss": 0.4338,
+      "step": 2788
+    },
+    {
+      "epoch": 1.5474209650582362,
+      "grad_norm": 0.3061560094356537,
+      "learning_rate": 2.5687521237174584e-05,
+      "loss": 0.4096,
+      "step": 2790
+    },
+    {
+      "epoch": 1.5485302273987798,
+      "grad_norm": 0.3024190664291382,
+      "learning_rate": 2.5567412157798133e-05,
+      "loss": 0.3737,
+      "step": 2792
+    },
+    {
+      "epoch": 1.5496394897393233,
+      "grad_norm": 0.22082455456256866,
+      "learning_rate": 2.544754336608486e-05,
+      "loss": 0.3517,
+      "step": 2794
+    },
+    {
+      "epoch": 1.5507487520798668,
+      "grad_norm": 0.23570817708969116,
+      "learning_rate": 2.5327915249002245e-05,
+      "loss": 0.358,
+      "step": 2796
+    },
+    {
+      "epoch": 1.5518580144204104,
+      "grad_norm": 0.28938063979148865,
+      "learning_rate": 2.5208528192740834e-05,
+      "loss": 0.3861,
+      "step": 2798
+    },
+    {
+      "epoch": 1.552967276760954,
+      "grad_norm": 0.22857078909873962,
+      "learning_rate": 2.5089382582712994e-05,
+      "loss": 0.3072,
+      "step": 2800
+    },
+    {
+      "epoch": 1.5540765391014975,
+      "grad_norm": 0.28918081521987915,
+      "learning_rate": 2.4970478803551565e-05,
+      "loss": 0.3366,
+      "step": 2802
+    },
+    {
+      "epoch": 1.555185801442041,
+      "grad_norm": 0.28605908155441284,
+      "learning_rate": 2.4851817239108688e-05,
+      "loss": 0.31,
+      "step": 2804
+    },
+    {
+      "epoch": 1.5562950637825845,
+      "grad_norm": 0.26103734970092773,
+      "learning_rate": 2.4733398272454687e-05,
+      "loss": 0.3324,
+      "step": 2806
+    },
+    {
+      "epoch": 1.557404326123128,
+      "grad_norm": 0.3307429850101471,
+      "learning_rate": 2.4615222285876616e-05,
+      "loss": 0.3568,
+      "step": 2808
+    },
+    {
+      "epoch": 1.5585135884636716,
+      "grad_norm": 0.2729584574699402,
+      "learning_rate": 2.449728966087712e-05,
+      "loss": 0.3475,
+      "step": 2810
+    },
+    {
+      "epoch": 1.5596228508042151,
+      "grad_norm": 0.27880561351776123,
+      "learning_rate": 2.437960077817326e-05,
+      "loss": 0.371,
+      "step": 2812
+    },
+    {
+      "epoch": 1.5607321131447587,
+      "grad_norm": 0.29016315937042236,
+      "learning_rate": 2.426215601769526e-05,
+      "loss": 0.3247,
+      "step": 2814
+    },
+    {
+      "epoch": 1.5618413754853022,
+      "grad_norm": 0.3246505558490753,
+      "learning_rate": 2.4144955758585184e-05,
+      "loss": 0.4428,
+      "step": 2816
+    },
+    {
+      "epoch": 1.5629506378258458,
+      "grad_norm": 0.27316877245903015,
+      "learning_rate": 2.402800037919578e-05,
+      "loss": 0.3025,
+      "step": 2818
+    },
+    {
+      "epoch": 1.5640599001663893,
+      "grad_norm": 0.2579948902130127,
+      "learning_rate": 2.3911290257089348e-05,
+      "loss": 0.3673,
+      "step": 2820
+    },
+    {
+      "epoch": 1.5651691625069328,
+      "grad_norm": 0.3941158354282379,
+      "learning_rate": 2.3794825769036334e-05,
+      "loss": 0.4028,
+      "step": 2822
+    },
+    {
+      "epoch": 1.5662784248474764,
+      "grad_norm": 0.2645871341228485,
+      "learning_rate": 2.3678607291014242e-05,
+      "loss": 0.3511,
+      "step": 2824
+    },
+    {
+      "epoch": 1.56738768718802,
+      "grad_norm": 0.2745266854763031,
+      "learning_rate": 2.356263519820647e-05,
+      "loss": 0.3726,
+      "step": 2826
+    },
+    {
+      "epoch": 1.5684969495285634,
+      "grad_norm": 0.4434897303581238,
+      "learning_rate": 2.3446909865000886e-05,
+      "loss": 0.5269,
+      "step": 2828
+    },
+    {
+      "epoch": 1.569606211869107,
+      "grad_norm": 0.27076244354248047,
+      "learning_rate": 2.333143166498889e-05,
+      "loss": 0.3558,
+      "step": 2830
+    },
+    {
+      "epoch": 1.5707154742096505,
+      "grad_norm": 0.3606158196926117,
+      "learning_rate": 2.3216200970963954e-05,
+      "loss": 0.4266,
+      "step": 2832
+    },
+    {
+      "epoch": 1.571824736550194,
+      "grad_norm": 0.2903146743774414,
+      "learning_rate": 2.3101218154920633e-05,
+      "loss": 0.3087,
+      "step": 2834
+    },
+    {
+      "epoch": 1.5729339988907376,
+      "grad_norm": 0.3455762565135956,
+      "learning_rate": 2.298648358805322e-05,
+      "loss": 0.389,
+      "step": 2836
+    },
+    {
+      "epoch": 1.5740432612312811,
+      "grad_norm": 0.2828775942325592,
+      "learning_rate": 2.2871997640754572e-05,
+      "loss": 0.3795,
+      "step": 2838
+    },
+    {
+      "epoch": 1.5751525235718247,
+      "grad_norm": 0.3083617389202118,
+      "learning_rate": 2.275776068261495e-05,
+      "loss": 0.3764,
+      "step": 2840
+    },
+    {
+      "epoch": 1.5762617859123682,
+      "grad_norm": 0.32137981057167053,
+      "learning_rate": 2.264377308242086e-05,
+      "loss": 0.3609,
+      "step": 2842
+    },
+    {
+      "epoch": 1.5773710482529117,
+      "grad_norm": 0.2916238605976105,
+      "learning_rate": 2.2530035208153822e-05,
+      "loss": 0.3584,
+      "step": 2844
+    },
+    {
+      "epoch": 1.5784803105934553,
+      "grad_norm": 0.26702216267585754,
+      "learning_rate": 2.241654742698909e-05,
+      "loss": 0.3635,
+      "step": 2846
+    },
+    {
+      "epoch": 1.5795895729339988,
+      "grad_norm": 0.3438095152378082,
+      "learning_rate": 2.2303310105294582e-05,
+      "loss": 0.372,
+      "step": 2848
+    },
+    {
+      "epoch": 1.5806988352745424,
+      "grad_norm": 0.31688249111175537,
+      "learning_rate": 2.219032360862976e-05,
+      "loss": 0.3912,
+      "step": 2850
+    },
+    {
+      "epoch": 1.581808097615086,
+      "grad_norm": 0.2813704013824463,
+      "learning_rate": 2.2077588301744233e-05,
+      "loss": 0.3545,
+      "step": 2852
+    },
+    {
+      "epoch": 1.5829173599556294,
+      "grad_norm": 0.23737509548664093,
+      "learning_rate": 2.1965104548576753e-05,
+      "loss": 0.3507,
+      "step": 2854
+    },
+    {
+      "epoch": 1.584026622296173,
+      "grad_norm": 0.32858365774154663,
+      "learning_rate": 2.1852872712254002e-05,
+      "loss": 0.3221,
+      "step": 2856
+    },
+    {
+      "epoch": 1.5851358846367165,
+      "grad_norm": 0.2847982943058014,
+      "learning_rate": 2.1740893155089447e-05,
+      "loss": 0.3456,
+      "step": 2858
+    },
+    {
+      "epoch": 1.58624514697726,
+      "grad_norm": 0.28835567831993103,
+      "learning_rate": 2.1629166238582056e-05,
+      "loss": 0.3682,
+      "step": 2860
+    },
+    {
+      "epoch": 1.5873544093178036,
+      "grad_norm": 0.2693901062011719,
+      "learning_rate": 2.1517692323415205e-05,
+      "loss": 0.3503,
+      "step": 2862
+    },
+    {
+      "epoch": 1.5884636716583471,
+      "grad_norm": 0.2496192455291748,
+      "learning_rate": 2.1406471769455615e-05,
+      "loss": 0.3414,
+      "step": 2864
+    },
+    {
+      "epoch": 1.5895729339988907,
+      "grad_norm": 0.2739793658256531,
+      "learning_rate": 2.129550493575201e-05,
+      "loss": 0.4304,
+      "step": 2866
+    },
+    {
+      "epoch": 1.5906821963394342,
+      "grad_norm": 0.2115955650806427,
+      "learning_rate": 2.118479218053401e-05,
+      "loss": 0.3131,
+      "step": 2868
+    },
+    {
+      "epoch": 1.5917914586799777,
+      "grad_norm": 0.283636212348938,
+      "learning_rate": 2.1074333861211103e-05,
+      "loss": 0.4183,
+      "step": 2870
+    },
+    {
+      "epoch": 1.5929007210205213,
+      "grad_norm": 0.2762402594089508,
+      "learning_rate": 2.096413033437131e-05,
+      "loss": 0.3805,
+      "step": 2872
+    },
+    {
+      "epoch": 1.5940099833610648,
+      "grad_norm": 0.27344250679016113,
+      "learning_rate": 2.0854181955780183e-05,
+      "loss": 0.3537,
+      "step": 2874
+    },
+    {
+      "epoch": 1.5951192457016083,
+      "grad_norm": 0.3143325448036194,
+      "learning_rate": 2.0744489080379504e-05,
+      "loss": 0.3461,
+      "step": 2876
+    },
+    {
+      "epoch": 1.5962285080421519,
+      "grad_norm": 0.26111075282096863,
+      "learning_rate": 2.063505206228632e-05,
+      "loss": 0.3634,
+      "step": 2878
+    },
+    {
+      "epoch": 1.5973377703826954,
+      "grad_norm": 0.32173627614974976,
+      "learning_rate": 2.0525871254791627e-05,
+      "loss": 0.3973,
+      "step": 2880
+    },
+    {
+      "epoch": 1.598447032723239,
+      "grad_norm": 0.2806760370731354,
+      "learning_rate": 2.0416947010359355e-05,
+      "loss": 0.3786,
+      "step": 2882
+    },
+    {
+      "epoch": 1.5995562950637825,
+      "grad_norm": 0.30123627185821533,
+      "learning_rate": 2.030827968062513e-05,
+      "loss": 0.427,
+      "step": 2884
+    },
+    {
+      "epoch": 1.600665557404326,
+      "grad_norm": 0.322729229927063,
+      "learning_rate": 2.019986961639524e-05,
+      "loss": 0.353,
+      "step": 2886
+    },
+    {
+      "epoch": 1.6017748197448696,
+      "grad_norm": 0.2584727108478546,
+      "learning_rate": 2.0091717167645475e-05,
+      "loss": 0.2905,
+      "step": 2888
+    },
+    {
+      "epoch": 1.602884082085413,
+      "grad_norm": 0.2751784026622772,
+      "learning_rate": 1.9983822683519915e-05,
+      "loss": 0.3394,
+      "step": 2890
+    },
+    {
+      "epoch": 1.6039933444259566,
+      "grad_norm": 0.29693764448165894,
+      "learning_rate": 1.9876186512329853e-05,
+      "loss": 0.4027,
+      "step": 2892
+    },
+    {
+      "epoch": 1.6051026067665002,
+      "grad_norm": 0.2711539566516876,
+      "learning_rate": 1.9768809001552768e-05,
+      "loss": 0.349,
+      "step": 2894
+    },
+    {
+      "epoch": 1.6062118691070437,
+      "grad_norm": 0.25827860832214355,
+      "learning_rate": 1.9661690497831053e-05,
+      "loss": 0.4183,
+      "step": 2896
+    },
+    {
+      "epoch": 1.6073211314475873,
+      "grad_norm": 0.34938088059425354,
+      "learning_rate": 1.9554831346970925e-05,
+      "loss": 0.3684,
+      "step": 2898
+    },
+    {
+      "epoch": 1.6084303937881308,
+      "grad_norm": 0.26432278752326965,
+      "learning_rate": 1.9448231893941414e-05,
+      "loss": 0.4979,
+      "step": 2900
+    },
+    {
+      "epoch": 1.6095396561286743,
+      "grad_norm": 0.32702112197875977,
+      "learning_rate": 1.9341892482873192e-05,
+      "loss": 0.3844,
+      "step": 2902
+    },
+    {
+      "epoch": 1.6106489184692179,
+      "grad_norm": 0.36097395420074463,
+      "learning_rate": 1.923581345705736e-05,
+      "loss": 0.3576,
+      "step": 2904
+    },
+    {
+      "epoch": 1.6117581808097614,
+      "grad_norm": 0.3077182471752167,
+      "learning_rate": 1.912999515894448e-05,
+      "loss": 0.5143,
+      "step": 2906
+    },
+    {
+      "epoch": 1.612867443150305,
+      "grad_norm": 0.2704939544200897,
+      "learning_rate": 1.9024437930143435e-05,
+      "loss": 0.3342,
+      "step": 2908
+    },
+    {
+      "epoch": 1.6139767054908485,
+      "grad_norm": 0.22881537675857544,
+      "learning_rate": 1.8919142111420284e-05,
+      "loss": 0.3769,
+      "step": 2910
+    },
+    {
+      "epoch": 1.615085967831392,
+      "grad_norm": 0.29385611414909363,
+      "learning_rate": 1.8814108042697144e-05,
+      "loss": 0.3847,
+      "step": 2912
+    },
+    {
+      "epoch": 1.6161952301719356,
+      "grad_norm": 0.4236384630203247,
+      "learning_rate": 1.870933606305122e-05,
+      "loss": 0.4581,
+      "step": 2914
+    },
+    {
+      "epoch": 1.617304492512479,
+      "grad_norm": 0.2979065477848053,
+      "learning_rate": 1.8604826510713613e-05,
+      "loss": 0.4182,
+      "step": 2916
+    },
+    {
+      "epoch": 1.6184137548530226,
+      "grad_norm": 0.335405170917511,
+      "learning_rate": 1.8500579723068177e-05,
+      "loss": 0.3544,
+      "step": 2918
+    },
+    {
+      "epoch": 1.6195230171935662,
+      "grad_norm": 0.2822960615158081,
+      "learning_rate": 1.8396596036650514e-05,
+      "loss": 0.336,
+      "step": 2920
+    },
+    {
+      "epoch": 1.6206322795341097,
+      "grad_norm": 0.3513801395893097,
+      "learning_rate": 1.8292875787146946e-05,
+      "loss": 0.4,
+      "step": 2922
+    },
+    {
+      "epoch": 1.6217415418746532,
+      "grad_norm": 0.2501135766506195,
+      "learning_rate": 1.8189419309393242e-05,
+      "loss": 0.3641,
+      "step": 2924
+    },
+    {
+      "epoch": 1.6228508042151968,
+      "grad_norm": 0.3006201684474945,
+      "learning_rate": 1.8086226937373674e-05,
+      "loss": 0.4112,
+      "step": 2926
+    },
+    {
+      "epoch": 1.6239600665557403,
+      "grad_norm": 0.2748831808567047,
+      "learning_rate": 1.798329900422e-05,
+      "loss": 0.31,
+      "step": 2928
+    },
+    {
+      "epoch": 1.6250693288962839,
+      "grad_norm": 0.3650710880756378,
+      "learning_rate": 1.788063584221017e-05,
+      "loss": 0.3872,
+      "step": 2930
+    },
+    {
+      "epoch": 1.6261785912368274,
+      "grad_norm": 0.3932930827140808,
+      "learning_rate": 1.7778237782767504e-05,
+      "loss": 0.4484,
+      "step": 2932
+    },
+    {
+      "epoch": 1.627287853577371,
+      "grad_norm": 0.25739145278930664,
+      "learning_rate": 1.7676105156459398e-05,
+      "loss": 0.3541,
+      "step": 2934
+    },
+    {
+      "epoch": 1.6283971159179145,
+      "grad_norm": 0.22192710638046265,
+      "learning_rate": 1.7574238292996458e-05,
+      "loss": 0.3301,
+      "step": 2936
+    },
+    {
+      "epoch": 1.629506378258458,
+      "grad_norm": 0.2925964593887329,
+      "learning_rate": 1.7472637521231283e-05,
+      "loss": 0.4855,
+      "step": 2938
+    },
+    {
+      "epoch": 1.6306156405990015,
+      "grad_norm": 0.2878285050392151,
+      "learning_rate": 1.737130316915744e-05,
+      "loss": 0.4119,
+      "step": 2940
+    },
+    {
+      "epoch": 1.631724902939545,
+      "grad_norm": 0.2855752110481262,
+      "learning_rate": 1.7270235563908443e-05,
+      "loss": 0.4221,
+      "step": 2942
+    },
+    {
+      "epoch": 1.6328341652800886,
+      "grad_norm": 0.30537575483322144,
+      "learning_rate": 1.716943503175671e-05,
+      "loss": 0.4187,
+      "step": 2944
+    },
+    {
+      "epoch": 1.6339434276206322,
+      "grad_norm": 0.32603779435157776,
+      "learning_rate": 1.7068901898112478e-05,
+      "loss": 0.4118,
+      "step": 2946
+    },
+    {
+      "epoch": 1.6350526899611757,
+      "grad_norm": 0.21832433342933655,
+      "learning_rate": 1.6968636487522705e-05,
+      "loss": 0.3122,
+      "step": 2948
+    },
+    {
+      "epoch": 1.6361619523017192,
+      "grad_norm": 0.30126479268074036,
+      "learning_rate": 1.686863912367006e-05,
+      "loss": 0.322,
+      "step": 2950
+    },
+    {
+      "epoch": 1.6372712146422628,
+      "grad_norm": 0.27455347776412964,
+      "learning_rate": 1.6768910129371986e-05,
+      "loss": 0.3588,
+      "step": 2952
+    },
+    {
+      "epoch": 1.6383804769828063,
+      "grad_norm": 0.26136961579322815,
+      "learning_rate": 1.6669449826579464e-05,
+      "loss": 0.3672,
+      "step": 2954
+    },
+    {
+      "epoch": 1.6394897393233498,
+      "grad_norm": 0.24132628738880157,
+      "learning_rate": 1.6570258536376083e-05,
+      "loss": 0.3935,
+      "step": 2956
+    },
+    {
+      "epoch": 1.6405990016638934,
+      "grad_norm": 0.38164663314819336,
+      "learning_rate": 1.6471336578977016e-05,
+      "loss": 0.4923,
+      "step": 2958
+    },
+    {
+      "epoch": 1.641708264004437,
+      "grad_norm": 0.3024519979953766,
+      "learning_rate": 1.637268427372799e-05,
+      "loss": 0.4043,
+      "step": 2960
+    },
+    {
+      "epoch": 1.6428175263449805,
+      "grad_norm": 0.29123973846435547,
+      "learning_rate": 1.627430193910414e-05,
+      "loss": 0.3372,
+      "step": 2962
+    },
+    {
+      "epoch": 1.643926788685524,
+      "grad_norm": 0.2549437880516052,
+      "learning_rate": 1.6176189892709127e-05,
+      "loss": 0.2834,
+      "step": 2964
+    },
+    {
+      "epoch": 1.6450360510260675,
+      "grad_norm": 0.3285108804702759,
+      "learning_rate": 1.607834845127405e-05,
+      "loss": 0.3657,
+      "step": 2966
+    },
+    {
+      "epoch": 1.646145313366611,
+      "grad_norm": 0.24914546310901642,
+      "learning_rate": 1.59807779306564e-05,
+      "loss": 0.3498,
+      "step": 2968
+    },
+    {
+      "epoch": 1.6472545757071546,
+      "grad_norm": 0.2854565978050232,
+      "learning_rate": 1.5883478645839045e-05,
+      "loss": 0.3597,
+      "step": 2970
+    },
+    {
+      "epoch": 1.6483638380476981,
+      "grad_norm": 0.24184933304786682,
+      "learning_rate": 1.578645091092933e-05,
+      "loss": 0.3682,
+      "step": 2972
+    },
+    {
+      "epoch": 1.6494731003882417,
+      "grad_norm": 0.30457058548927307,
+      "learning_rate": 1.5689695039157848e-05,
+      "loss": 0.3172,
+      "step": 2974
+    },
+    {
+      "epoch": 1.6505823627287852,
+      "grad_norm": 0.23675574362277985,
+      "learning_rate": 1.5593211342877645e-05,
+      "loss": 0.27,
+      "step": 2976
+    },
+    {
+      "epoch": 1.6516916250693288,
+      "grad_norm": 0.3284320533275604,
+      "learning_rate": 1.5497000133563022e-05,
+      "loss": 0.4104,
+      "step": 2978
+    },
+    {
+      "epoch": 1.6528008874098723,
+      "grad_norm": 0.4357747435569763,
+      "learning_rate": 1.540106172180873e-05,
+      "loss": 0.4127,
+      "step": 2980
+    },
+    {
+      "epoch": 1.6539101497504158,
+      "grad_norm": 0.3309295177459717,
+      "learning_rate": 1.5305396417328756e-05,
+      "loss": 0.4256,
+      "step": 2982
+    },
+    {
+      "epoch": 1.6550194120909594,
+      "grad_norm": 0.27238425612449646,
+      "learning_rate": 1.5210004528955468e-05,
+      "loss": 0.359,
+      "step": 2984
+    },
+    {
+      "epoch": 1.656128674431503,
+      "grad_norm": 0.30173739790916443,
+      "learning_rate": 1.5114886364638614e-05,
+      "loss": 0.4343,
+      "step": 2986
+    },
+    {
+      "epoch": 1.6572379367720464,
+      "grad_norm": 0.29943424463272095,
+      "learning_rate": 1.5020042231444197e-05,
+      "loss": 0.344,
+      "step": 2988
+    },
+    {
+      "epoch": 1.65834719911259,
+      "grad_norm": 0.34404435753822327,
+      "learning_rate": 1.4925472435553701e-05,
+      "loss": 0.3992,
+      "step": 2990
+    },
+    {
+      "epoch": 1.6594564614531335,
+      "grad_norm": 0.3563268184661865,
+      "learning_rate": 1.4831177282262842e-05,
+      "loss": 0.4014,
+      "step": 2992
+    },
+    {
+      "epoch": 1.660565723793677,
+      "grad_norm": 0.2387107014656067,
+      "learning_rate": 1.4737157075980845e-05,
+      "loss": 0.3141,
+      "step": 2994
+    },
+    {
+      "epoch": 1.6616749861342206,
+      "grad_norm": 0.3576110899448395,
+      "learning_rate": 1.4643412120229262e-05,
+      "loss": 0.3765,
+      "step": 2996
+    },
+    {
+      "epoch": 1.6627842484747641,
+      "grad_norm": 0.335438072681427,
+      "learning_rate": 1.4549942717641052e-05,
+      "loss": 0.3619,
+      "step": 2998
+    },
+    {
+      "epoch": 1.6638935108153077,
+      "grad_norm": 0.2912905216217041,
+      "learning_rate": 1.4456749169959648e-05,
+      "loss": 0.3389,
+      "step": 3000
+    },
+    {
+      "epoch": 1.6650027731558512,
+      "grad_norm": 0.2544459104537964,
+      "learning_rate": 1.4363831778037961e-05,
+      "loss": 0.2778,
+      "step": 3002
+    },
+    {
+      "epoch": 1.6661120354963947,
+      "grad_norm": 0.36533600091934204,
+      "learning_rate": 1.42711908418374e-05,
+      "loss": 0.3798,
+      "step": 3004
+    },
+    {
+      "epoch": 1.6672212978369383,
+      "grad_norm": 0.2710284888744354,
+      "learning_rate": 1.4178826660426891e-05,
+      "loss": 0.305,
+      "step": 3006
+    },
+    {
+      "epoch": 1.6683305601774818,
+      "grad_norm": 0.21859732270240784,
+      "learning_rate": 1.4086739531981885e-05,
+      "loss": 0.4432,
+      "step": 3008
+    },
+    {
+      "epoch": 1.6694398225180254,
+      "grad_norm": 0.22141209244728088,
+      "learning_rate": 1.3994929753783515e-05,
+      "loss": 0.3012,
+      "step": 3010
+    },
+    {
+      "epoch": 1.670549084858569,
+      "grad_norm": 0.3187447786331177,
+      "learning_rate": 1.3903397622217506e-05,
+      "loss": 0.3794,
+      "step": 3012
+    },
+    {
+      "epoch": 1.6716583471991124,
+      "grad_norm": 0.37787067890167236,
+      "learning_rate": 1.381214343277324e-05,
+      "loss": 0.3672,
+      "step": 3014
+    },
+    {
+      "epoch": 1.672767609539656,
+      "grad_norm": 0.3910747766494751,
+      "learning_rate": 1.3721167480042885e-05,
+      "loss": 0.4577,
+      "step": 3016
+    },
+    {
+      "epoch": 1.6738768718801995,
+      "grad_norm": 0.3260791003704071,
+      "learning_rate": 1.3630470057720402e-05,
+      "loss": 0.4624,
+      "step": 3018
+    },
+    {
+      "epoch": 1.674986134220743,
+      "grad_norm": 0.3197901248931885,
+      "learning_rate": 1.3540051458600523e-05,
+      "loss": 0.3861,
+      "step": 3020
+    },
+    {
+      "epoch": 1.6760953965612866,
+      "grad_norm": 0.3465018570423126,
+      "learning_rate": 1.3449911974577877e-05,
+      "loss": 0.5036,
+      "step": 3022
+    },
+    {
+      "epoch": 1.6772046589018301,
+      "grad_norm": 0.32030799984931946,
+      "learning_rate": 1.3360051896646086e-05,
+      "loss": 0.3244,
+      "step": 3024
+    },
+    {
+      "epoch": 1.6783139212423737,
+      "grad_norm": 0.2779349088668823,
+      "learning_rate": 1.3270471514896743e-05,
+      "loss": 0.3362,
+      "step": 3026
+    },
+    {
+      "epoch": 1.6794231835829172,
+      "grad_norm": 0.312095046043396,
+      "learning_rate": 1.3181171118518465e-05,
+      "loss": 0.5161,
+      "step": 3028
+    },
+    {
+      "epoch": 1.6805324459234607,
+      "grad_norm": 0.2570739984512329,
+      "learning_rate": 1.3092150995796115e-05,
+      "loss": 0.3741,
+      "step": 3030
+    },
+    {
+      "epoch": 1.6816417082640043,
+      "grad_norm": 0.27533021569252014,
+      "learning_rate": 1.3003411434109647e-05,
+      "loss": 0.3173,
+      "step": 3032
+    },
+    {
+      "epoch": 1.6827509706045478,
+      "grad_norm": 0.2738421559333801,
+      "learning_rate": 1.2914952719933371e-05,
+      "loss": 0.4167,
+      "step": 3034
+    },
+    {
+      "epoch": 1.6838602329450914,
+      "grad_norm": 0.21936124563217163,
+      "learning_rate": 1.282677513883489e-05,
+      "loss": 0.3356,
+      "step": 3036
+    },
+    {
+      "epoch": 1.6849694952856349,
+      "grad_norm": 0.2971390187740326,
+      "learning_rate": 1.2738878975474288e-05,
+      "loss": 0.3919,
+      "step": 3038
+    },
+    {
+      "epoch": 1.6860787576261784,
+      "grad_norm": 0.3661748766899109,
+      "learning_rate": 1.2651264513603134e-05,
+      "loss": 0.3864,
+      "step": 3040
+    },
+    {
+      "epoch": 1.687188019966722,
+      "grad_norm": 0.3551200330257416,
+      "learning_rate": 1.2563932036063586e-05,
+      "loss": 0.3555,
+      "step": 3042
+    },
+    {
+      "epoch": 1.6882972823072657,
+      "grad_norm": 0.27041590213775635,
+      "learning_rate": 1.2476881824787467e-05,
+      "loss": 0.295,
+      "step": 3044
+    },
+    {
+      "epoch": 1.6894065446478093,
+      "grad_norm": 0.2313155084848404,
+      "learning_rate": 1.2390114160795419e-05,
+      "loss": 0.3177,
+      "step": 3046
+    },
+    {
+      "epoch": 1.6905158069883528,
+      "grad_norm": 0.3004077970981598,
+      "learning_rate": 1.2303629324195943e-05,
+      "loss": 0.3845,
+      "step": 3048
+    },
+    {
+      "epoch": 1.6916250693288963,
+      "grad_norm": 0.29577240347862244,
+      "learning_rate": 1.2217427594184461e-05,
+      "loss": 0.3376,
+      "step": 3050
+    },
+    {
+      "epoch": 1.6927343316694399,
+      "grad_norm": 0.363438218832016,
+      "learning_rate": 1.213150924904245e-05,
+      "loss": 0.465,
+      "step": 3052
+    },
+    {
+      "epoch": 1.6938435940099834,
+      "grad_norm": 0.2636345624923706,
+      "learning_rate": 1.2045874566136617e-05,
+      "loss": 0.2845,
+      "step": 3054
+    },
+    {
+      "epoch": 1.694952856350527,
+      "grad_norm": 0.3315665125846863,
+      "learning_rate": 1.1960523821917868e-05,
+      "loss": 0.4179,
+      "step": 3056
+    },
+    {
+      "epoch": 1.6960621186910705,
+      "grad_norm": 0.27641746401786804,
+      "learning_rate": 1.1875457291920477e-05,
+      "loss": 0.3542,
+      "step": 3058
+    },
+    {
+      "epoch": 1.697171381031614,
+      "grad_norm": 0.39690592885017395,
+      "learning_rate": 1.1790675250761263e-05,
+      "loss": 0.4511,
+      "step": 3060
+    },
+    {
+      "epoch": 1.6982806433721576,
+      "grad_norm": 0.1926700472831726,
+      "learning_rate": 1.1706177972138599e-05,
+      "loss": 0.2946,
+      "step": 3062
+    },
+    {
+      "epoch": 1.699389905712701,
+      "grad_norm": 0.27746477723121643,
+      "learning_rate": 1.1621965728831564e-05,
+      "loss": 0.3691,
+      "step": 3064
+    },
+    {
+      "epoch": 1.7004991680532446,
+      "grad_norm": 0.2863025367259979,
+      "learning_rate": 1.1538038792699068e-05,
+      "loss": 0.3466,
+      "step": 3066
+    },
+    {
+      "epoch": 1.7016084303937882,
+      "grad_norm": 0.31509512662887573,
+      "learning_rate": 1.1454397434679021e-05,
+      "loss": 0.3379,
+      "step": 3068
+    },
+    {
+      "epoch": 1.7027176927343317,
+      "grad_norm": 0.3157186806201935,
+      "learning_rate": 1.1371041924787362e-05,
+      "loss": 0.3854,
+      "step": 3070
+    },
+    {
+      "epoch": 1.7038269550748752,
+      "grad_norm": 0.32956090569496155,
+      "learning_rate": 1.128797253211723e-05,
+      "loss": 0.3036,
+      "step": 3072
     }
   ],
   "logging_steps": 2,
@@ -9666,7 +10786,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 7482417840783360.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null