Training in progress, step 3500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1673342072
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a8ad065d2bf6fe719db00bfc86d9aca4be6f0bca9b3557a8ed7a64539963b55
|
3 |
size 1673342072
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 194745274
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f0a8c27caac8d4d654fe68e5b9789c63eb21a703a3bf567823bc1d52feb39dc
|
3 |
size 194745274
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9b1df5fb25513ffea2c862b1a01785af03a097fa89b0a8f449bf99d0346d6e0
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4288febd9081786b52bdf01d6f570b7ebf36526320fd40daa36c3cb1500b408c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 900,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -21031,6 +21031,3506 @@
|
|
21031 |
"learning_rate": 6.6815144766147e-05,
|
21032 |
"loss": 0.8101,
|
21033 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21034 |
}
|
21035 |
],
|
21036 |
"logging_steps": 1,
|
@@ -21050,7 +24550,7 @@
|
|
21050 |
"attributes": {}
|
21051 |
}
|
21052 |
},
|
21053 |
-
"total_flos": 3.
|
21054 |
"train_batch_size": 1,
|
21055 |
"trial_name": null,
|
21056 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7777777777777778,
|
5 |
"eval_steps": 900,
|
6 |
+
"global_step": 3500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
21031 |
"learning_rate": 6.6815144766147e-05,
|
21032 |
"loss": 0.8101,
|
21033 |
"step": 3000
|
21034 |
+
},
|
21035 |
+
{
|
21036 |
+
"epoch": 0.6668888888888889,
|
21037 |
+
"grad_norm": 0.8144944906234741,
|
21038 |
+
"learning_rate": 6.67706013363029e-05,
|
21039 |
+
"loss": 1.9887,
|
21040 |
+
"step": 3001
|
21041 |
+
},
|
21042 |
+
{
|
21043 |
+
"epoch": 0.6671111111111111,
|
21044 |
+
"grad_norm": 0.8721863627433777,
|
21045 |
+
"learning_rate": 6.67260579064588e-05,
|
21046 |
+
"loss": 2.3995,
|
21047 |
+
"step": 3002
|
21048 |
+
},
|
21049 |
+
{
|
21050 |
+
"epoch": 0.6673333333333333,
|
21051 |
+
"grad_norm": 0.801108717918396,
|
21052 |
+
"learning_rate": 6.66815144766147e-05,
|
21053 |
+
"loss": 2.1919,
|
21054 |
+
"step": 3003
|
21055 |
+
},
|
21056 |
+
{
|
21057 |
+
"epoch": 0.6675555555555556,
|
21058 |
+
"grad_norm": 0.6709057688713074,
|
21059 |
+
"learning_rate": 6.66369710467706e-05,
|
21060 |
+
"loss": 1.0861,
|
21061 |
+
"step": 3004
|
21062 |
+
},
|
21063 |
+
{
|
21064 |
+
"epoch": 0.6677777777777778,
|
21065 |
+
"grad_norm": 0.05186731740832329,
|
21066 |
+
"learning_rate": 6.659242761692652e-05,
|
21067 |
+
"loss": 0.0109,
|
21068 |
+
"step": 3005
|
21069 |
+
},
|
21070 |
+
{
|
21071 |
+
"epoch": 0.668,
|
21072 |
+
"grad_norm": 0.6289195418357849,
|
21073 |
+
"learning_rate": 6.654788418708241e-05,
|
21074 |
+
"loss": 1.1656,
|
21075 |
+
"step": 3006
|
21076 |
+
},
|
21077 |
+
{
|
21078 |
+
"epoch": 0.6682222222222223,
|
21079 |
+
"grad_norm": 0.5143423080444336,
|
21080 |
+
"learning_rate": 6.650334075723831e-05,
|
21081 |
+
"loss": 1.1429,
|
21082 |
+
"step": 3007
|
21083 |
+
},
|
21084 |
+
{
|
21085 |
+
"epoch": 0.6684444444444444,
|
21086 |
+
"grad_norm": 0.7924249768257141,
|
21087 |
+
"learning_rate": 6.645879732739422e-05,
|
21088 |
+
"loss": 1.8575,
|
21089 |
+
"step": 3008
|
21090 |
+
},
|
21091 |
+
{
|
21092 |
+
"epoch": 0.6686666666666666,
|
21093 |
+
"grad_norm": 0.09778264164924622,
|
21094 |
+
"learning_rate": 6.641425389755011e-05,
|
21095 |
+
"loss": 0.0128,
|
21096 |
+
"step": 3009
|
21097 |
+
},
|
21098 |
+
{
|
21099 |
+
"epoch": 0.6688888888888889,
|
21100 |
+
"grad_norm": 0.10178276896476746,
|
21101 |
+
"learning_rate": 6.636971046770602e-05,
|
21102 |
+
"loss": 0.013,
|
21103 |
+
"step": 3010
|
21104 |
+
},
|
21105 |
+
{
|
21106 |
+
"epoch": 0.6691111111111111,
|
21107 |
+
"grad_norm": 0.09697845578193665,
|
21108 |
+
"learning_rate": 6.632516703786192e-05,
|
21109 |
+
"loss": 0.0126,
|
21110 |
+
"step": 3011
|
21111 |
+
},
|
21112 |
+
{
|
21113 |
+
"epoch": 0.6693333333333333,
|
21114 |
+
"grad_norm": 1.0395288467407227,
|
21115 |
+
"learning_rate": 6.628062360801783e-05,
|
21116 |
+
"loss": 2.0803,
|
21117 |
+
"step": 3012
|
21118 |
+
},
|
21119 |
+
{
|
21120 |
+
"epoch": 0.6695555555555556,
|
21121 |
+
"grad_norm": 0.8418979048728943,
|
21122 |
+
"learning_rate": 6.623608017817372e-05,
|
21123 |
+
"loss": 2.1971,
|
21124 |
+
"step": 3013
|
21125 |
+
},
|
21126 |
+
{
|
21127 |
+
"epoch": 0.6697777777777778,
|
21128 |
+
"grad_norm": 0.9855999946594238,
|
21129 |
+
"learning_rate": 6.619153674832962e-05,
|
21130 |
+
"loss": 1.9663,
|
21131 |
+
"step": 3014
|
21132 |
+
},
|
21133 |
+
{
|
21134 |
+
"epoch": 0.67,
|
21135 |
+
"grad_norm": 0.9182018637657166,
|
21136 |
+
"learning_rate": 6.614699331848553e-05,
|
21137 |
+
"loss": 2.1056,
|
21138 |
+
"step": 3015
|
21139 |
+
},
|
21140 |
+
{
|
21141 |
+
"epoch": 0.6702222222222223,
|
21142 |
+
"grad_norm": 0.9927064180374146,
|
21143 |
+
"learning_rate": 6.610244988864142e-05,
|
21144 |
+
"loss": 1.8567,
|
21145 |
+
"step": 3016
|
21146 |
+
},
|
21147 |
+
{
|
21148 |
+
"epoch": 0.6704444444444444,
|
21149 |
+
"grad_norm": 0.8513967394828796,
|
21150 |
+
"learning_rate": 6.605790645879733e-05,
|
21151 |
+
"loss": 1.8663,
|
21152 |
+
"step": 3017
|
21153 |
+
},
|
21154 |
+
{
|
21155 |
+
"epoch": 0.6706666666666666,
|
21156 |
+
"grad_norm": 0.6829978227615356,
|
21157 |
+
"learning_rate": 6.601336302895323e-05,
|
21158 |
+
"loss": 0.9329,
|
21159 |
+
"step": 3018
|
21160 |
+
},
|
21161 |
+
{
|
21162 |
+
"epoch": 0.6708888888888889,
|
21163 |
+
"grad_norm": 1.0144321918487549,
|
21164 |
+
"learning_rate": 6.596881959910914e-05,
|
21165 |
+
"loss": 2.3309,
|
21166 |
+
"step": 3019
|
21167 |
+
},
|
21168 |
+
{
|
21169 |
+
"epoch": 0.6711111111111111,
|
21170 |
+
"grad_norm": 0.9434064030647278,
|
21171 |
+
"learning_rate": 6.592427616926503e-05,
|
21172 |
+
"loss": 1.841,
|
21173 |
+
"step": 3020
|
21174 |
+
},
|
21175 |
+
{
|
21176 |
+
"epoch": 0.6713333333333333,
|
21177 |
+
"grad_norm": 0.9861494302749634,
|
21178 |
+
"learning_rate": 6.587973273942093e-05,
|
21179 |
+
"loss": 2.0507,
|
21180 |
+
"step": 3021
|
21181 |
+
},
|
21182 |
+
{
|
21183 |
+
"epoch": 0.6715555555555556,
|
21184 |
+
"grad_norm": 1.0820823907852173,
|
21185 |
+
"learning_rate": 6.583518930957684e-05,
|
21186 |
+
"loss": 1.5992,
|
21187 |
+
"step": 3022
|
21188 |
+
},
|
21189 |
+
{
|
21190 |
+
"epoch": 0.6717777777777778,
|
21191 |
+
"grad_norm": 0.6171742677688599,
|
21192 |
+
"learning_rate": 6.579064587973273e-05,
|
21193 |
+
"loss": 0.876,
|
21194 |
+
"step": 3023
|
21195 |
+
},
|
21196 |
+
{
|
21197 |
+
"epoch": 0.672,
|
21198 |
+
"grad_norm": 0.07197222858667374,
|
21199 |
+
"learning_rate": 6.574610244988865e-05,
|
21200 |
+
"loss": 0.0164,
|
21201 |
+
"step": 3024
|
21202 |
+
},
|
21203 |
+
{
|
21204 |
+
"epoch": 0.6722222222222223,
|
21205 |
+
"grad_norm": 0.7509397268295288,
|
21206 |
+
"learning_rate": 6.570155902004454e-05,
|
21207 |
+
"loss": 0.9635,
|
21208 |
+
"step": 3025
|
21209 |
+
},
|
21210 |
+
{
|
21211 |
+
"epoch": 0.6724444444444444,
|
21212 |
+
"grad_norm": 0.9577994346618652,
|
21213 |
+
"learning_rate": 6.565701559020045e-05,
|
21214 |
+
"loss": 1.7846,
|
21215 |
+
"step": 3026
|
21216 |
+
},
|
21217 |
+
{
|
21218 |
+
"epoch": 0.6726666666666666,
|
21219 |
+
"grad_norm": 0.9909307360649109,
|
21220 |
+
"learning_rate": 6.561247216035635e-05,
|
21221 |
+
"loss": 1.7811,
|
21222 |
+
"step": 3027
|
21223 |
+
},
|
21224 |
+
{
|
21225 |
+
"epoch": 0.6728888888888889,
|
21226 |
+
"grad_norm": 1.076392412185669,
|
21227 |
+
"learning_rate": 6.556792873051225e-05,
|
21228 |
+
"loss": 1.6848,
|
21229 |
+
"step": 3028
|
21230 |
+
},
|
21231 |
+
{
|
21232 |
+
"epoch": 0.6731111111111111,
|
21233 |
+
"grad_norm": 0.9113189578056335,
|
21234 |
+
"learning_rate": 6.552338530066815e-05,
|
21235 |
+
"loss": 1.7005,
|
21236 |
+
"step": 3029
|
21237 |
+
},
|
21238 |
+
{
|
21239 |
+
"epoch": 0.6733333333333333,
|
21240 |
+
"grad_norm": 1.2456274032592773,
|
21241 |
+
"learning_rate": 6.547884187082406e-05,
|
21242 |
+
"loss": 1.7526,
|
21243 |
+
"step": 3030
|
21244 |
+
},
|
21245 |
+
{
|
21246 |
+
"epoch": 0.6735555555555556,
|
21247 |
+
"grad_norm": 1.0734461545944214,
|
21248 |
+
"learning_rate": 6.543429844097996e-05,
|
21249 |
+
"loss": 1.9902,
|
21250 |
+
"step": 3031
|
21251 |
+
},
|
21252 |
+
{
|
21253 |
+
"epoch": 0.6737777777777778,
|
21254 |
+
"grad_norm": 0.7110247015953064,
|
21255 |
+
"learning_rate": 6.538975501113585e-05,
|
21256 |
+
"loss": 0.8453,
|
21257 |
+
"step": 3032
|
21258 |
+
},
|
21259 |
+
{
|
21260 |
+
"epoch": 0.674,
|
21261 |
+
"grad_norm": 0.059201423078775406,
|
21262 |
+
"learning_rate": 6.534521158129177e-05,
|
21263 |
+
"loss": 0.0163,
|
21264 |
+
"step": 3033
|
21265 |
+
},
|
21266 |
+
{
|
21267 |
+
"epoch": 0.6742222222222222,
|
21268 |
+
"grad_norm": 0.7983320355415344,
|
21269 |
+
"learning_rate": 6.530066815144766e-05,
|
21270 |
+
"loss": 0.9296,
|
21271 |
+
"step": 3034
|
21272 |
+
},
|
21273 |
+
{
|
21274 |
+
"epoch": 0.6744444444444444,
|
21275 |
+
"grad_norm": 0.07402991503477097,
|
21276 |
+
"learning_rate": 6.525612472160356e-05,
|
21277 |
+
"loss": 0.0193,
|
21278 |
+
"step": 3035
|
21279 |
+
},
|
21280 |
+
{
|
21281 |
+
"epoch": 0.6746666666666666,
|
21282 |
+
"grad_norm": 0.07244686037302017,
|
21283 |
+
"learning_rate": 6.521158129175947e-05,
|
21284 |
+
"loss": 0.0193,
|
21285 |
+
"step": 3036
|
21286 |
+
},
|
21287 |
+
{
|
21288 |
+
"epoch": 0.6748888888888889,
|
21289 |
+
"grad_norm": 1.0383340120315552,
|
21290 |
+
"learning_rate": 6.516703786191537e-05,
|
21291 |
+
"loss": 1.4567,
|
21292 |
+
"step": 3037
|
21293 |
+
},
|
21294 |
+
{
|
21295 |
+
"epoch": 0.6751111111111111,
|
21296 |
+
"grad_norm": 1.0180835723876953,
|
21297 |
+
"learning_rate": 6.512249443207127e-05,
|
21298 |
+
"loss": 1.8275,
|
21299 |
+
"step": 3038
|
21300 |
+
},
|
21301 |
+
{
|
21302 |
+
"epoch": 0.6753333333333333,
|
21303 |
+
"grad_norm": 1.225290298461914,
|
21304 |
+
"learning_rate": 6.507795100222718e-05,
|
21305 |
+
"loss": 1.7056,
|
21306 |
+
"step": 3039
|
21307 |
+
},
|
21308 |
+
{
|
21309 |
+
"epoch": 0.6755555555555556,
|
21310 |
+
"grad_norm": 0.8802182674407959,
|
21311 |
+
"learning_rate": 6.503340757238308e-05,
|
21312 |
+
"loss": 1.0935,
|
21313 |
+
"step": 3040
|
21314 |
+
},
|
21315 |
+
{
|
21316 |
+
"epoch": 0.6757777777777778,
|
21317 |
+
"grad_norm": 1.0758693218231201,
|
21318 |
+
"learning_rate": 6.498886414253897e-05,
|
21319 |
+
"loss": 1.7778,
|
21320 |
+
"step": 3041
|
21321 |
+
},
|
21322 |
+
{
|
21323 |
+
"epoch": 0.676,
|
21324 |
+
"grad_norm": 1.1325352191925049,
|
21325 |
+
"learning_rate": 6.494432071269488e-05,
|
21326 |
+
"loss": 1.5706,
|
21327 |
+
"step": 3042
|
21328 |
+
},
|
21329 |
+
{
|
21330 |
+
"epoch": 0.6762222222222222,
|
21331 |
+
"grad_norm": 1.0380780696868896,
|
21332 |
+
"learning_rate": 6.489977728285078e-05,
|
21333 |
+
"loss": 1.535,
|
21334 |
+
"step": 3043
|
21335 |
+
},
|
21336 |
+
{
|
21337 |
+
"epoch": 0.6764444444444444,
|
21338 |
+
"grad_norm": 0.9906545281410217,
|
21339 |
+
"learning_rate": 6.485523385300668e-05,
|
21340 |
+
"loss": 1.4007,
|
21341 |
+
"step": 3044
|
21342 |
+
},
|
21343 |
+
{
|
21344 |
+
"epoch": 0.6766666666666666,
|
21345 |
+
"grad_norm": 0.17783640325069427,
|
21346 |
+
"learning_rate": 6.48106904231626e-05,
|
21347 |
+
"loss": 0.0312,
|
21348 |
+
"step": 3045
|
21349 |
+
},
|
21350 |
+
{
|
21351 |
+
"epoch": 0.6768888888888889,
|
21352 |
+
"grad_norm": 0.9812122583389282,
|
21353 |
+
"learning_rate": 6.476614699331849e-05,
|
21354 |
+
"loss": 1.2594,
|
21355 |
+
"step": 3046
|
21356 |
+
},
|
21357 |
+
{
|
21358 |
+
"epoch": 0.6771111111111111,
|
21359 |
+
"grad_norm": 1.060013771057129,
|
21360 |
+
"learning_rate": 6.472160356347439e-05,
|
21361 |
+
"loss": 1.1064,
|
21362 |
+
"step": 3047
|
21363 |
+
},
|
21364 |
+
{
|
21365 |
+
"epoch": 0.6773333333333333,
|
21366 |
+
"grad_norm": 0.6272473335266113,
|
21367 |
+
"learning_rate": 6.46770601336303e-05,
|
21368 |
+
"loss": 0.4809,
|
21369 |
+
"step": 3048
|
21370 |
+
},
|
21371 |
+
{
|
21372 |
+
"epoch": 0.6775555555555556,
|
21373 |
+
"grad_norm": 0.9412599802017212,
|
21374 |
+
"learning_rate": 6.463251670378619e-05,
|
21375 |
+
"loss": 0.9192,
|
21376 |
+
"step": 3049
|
21377 |
+
},
|
21378 |
+
{
|
21379 |
+
"epoch": 0.6777777777777778,
|
21380 |
+
"grad_norm": 1.0236815214157104,
|
21381 |
+
"learning_rate": 6.45879732739421e-05,
|
21382 |
+
"loss": 0.8751,
|
21383 |
+
"step": 3050
|
21384 |
+
},
|
21385 |
+
{
|
21386 |
+
"epoch": 0.678,
|
21387 |
+
"grad_norm": 0.05509922653436661,
|
21388 |
+
"learning_rate": 6.4543429844098e-05,
|
21389 |
+
"loss": 0.0112,
|
21390 |
+
"step": 3051
|
21391 |
+
},
|
21392 |
+
{
|
21393 |
+
"epoch": 0.6782222222222222,
|
21394 |
+
"grad_norm": 0.8858595490455627,
|
21395 |
+
"learning_rate": 6.44988864142539e-05,
|
21396 |
+
"loss": 2.2657,
|
21397 |
+
"step": 3052
|
21398 |
+
},
|
21399 |
+
{
|
21400 |
+
"epoch": 0.6784444444444444,
|
21401 |
+
"grad_norm": 0.5961353182792664,
|
21402 |
+
"learning_rate": 6.44543429844098e-05,
|
21403 |
+
"loss": 1.2772,
|
21404 |
+
"step": 3053
|
21405 |
+
},
|
21406 |
+
{
|
21407 |
+
"epoch": 0.6786666666666666,
|
21408 |
+
"grad_norm": 0.05628953129053116,
|
21409 |
+
"learning_rate": 6.44097995545657e-05,
|
21410 |
+
"loss": 0.011,
|
21411 |
+
"step": 3054
|
21412 |
+
},
|
21413 |
+
{
|
21414 |
+
"epoch": 0.6788888888888889,
|
21415 |
+
"grad_norm": 0.8051088452339172,
|
21416 |
+
"learning_rate": 6.436525612472161e-05,
|
21417 |
+
"loss": 2.2465,
|
21418 |
+
"step": 3055
|
21419 |
+
},
|
21420 |
+
{
|
21421 |
+
"epoch": 0.6791111111111111,
|
21422 |
+
"grad_norm": 0.9172492027282715,
|
21423 |
+
"learning_rate": 6.43207126948775e-05,
|
21424 |
+
"loss": 2.1671,
|
21425 |
+
"step": 3056
|
21426 |
+
},
|
21427 |
+
{
|
21428 |
+
"epoch": 0.6793333333333333,
|
21429 |
+
"grad_norm": 0.622685968875885,
|
21430 |
+
"learning_rate": 6.427616926503342e-05,
|
21431 |
+
"loss": 1.1392,
|
21432 |
+
"step": 3057
|
21433 |
+
},
|
21434 |
+
{
|
21435 |
+
"epoch": 0.6795555555555556,
|
21436 |
+
"grad_norm": 1.047365665435791,
|
21437 |
+
"learning_rate": 6.423162583518931e-05,
|
21438 |
+
"loss": 2.1024,
|
21439 |
+
"step": 3058
|
21440 |
+
},
|
21441 |
+
{
|
21442 |
+
"epoch": 0.6797777777777778,
|
21443 |
+
"grad_norm": 0.6596314311027527,
|
21444 |
+
"learning_rate": 6.418708240534522e-05,
|
21445 |
+
"loss": 1.2222,
|
21446 |
+
"step": 3059
|
21447 |
+
},
|
21448 |
+
{
|
21449 |
+
"epoch": 0.68,
|
21450 |
+
"grad_norm": 0.058744728565216064,
|
21451 |
+
"learning_rate": 6.414253897550112e-05,
|
21452 |
+
"loss": 0.0111,
|
21453 |
+
"step": 3060
|
21454 |
+
},
|
21455 |
+
{
|
21456 |
+
"epoch": 0.6802222222222222,
|
21457 |
+
"grad_norm": 0.9326651692390442,
|
21458 |
+
"learning_rate": 6.409799554565701e-05,
|
21459 |
+
"loss": 2.1789,
|
21460 |
+
"step": 3061
|
21461 |
+
},
|
21462 |
+
{
|
21463 |
+
"epoch": 0.6804444444444444,
|
21464 |
+
"grad_norm": 0.9691800475120544,
|
21465 |
+
"learning_rate": 6.405345211581292e-05,
|
21466 |
+
"loss": 2.2824,
|
21467 |
+
"step": 3062
|
21468 |
+
},
|
21469 |
+
{
|
21470 |
+
"epoch": 0.6806666666666666,
|
21471 |
+
"grad_norm": 0.925193190574646,
|
21472 |
+
"learning_rate": 6.400890868596882e-05,
|
21473 |
+
"loss": 2.431,
|
21474 |
+
"step": 3063
|
21475 |
+
},
|
21476 |
+
{
|
21477 |
+
"epoch": 0.6808888888888889,
|
21478 |
+
"grad_norm": 0.9088225364685059,
|
21479 |
+
"learning_rate": 6.396436525612473e-05,
|
21480 |
+
"loss": 2.0492,
|
21481 |
+
"step": 3064
|
21482 |
+
},
|
21483 |
+
{
|
21484 |
+
"epoch": 0.6811111111111111,
|
21485 |
+
"grad_norm": 0.8372054696083069,
|
21486 |
+
"learning_rate": 6.391982182628062e-05,
|
21487 |
+
"loss": 2.2126,
|
21488 |
+
"step": 3065
|
21489 |
+
},
|
21490 |
+
{
|
21491 |
+
"epoch": 0.6813333333333333,
|
21492 |
+
"grad_norm": 0.8477223515510559,
|
21493 |
+
"learning_rate": 6.387527839643653e-05,
|
21494 |
+
"loss": 1.933,
|
21495 |
+
"step": 3066
|
21496 |
+
},
|
21497 |
+
{
|
21498 |
+
"epoch": 0.6815555555555556,
|
21499 |
+
"grad_norm": 0.6637649536132812,
|
21500 |
+
"learning_rate": 6.383073496659243e-05,
|
21501 |
+
"loss": 1.0526,
|
21502 |
+
"step": 3067
|
21503 |
+
},
|
21504 |
+
{
|
21505 |
+
"epoch": 0.6817777777777778,
|
21506 |
+
"grad_norm": 0.9227988719940186,
|
21507 |
+
"learning_rate": 6.378619153674832e-05,
|
21508 |
+
"loss": 1.9949,
|
21509 |
+
"step": 3068
|
21510 |
+
},
|
21511 |
+
{
|
21512 |
+
"epoch": 0.682,
|
21513 |
+
"grad_norm": 0.9380735754966736,
|
21514 |
+
"learning_rate": 6.374164810690424e-05,
|
21515 |
+
"loss": 1.8282,
|
21516 |
+
"step": 3069
|
21517 |
+
},
|
21518 |
+
{
|
21519 |
+
"epoch": 0.6822222222222222,
|
21520 |
+
"grad_norm": 0.992690920829773,
|
21521 |
+
"learning_rate": 6.369710467706013e-05,
|
21522 |
+
"loss": 1.7263,
|
21523 |
+
"step": 3070
|
21524 |
+
},
|
21525 |
+
{
|
21526 |
+
"epoch": 0.6824444444444444,
|
21527 |
+
"grad_norm": 0.9732444286346436,
|
21528 |
+
"learning_rate": 6.365256124721604e-05,
|
21529 |
+
"loss": 2.0818,
|
21530 |
+
"step": 3071
|
21531 |
+
},
|
21532 |
+
{
|
21533 |
+
"epoch": 0.6826666666666666,
|
21534 |
+
"grad_norm": 0.07128032296895981,
|
21535 |
+
"learning_rate": 6.360801781737195e-05,
|
21536 |
+
"loss": 0.0167,
|
21537 |
+
"step": 3072
|
21538 |
+
},
|
21539 |
+
{
|
21540 |
+
"epoch": 0.6828888888888889,
|
21541 |
+
"grad_norm": 0.9442581534385681,
|
21542 |
+
"learning_rate": 6.356347438752784e-05,
|
21543 |
+
"loss": 1.9286,
|
21544 |
+
"step": 3073
|
21545 |
+
},
|
21546 |
+
{
|
21547 |
+
"epoch": 0.6831111111111111,
|
21548 |
+
"grad_norm": 0.13606970012187958,
|
21549 |
+
"learning_rate": 6.351893095768374e-05,
|
21550 |
+
"loss": 0.0205,
|
21551 |
+
"step": 3074
|
21552 |
+
},
|
21553 |
+
{
|
21554 |
+
"epoch": 0.6833333333333333,
|
21555 |
+
"grad_norm": 0.648127555847168,
|
21556 |
+
"learning_rate": 6.347438752783965e-05,
|
21557 |
+
"loss": 0.8501,
|
21558 |
+
"step": 3075
|
21559 |
+
},
|
21560 |
+
{
|
21561 |
+
"epoch": 0.6835555555555556,
|
21562 |
+
"grad_norm": 0.9328134655952454,
|
21563 |
+
"learning_rate": 6.342984409799555e-05,
|
21564 |
+
"loss": 1.6076,
|
21565 |
+
"step": 3076
|
21566 |
+
},
|
21567 |
+
{
|
21568 |
+
"epoch": 0.6837777777777778,
|
21569 |
+
"grad_norm": 1.0561175346374512,
|
21570 |
+
"learning_rate": 6.338530066815144e-05,
|
21571 |
+
"loss": 1.765,
|
21572 |
+
"step": 3077
|
21573 |
+
},
|
21574 |
+
{
|
21575 |
+
"epoch": 0.684,
|
21576 |
+
"grad_norm": 1.1816853284835815,
|
21577 |
+
"learning_rate": 6.334075723830736e-05,
|
21578 |
+
"loss": 2.1397,
|
21579 |
+
"step": 3078
|
21580 |
+
},
|
21581 |
+
{
|
21582 |
+
"epoch": 0.6842222222222222,
|
21583 |
+
"grad_norm": 1.151865839958191,
|
21584 |
+
"learning_rate": 6.329621380846326e-05,
|
21585 |
+
"loss": 1.8881,
|
21586 |
+
"step": 3079
|
21587 |
+
},
|
21588 |
+
{
|
21589 |
+
"epoch": 0.6844444444444444,
|
21590 |
+
"grad_norm": 0.07445438951253891,
|
21591 |
+
"learning_rate": 6.325167037861916e-05,
|
21592 |
+
"loss": 0.0177,
|
21593 |
+
"step": 3080
|
21594 |
+
},
|
21595 |
+
{
|
21596 |
+
"epoch": 0.6846666666666666,
|
21597 |
+
"grad_norm": 0.07042374461889267,
|
21598 |
+
"learning_rate": 6.320712694877507e-05,
|
21599 |
+
"loss": 0.0171,
|
21600 |
+
"step": 3081
|
21601 |
+
},
|
21602 |
+
{
|
21603 |
+
"epoch": 0.6848888888888889,
|
21604 |
+
"grad_norm": 0.6800836324691772,
|
21605 |
+
"learning_rate": 6.316258351893096e-05,
|
21606 |
+
"loss": 0.7686,
|
21607 |
+
"step": 3082
|
21608 |
+
},
|
21609 |
+
{
|
21610 |
+
"epoch": 0.6851111111111111,
|
21611 |
+
"grad_norm": 0.7797111868858337,
|
21612 |
+
"learning_rate": 6.311804008908686e-05,
|
21613 |
+
"loss": 0.9435,
|
21614 |
+
"step": 3083
|
21615 |
+
},
|
21616 |
+
{
|
21617 |
+
"epoch": 0.6853333333333333,
|
21618 |
+
"grad_norm": 0.07849026471376419,
|
21619 |
+
"learning_rate": 6.307349665924277e-05,
|
21620 |
+
"loss": 0.0194,
|
21621 |
+
"step": 3084
|
21622 |
+
},
|
21623 |
+
{
|
21624 |
+
"epoch": 0.6855555555555556,
|
21625 |
+
"grad_norm": 0.07662785053253174,
|
21626 |
+
"learning_rate": 6.302895322939867e-05,
|
21627 |
+
"loss": 0.0191,
|
21628 |
+
"step": 3085
|
21629 |
+
},
|
21630 |
+
{
|
21631 |
+
"epoch": 0.6857777777777778,
|
21632 |
+
"grad_norm": 0.0744476169347763,
|
21633 |
+
"learning_rate": 6.298440979955457e-05,
|
21634 |
+
"loss": 0.0183,
|
21635 |
+
"step": 3086
|
21636 |
+
},
|
21637 |
+
{
|
21638 |
+
"epoch": 0.686,
|
21639 |
+
"grad_norm": 0.0717550590634346,
|
21640 |
+
"learning_rate": 6.293986636971047e-05,
|
21641 |
+
"loss": 0.0184,
|
21642 |
+
"step": 3087
|
21643 |
+
},
|
21644 |
+
{
|
21645 |
+
"epoch": 0.6862222222222222,
|
21646 |
+
"grad_norm": 1.115823745727539,
|
21647 |
+
"learning_rate": 6.289532293986638e-05,
|
21648 |
+
"loss": 1.871,
|
21649 |
+
"step": 3088
|
21650 |
+
},
|
21651 |
+
{
|
21652 |
+
"epoch": 0.6864444444444444,
|
21653 |
+
"grad_norm": 0.9394058585166931,
|
21654 |
+
"learning_rate": 6.285077951002227e-05,
|
21655 |
+
"loss": 1.2818,
|
21656 |
+
"step": 3089
|
21657 |
+
},
|
21658 |
+
{
|
21659 |
+
"epoch": 0.6866666666666666,
|
21660 |
+
"grad_norm": 0.7753637433052063,
|
21661 |
+
"learning_rate": 6.280623608017817e-05,
|
21662 |
+
"loss": 0.9434,
|
21663 |
+
"step": 3090
|
21664 |
+
},
|
21665 |
+
{
|
21666 |
+
"epoch": 0.6868888888888889,
|
21667 |
+
"grad_norm": 0.7117932438850403,
|
21668 |
+
"learning_rate": 6.276169265033408e-05,
|
21669 |
+
"loss": 0.7796,
|
21670 |
+
"step": 3091
|
21671 |
+
},
|
21672 |
+
{
|
21673 |
+
"epoch": 0.6871111111111111,
|
21674 |
+
"grad_norm": 1.1098551750183105,
|
21675 |
+
"learning_rate": 6.271714922048998e-05,
|
21676 |
+
"loss": 1.3902,
|
21677 |
+
"step": 3092
|
21678 |
+
},
|
21679 |
+
{
|
21680 |
+
"epoch": 0.6873333333333334,
|
21681 |
+
"grad_norm": 1.1206355094909668,
|
21682 |
+
"learning_rate": 6.267260579064588e-05,
|
21683 |
+
"loss": 1.4423,
|
21684 |
+
"step": 3093
|
21685 |
+
},
|
21686 |
+
{
|
21687 |
+
"epoch": 0.6875555555555556,
|
21688 |
+
"grad_norm": 1.0141700506210327,
|
21689 |
+
"learning_rate": 6.262806236080178e-05,
|
21690 |
+
"loss": 1.587,
|
21691 |
+
"step": 3094
|
21692 |
+
},
|
21693 |
+
{
|
21694 |
+
"epoch": 0.6877777777777778,
|
21695 |
+
"grad_norm": 1.2595239877700806,
|
21696 |
+
"learning_rate": 6.258351893095769e-05,
|
21697 |
+
"loss": 1.5666,
|
21698 |
+
"step": 3095
|
21699 |
+
},
|
21700 |
+
{
|
21701 |
+
"epoch": 0.688,
|
21702 |
+
"grad_norm": 0.9674675464630127,
|
21703 |
+
"learning_rate": 6.253897550111358e-05,
|
21704 |
+
"loss": 0.7225,
|
21705 |
+
"step": 3096
|
21706 |
+
},
|
21707 |
+
{
|
21708 |
+
"epoch": 0.6882222222222222,
|
21709 |
+
"grad_norm": 0.1801719069480896,
|
21710 |
+
"learning_rate": 6.24944320712695e-05,
|
21711 |
+
"loss": 0.0303,
|
21712 |
+
"step": 3097
|
21713 |
+
},
|
21714 |
+
{
|
21715 |
+
"epoch": 0.6884444444444444,
|
21716 |
+
"grad_norm": 1.2141374349594116,
|
21717 |
+
"learning_rate": 6.244988864142539e-05,
|
21718 |
+
"loss": 1.0987,
|
21719 |
+
"step": 3098
|
21720 |
+
},
|
21721 |
+
{
|
21722 |
+
"epoch": 0.6886666666666666,
|
21723 |
+
"grad_norm": 1.031459093093872,
|
21724 |
+
"learning_rate": 6.24053452115813e-05,
|
21725 |
+
"loss": 1.0589,
|
21726 |
+
"step": 3099
|
21727 |
+
},
|
21728 |
+
{
|
21729 |
+
"epoch": 0.6888888888888889,
|
21730 |
+
"grad_norm": 0.8709812760353088,
|
21731 |
+
"learning_rate": 6.23608017817372e-05,
|
21732 |
+
"loss": 0.5863,
|
21733 |
+
"step": 3100
|
21734 |
+
},
|
21735 |
+
{
|
21736 |
+
"epoch": 0.6891111111111111,
|
21737 |
+
"grad_norm": 0.607231616973877,
|
21738 |
+
"learning_rate": 6.231625835189309e-05,
|
21739 |
+
"loss": 1.0338,
|
21740 |
+
"step": 3101
|
21741 |
+
},
|
21742 |
+
{
|
21743 |
+
"epoch": 0.6893333333333334,
|
21744 |
+
"grad_norm": 0.5942530035972595,
|
21745 |
+
"learning_rate": 6.2271714922049e-05,
|
21746 |
+
"loss": 1.0758,
|
21747 |
+
"step": 3102
|
21748 |
+
},
|
21749 |
+
{
|
21750 |
+
"epoch": 0.6895555555555556,
|
21751 |
+
"grad_norm": 0.8875899910926819,
|
21752 |
+
"learning_rate": 6.22271714922049e-05,
|
21753 |
+
"loss": 2.1266,
|
21754 |
+
"step": 3103
|
21755 |
+
},
|
21756 |
+
{
|
21757 |
+
"epoch": 0.6897777777777778,
|
21758 |
+
"grad_norm": 0.8262476325035095,
|
21759 |
+
"learning_rate": 6.218262806236081e-05,
|
21760 |
+
"loss": 2.2635,
|
21761 |
+
"step": 3104
|
21762 |
+
},
|
21763 |
+
{
|
21764 |
+
"epoch": 0.69,
|
21765 |
+
"grad_norm": 0.0524749718606472,
|
21766 |
+
"learning_rate": 6.21380846325167e-05,
|
21767 |
+
"loss": 0.0106,
|
21768 |
+
"step": 3105
|
21769 |
+
},
|
21770 |
+
{
|
21771 |
+
"epoch": 0.6902222222222222,
|
21772 |
+
"grad_norm": 0.5803321003913879,
|
21773 |
+
"learning_rate": 6.20935412026726e-05,
|
21774 |
+
"loss": 1.0884,
|
21775 |
+
"step": 3106
|
21776 |
+
},
|
21777 |
+
{
|
21778 |
+
"epoch": 0.6904444444444444,
|
21779 |
+
"grad_norm": 1.0942132472991943,
|
21780 |
+
"learning_rate": 6.204899777282851e-05,
|
21781 |
+
"loss": 2.2134,
|
21782 |
+
"step": 3107
|
21783 |
+
},
|
21784 |
+
{
|
21785 |
+
"epoch": 0.6906666666666667,
|
21786 |
+
"grad_norm": 0.07692880928516388,
|
21787 |
+
"learning_rate": 6.20044543429844e-05,
|
21788 |
+
"loss": 0.0116,
|
21789 |
+
"step": 3108
|
21790 |
+
},
|
21791 |
+
{
|
21792 |
+
"epoch": 0.6908888888888889,
|
21793 |
+
"grad_norm": 0.07232845574617386,
|
21794 |
+
"learning_rate": 6.195991091314032e-05,
|
21795 |
+
"loss": 0.0117,
|
21796 |
+
"step": 3109
|
21797 |
+
},
|
21798 |
+
{
|
21799 |
+
"epoch": 0.6911111111111111,
|
21800 |
+
"grad_norm": 0.8545564413070679,
|
21801 |
+
"learning_rate": 6.191536748329621e-05,
|
21802 |
+
"loss": 2.0441,
|
21803 |
+
"step": 3110
|
21804 |
+
},
|
21805 |
+
{
|
21806 |
+
"epoch": 0.6913333333333334,
|
21807 |
+
"grad_norm": 0.8483017086982727,
|
21808 |
+
"learning_rate": 6.187082405345212e-05,
|
21809 |
+
"loss": 1.9926,
|
21810 |
+
"step": 3111
|
21811 |
+
},
|
21812 |
+
{
|
21813 |
+
"epoch": 0.6915555555555556,
|
21814 |
+
"grad_norm": 0.8519989848136902,
|
21815 |
+
"learning_rate": 6.182628062360802e-05,
|
21816 |
+
"loss": 2.1871,
|
21817 |
+
"step": 3112
|
21818 |
+
},
|
21819 |
+
{
|
21820 |
+
"epoch": 0.6917777777777778,
|
21821 |
+
"grad_norm": 0.8962295055389404,
|
21822 |
+
"learning_rate": 6.178173719376392e-05,
|
21823 |
+
"loss": 1.9079,
|
21824 |
+
"step": 3113
|
21825 |
+
},
|
21826 |
+
{
|
21827 |
+
"epoch": 0.692,
|
21828 |
+
"grad_norm": 0.908099353313446,
|
21829 |
+
"learning_rate": 6.173719376391982e-05,
|
21830 |
+
"loss": 2.2056,
|
21831 |
+
"step": 3114
|
21832 |
+
},
|
21833 |
+
{
|
21834 |
+
"epoch": 0.6922222222222222,
|
21835 |
+
"grad_norm": 0.9471180438995361,
|
21836 |
+
"learning_rate": 6.169265033407573e-05,
|
21837 |
+
"loss": 1.6695,
|
21838 |
+
"step": 3115
|
21839 |
+
},
|
21840 |
+
{
|
21841 |
+
"epoch": 0.6924444444444444,
|
21842 |
+
"grad_norm": 0.9277594685554504,
|
21843 |
+
"learning_rate": 6.164810690423163e-05,
|
21844 |
+
"loss": 2.0879,
|
21845 |
+
"step": 3116
|
21846 |
+
},
|
21847 |
+
{
|
21848 |
+
"epoch": 0.6926666666666667,
|
21849 |
+
"grad_norm": 0.6673265695571899,
|
21850 |
+
"learning_rate": 6.160356347438752e-05,
|
21851 |
+
"loss": 0.9213,
|
21852 |
+
"step": 3117
|
21853 |
+
},
|
21854 |
+
{
|
21855 |
+
"epoch": 0.6928888888888889,
|
21856 |
+
"grad_norm": 0.12814414501190186,
|
21857 |
+
"learning_rate": 6.155902004454344e-05,
|
21858 |
+
"loss": 0.0211,
|
21859 |
+
"step": 3118
|
21860 |
+
},
|
21861 |
+
{
|
21862 |
+
"epoch": 0.6931111111111111,
|
21863 |
+
"grad_norm": 0.9539985656738281,
|
21864 |
+
"learning_rate": 6.151447661469933e-05,
|
21865 |
+
"loss": 1.9212,
|
21866 |
+
"step": 3119
|
21867 |
+
},
|
21868 |
+
{
|
21869 |
+
"epoch": 0.6933333333333334,
|
21870 |
+
"grad_norm": 0.927853524684906,
|
21871 |
+
"learning_rate": 6.146993318485523e-05,
|
21872 |
+
"loss": 1.6054,
|
21873 |
+
"step": 3120
|
21874 |
+
},
|
21875 |
+
{
|
21876 |
+
"epoch": 0.6935555555555556,
|
21877 |
+
"grad_norm": 0.6636569499969482,
|
21878 |
+
"learning_rate": 6.142538975501115e-05,
|
21879 |
+
"loss": 0.9331,
|
21880 |
+
"step": 3121
|
21881 |
+
},
|
21882 |
+
{
|
21883 |
+
"epoch": 0.6937777777777778,
|
21884 |
+
"grad_norm": 0.07317844778299332,
|
21885 |
+
"learning_rate": 6.138084632516704e-05,
|
21886 |
+
"loss": 0.0166,
|
21887 |
+
"step": 3122
|
21888 |
+
},
|
21889 |
+
{
|
21890 |
+
"epoch": 0.694,
|
21891 |
+
"grad_norm": 0.07253949344158173,
|
21892 |
+
"learning_rate": 6.133630289532294e-05,
|
21893 |
+
"loss": 0.0165,
|
21894 |
+
"step": 3123
|
21895 |
+
},
|
21896 |
+
{
|
21897 |
+
"epoch": 0.6942222222222222,
|
21898 |
+
"grad_norm": 0.07455820590257645,
|
21899 |
+
"learning_rate": 6.129175946547885e-05,
|
21900 |
+
"loss": 0.0168,
|
21901 |
+
"step": 3124
|
21902 |
+
},
|
21903 |
+
{
|
21904 |
+
"epoch": 0.6944444444444444,
|
21905 |
+
"grad_norm": 0.7180811762809753,
|
21906 |
+
"learning_rate": 6.124721603563475e-05,
|
21907 |
+
"loss": 1.3197,
|
21908 |
+
"step": 3125
|
21909 |
+
},
|
21910 |
+
{
|
21911 |
+
"epoch": 0.6946666666666667,
|
21912 |
+
"grad_norm": 1.0325121879577637,
|
21913 |
+
"learning_rate": 6.120267260579064e-05,
|
21914 |
+
"loss": 1.8146,
|
21915 |
+
"step": 3126
|
21916 |
+
},
|
21917 |
+
{
|
21918 |
+
"epoch": 0.6948888888888889,
|
21919 |
+
"grad_norm": 1.0472650527954102,
|
21920 |
+
"learning_rate": 6.115812917594655e-05,
|
21921 |
+
"loss": 1.8477,
|
21922 |
+
"step": 3127
|
21923 |
+
},
|
21924 |
+
{
|
21925 |
+
"epoch": 0.6951111111111111,
|
21926 |
+
"grad_norm": 1.3057109117507935,
|
21927 |
+
"learning_rate": 6.111358574610246e-05,
|
21928 |
+
"loss": 1.6522,
|
21929 |
+
"step": 3128
|
21930 |
+
},
|
21931 |
+
{
|
21932 |
+
"epoch": 0.6953333333333334,
|
21933 |
+
"grad_norm": 0.9642925262451172,
|
21934 |
+
"learning_rate": 6.106904231625835e-05,
|
21935 |
+
"loss": 1.9227,
|
21936 |
+
"step": 3129
|
21937 |
+
},
|
21938 |
+
{
|
21939 |
+
"epoch": 0.6955555555555556,
|
21940 |
+
"grad_norm": 0.9852336049079895,
|
21941 |
+
"learning_rate": 6.102449888641426e-05,
|
21942 |
+
"loss": 1.8925,
|
21943 |
+
"step": 3130
|
21944 |
+
},
|
21945 |
+
{
|
21946 |
+
"epoch": 0.6957777777777778,
|
21947 |
+
"grad_norm": 0.0911262258887291,
|
21948 |
+
"learning_rate": 6.097995545657016e-05,
|
21949 |
+
"loss": 0.0171,
|
21950 |
+
"step": 3131
|
21951 |
+
},
|
21952 |
+
{
|
21953 |
+
"epoch": 0.696,
|
21954 |
+
"grad_norm": 0.6741465330123901,
|
21955 |
+
"learning_rate": 6.093541202672606e-05,
|
21956 |
+
"loss": 0.8653,
|
21957 |
+
"step": 3132
|
21958 |
+
},
|
21959 |
+
{
|
21960 |
+
"epoch": 0.6962222222222222,
|
21961 |
+
"grad_norm": 0.9752011895179749,
|
21962 |
+
"learning_rate": 6.089086859688197e-05,
|
21963 |
+
"loss": 1.4045,
|
21964 |
+
"step": 3133
|
21965 |
+
},
|
21966 |
+
{
|
21967 |
+
"epoch": 0.6964444444444444,
|
21968 |
+
"grad_norm": 0.07129085063934326,
|
21969 |
+
"learning_rate": 6.084632516703787e-05,
|
21970 |
+
"loss": 0.0192,
|
21971 |
+
"step": 3134
|
21972 |
+
},
|
21973 |
+
{
|
21974 |
+
"epoch": 0.6966666666666667,
|
21975 |
+
"grad_norm": 0.0695280209183693,
|
21976 |
+
"learning_rate": 6.0801781737193766e-05,
|
21977 |
+
"loss": 0.0186,
|
21978 |
+
"step": 3135
|
21979 |
+
},
|
21980 |
+
{
|
21981 |
+
"epoch": 0.6968888888888889,
|
21982 |
+
"grad_norm": 0.07262587547302246,
|
21983 |
+
"learning_rate": 6.075723830734967e-05,
|
21984 |
+
"loss": 0.0183,
|
21985 |
+
"step": 3136
|
21986 |
+
},
|
21987 |
+
{
|
21988 |
+
"epoch": 0.6971111111111111,
|
21989 |
+
"grad_norm": 0.9831186532974243,
|
21990 |
+
"learning_rate": 6.071269487750557e-05,
|
21991 |
+
"loss": 1.4122,
|
21992 |
+
"step": 3137
|
21993 |
+
},
|
21994 |
+
{
|
21995 |
+
"epoch": 0.6973333333333334,
|
21996 |
+
"grad_norm": 0.9442914724349976,
|
21997 |
+
"learning_rate": 6.066815144766147e-05,
|
21998 |
+
"loss": 1.452,
|
21999 |
+
"step": 3138
|
22000 |
+
},
|
22001 |
+
{
|
22002 |
+
"epoch": 0.6975555555555556,
|
22003 |
+
"grad_norm": 1.1144623756408691,
|
22004 |
+
"learning_rate": 6.062360801781738e-05,
|
22005 |
+
"loss": 1.6665,
|
22006 |
+
"step": 3139
|
22007 |
+
},
|
22008 |
+
{
|
22009 |
+
"epoch": 0.6977777777777778,
|
22010 |
+
"grad_norm": 1.0614639520645142,
|
22011 |
+
"learning_rate": 6.057906458797328e-05,
|
22012 |
+
"loss": 1.5621,
|
22013 |
+
"step": 3140
|
22014 |
+
},
|
22015 |
+
{
|
22016 |
+
"epoch": 0.698,
|
22017 |
+
"grad_norm": 1.2415484189987183,
|
22018 |
+
"learning_rate": 6.053452115812918e-05,
|
22019 |
+
"loss": 1.5338,
|
22020 |
+
"step": 3141
|
22021 |
+
},
|
22022 |
+
{
|
22023 |
+
"epoch": 0.6982222222222222,
|
22024 |
+
"grad_norm": 1.146238923072815,
|
22025 |
+
"learning_rate": 6.048997772828508e-05,
|
22026 |
+
"loss": 1.8936,
|
22027 |
+
"step": 3142
|
22028 |
+
},
|
22029 |
+
{
|
22030 |
+
"epoch": 0.6984444444444444,
|
22031 |
+
"grad_norm": 1.1693158149719238,
|
22032 |
+
"learning_rate": 6.044543429844098e-05,
|
22033 |
+
"loss": 1.7386,
|
22034 |
+
"step": 3143
|
22035 |
+
},
|
22036 |
+
{
|
22037 |
+
"epoch": 0.6986666666666667,
|
22038 |
+
"grad_norm": 1.2340409755706787,
|
22039 |
+
"learning_rate": 6.040089086859688e-05,
|
22040 |
+
"loss": 1.4781,
|
22041 |
+
"step": 3144
|
22042 |
+
},
|
22043 |
+
{
|
22044 |
+
"epoch": 0.6988888888888889,
|
22045 |
+
"grad_norm": 1.0042845010757446,
|
22046 |
+
"learning_rate": 6.035634743875279e-05,
|
22047 |
+
"loss": 1.3662,
|
22048 |
+
"step": 3145
|
22049 |
+
},
|
22050 |
+
{
|
22051 |
+
"epoch": 0.6991111111111111,
|
22052 |
+
"grad_norm": 0.18454298377037048,
|
22053 |
+
"learning_rate": 6.031180400890869e-05,
|
22054 |
+
"loss": 0.0276,
|
22055 |
+
"step": 3146
|
22056 |
+
},
|
22057 |
+
{
|
22058 |
+
"epoch": 0.6993333333333334,
|
22059 |
+
"grad_norm": 1.1719262599945068,
|
22060 |
+
"learning_rate": 6.026726057906459e-05,
|
22061 |
+
"loss": 1.0601,
|
22062 |
+
"step": 3147
|
22063 |
+
},
|
22064 |
+
{
|
22065 |
+
"epoch": 0.6995555555555556,
|
22066 |
+
"grad_norm": 0.9232467412948608,
|
22067 |
+
"learning_rate": 6.0222717149220495e-05,
|
22068 |
+
"loss": 0.8415,
|
22069 |
+
"step": 3148
|
22070 |
+
},
|
22071 |
+
{
|
22072 |
+
"epoch": 0.6997777777777778,
|
22073 |
+
"grad_norm": 0.194104865193367,
|
22074 |
+
"learning_rate": 6.0178173719376394e-05,
|
22075 |
+
"loss": 0.0401,
|
22076 |
+
"step": 3149
|
22077 |
+
},
|
22078 |
+
{
|
22079 |
+
"epoch": 0.7,
|
22080 |
+
"grad_norm": 0.7421103119850159,
|
22081 |
+
"learning_rate": 6.013363028953229e-05,
|
22082 |
+
"loss": 0.6284,
|
22083 |
+
"step": 3150
|
22084 |
+
},
|
22085 |
+
{
|
22086 |
+
"epoch": 0.7002222222222222,
|
22087 |
+
"grad_norm": 0.7694705724716187,
|
22088 |
+
"learning_rate": 6.0089086859688204e-05,
|
22089 |
+
"loss": 1.1152,
|
22090 |
+
"step": 3151
|
22091 |
+
},
|
22092 |
+
{
|
22093 |
+
"epoch": 0.7004444444444444,
|
22094 |
+
"grad_norm": 0.703349232673645,
|
22095 |
+
"learning_rate": 6.00445434298441e-05,
|
22096 |
+
"loss": 1.1683,
|
22097 |
+
"step": 3152
|
22098 |
+
},
|
22099 |
+
{
|
22100 |
+
"epoch": 0.7006666666666667,
|
22101 |
+
"grad_norm": 0.05406121537089348,
|
22102 |
+
"learning_rate": 6e-05,
|
22103 |
+
"loss": 0.0105,
|
22104 |
+
"step": 3153
|
22105 |
+
},
|
22106 |
+
{
|
22107 |
+
"epoch": 0.7008888888888889,
|
22108 |
+
"grad_norm": 0.5842484831809998,
|
22109 |
+
"learning_rate": 5.995545657015591e-05,
|
22110 |
+
"loss": 1.15,
|
22111 |
+
"step": 3154
|
22112 |
+
},
|
22113 |
+
{
|
22114 |
+
"epoch": 0.7011111111111111,
|
22115 |
+
"grad_norm": 0.05234431475400925,
|
22116 |
+
"learning_rate": 5.9910913140311805e-05,
|
22117 |
+
"loss": 0.0105,
|
22118 |
+
"step": 3155
|
22119 |
+
},
|
22120 |
+
{
|
22121 |
+
"epoch": 0.7013333333333334,
|
22122 |
+
"grad_norm": 0.5893082618713379,
|
22123 |
+
"learning_rate": 5.9866369710467704e-05,
|
22124 |
+
"loss": 1.1913,
|
22125 |
+
"step": 3156
|
22126 |
+
},
|
22127 |
+
{
|
22128 |
+
"epoch": 0.7015555555555556,
|
22129 |
+
"grad_norm": 0.5218148231506348,
|
22130 |
+
"learning_rate": 5.9821826280623616e-05,
|
22131 |
+
"loss": 0.9835,
|
22132 |
+
"step": 3157
|
22133 |
+
},
|
22134 |
+
{
|
22135 |
+
"epoch": 0.7017777777777777,
|
22136 |
+
"grad_norm": 0.5484596490859985,
|
22137 |
+
"learning_rate": 5.9777282850779515e-05,
|
22138 |
+
"loss": 0.9247,
|
22139 |
+
"step": 3158
|
22140 |
+
},
|
22141 |
+
{
|
22142 |
+
"epoch": 0.702,
|
22143 |
+
"grad_norm": 0.6557696461677551,
|
22144 |
+
"learning_rate": 5.973273942093541e-05,
|
22145 |
+
"loss": 1.1391,
|
22146 |
+
"step": 3159
|
22147 |
+
},
|
22148 |
+
{
|
22149 |
+
"epoch": 0.7022222222222222,
|
22150 |
+
"grad_norm": 0.5898274779319763,
|
22151 |
+
"learning_rate": 5.9688195991091325e-05,
|
22152 |
+
"loss": 1.2284,
|
22153 |
+
"step": 3160
|
22154 |
+
},
|
22155 |
+
{
|
22156 |
+
"epoch": 0.7024444444444444,
|
22157 |
+
"grad_norm": 0.09231838583946228,
|
22158 |
+
"learning_rate": 5.9643652561247224e-05,
|
22159 |
+
"loss": 0.0125,
|
22160 |
+
"step": 3161
|
22161 |
+
},
|
22162 |
+
{
|
22163 |
+
"epoch": 0.7026666666666667,
|
22164 |
+
"grad_norm": 1.012488842010498,
|
22165 |
+
"learning_rate": 5.9599109131403116e-05,
|
22166 |
+
"loss": 2.0515,
|
22167 |
+
"step": 3162
|
22168 |
+
},
|
22169 |
+
{
|
22170 |
+
"epoch": 0.7028888888888889,
|
22171 |
+
"grad_norm": 0.9501926302909851,
|
22172 |
+
"learning_rate": 5.9554565701559014e-05,
|
22173 |
+
"loss": 2.3767,
|
22174 |
+
"step": 3163
|
22175 |
+
},
|
22176 |
+
{
|
22177 |
+
"epoch": 0.7031111111111111,
|
22178 |
+
"grad_norm": 0.9576533436775208,
|
22179 |
+
"learning_rate": 5.9510022271714927e-05,
|
22180 |
+
"loss": 2.3394,
|
22181 |
+
"step": 3164
|
22182 |
+
},
|
22183 |
+
{
|
22184 |
+
"epoch": 0.7033333333333334,
|
22185 |
+
"grad_norm": 0.944797694683075,
|
22186 |
+
"learning_rate": 5.9465478841870825e-05,
|
22187 |
+
"loss": 1.9659,
|
22188 |
+
"step": 3165
|
22189 |
+
},
|
22190 |
+
{
|
22191 |
+
"epoch": 0.7035555555555556,
|
22192 |
+
"grad_norm": 0.8810012340545654,
|
22193 |
+
"learning_rate": 5.9420935412026724e-05,
|
22194 |
+
"loss": 1.8656,
|
22195 |
+
"step": 3166
|
22196 |
+
},
|
22197 |
+
{
|
22198 |
+
"epoch": 0.7037777777777777,
|
22199 |
+
"grad_norm": 0.6439220309257507,
|
22200 |
+
"learning_rate": 5.9376391982182636e-05,
|
22201 |
+
"loss": 0.8335,
|
22202 |
+
"step": 3167
|
22203 |
+
},
|
22204 |
+
{
|
22205 |
+
"epoch": 0.704,
|
22206 |
+
"grad_norm": 0.9962994456291199,
|
22207 |
+
"learning_rate": 5.9331848552338534e-05,
|
22208 |
+
"loss": 2.0233,
|
22209 |
+
"step": 3168
|
22210 |
+
},
|
22211 |
+
{
|
22212 |
+
"epoch": 0.7042222222222222,
|
22213 |
+
"grad_norm": 0.9703332185745239,
|
22214 |
+
"learning_rate": 5.928730512249443e-05,
|
22215 |
+
"loss": 1.9519,
|
22216 |
+
"step": 3169
|
22217 |
+
},
|
22218 |
+
{
|
22219 |
+
"epoch": 0.7044444444444444,
|
22220 |
+
"grad_norm": 1.0500884056091309,
|
22221 |
+
"learning_rate": 5.924276169265034e-05,
|
22222 |
+
"loss": 1.578,
|
22223 |
+
"step": 3170
|
22224 |
+
},
|
22225 |
+
{
|
22226 |
+
"epoch": 0.7046666666666667,
|
22227 |
+
"grad_norm": 0.9718672037124634,
|
22228 |
+
"learning_rate": 5.919821826280624e-05,
|
22229 |
+
"loss": 2.0021,
|
22230 |
+
"step": 3171
|
22231 |
+
},
|
22232 |
+
{
|
22233 |
+
"epoch": 0.7048888888888889,
|
22234 |
+
"grad_norm": 0.07014777511358261,
|
22235 |
+
"learning_rate": 5.9153674832962136e-05,
|
22236 |
+
"loss": 0.0162,
|
22237 |
+
"step": 3172
|
22238 |
+
},
|
22239 |
+
{
|
22240 |
+
"epoch": 0.7051111111111111,
|
22241 |
+
"grad_norm": 0.07737057656049728,
|
22242 |
+
"learning_rate": 5.910913140311805e-05,
|
22243 |
+
"loss": 0.0156,
|
22244 |
+
"step": 3173
|
22245 |
+
},
|
22246 |
+
{
|
22247 |
+
"epoch": 0.7053333333333334,
|
22248 |
+
"grad_norm": 0.12630076706409454,
|
22249 |
+
"learning_rate": 5.9064587973273946e-05,
|
22250 |
+
"loss": 0.0213,
|
22251 |
+
"step": 3174
|
22252 |
+
},
|
22253 |
+
{
|
22254 |
+
"epoch": 0.7055555555555556,
|
22255 |
+
"grad_norm": 0.7619150876998901,
|
22256 |
+
"learning_rate": 5.9020044543429845e-05,
|
22257 |
+
"loss": 0.9373,
|
22258 |
+
"step": 3175
|
22259 |
+
},
|
22260 |
+
{
|
22261 |
+
"epoch": 0.7057777777777777,
|
22262 |
+
"grad_norm": 1.0807890892028809,
|
22263 |
+
"learning_rate": 5.897550111358575e-05,
|
22264 |
+
"loss": 2.0725,
|
22265 |
+
"step": 3176
|
22266 |
+
},
|
22267 |
+
{
|
22268 |
+
"epoch": 0.706,
|
22269 |
+
"grad_norm": 0.9409441351890564,
|
22270 |
+
"learning_rate": 5.893095768374165e-05,
|
22271 |
+
"loss": 1.6597,
|
22272 |
+
"step": 3177
|
22273 |
+
},
|
22274 |
+
{
|
22275 |
+
"epoch": 0.7062222222222222,
|
22276 |
+
"grad_norm": 1.096917986869812,
|
22277 |
+
"learning_rate": 5.888641425389755e-05,
|
22278 |
+
"loss": 1.9767,
|
22279 |
+
"step": 3178
|
22280 |
+
},
|
22281 |
+
{
|
22282 |
+
"epoch": 0.7064444444444444,
|
22283 |
+
"grad_norm": 1.091698408126831,
|
22284 |
+
"learning_rate": 5.884187082405346e-05,
|
22285 |
+
"loss": 1.7166,
|
22286 |
+
"step": 3179
|
22287 |
+
},
|
22288 |
+
{
|
22289 |
+
"epoch": 0.7066666666666667,
|
22290 |
+
"grad_norm": 1.0211970806121826,
|
22291 |
+
"learning_rate": 5.879732739420936e-05,
|
22292 |
+
"loss": 1.6798,
|
22293 |
+
"step": 3180
|
22294 |
+
},
|
22295 |
+
{
|
22296 |
+
"epoch": 0.7068888888888889,
|
22297 |
+
"grad_norm": 0.6886789202690125,
|
22298 |
+
"learning_rate": 5.875278396436526e-05,
|
22299 |
+
"loss": 1.0461,
|
22300 |
+
"step": 3181
|
22301 |
+
},
|
22302 |
+
{
|
22303 |
+
"epoch": 0.7071111111111111,
|
22304 |
+
"grad_norm": 0.05880124494433403,
|
22305 |
+
"learning_rate": 5.870824053452116e-05,
|
22306 |
+
"loss": 0.0178,
|
22307 |
+
"step": 3182
|
22308 |
+
},
|
22309 |
+
{
|
22310 |
+
"epoch": 0.7073333333333334,
|
22311 |
+
"grad_norm": 0.060819823294878006,
|
22312 |
+
"learning_rate": 5.866369710467706e-05,
|
22313 |
+
"loss": 0.0178,
|
22314 |
+
"step": 3183
|
22315 |
+
},
|
22316 |
+
{
|
22317 |
+
"epoch": 0.7075555555555556,
|
22318 |
+
"grad_norm": 0.724615752696991,
|
22319 |
+
"learning_rate": 5.861915367483296e-05,
|
22320 |
+
"loss": 1.0519,
|
22321 |
+
"step": 3184
|
22322 |
+
},
|
22323 |
+
{
|
22324 |
+
"epoch": 0.7077777777777777,
|
22325 |
+
"grad_norm": 0.8110787868499756,
|
22326 |
+
"learning_rate": 5.857461024498887e-05,
|
22327 |
+
"loss": 0.9285,
|
22328 |
+
"step": 3185
|
22329 |
+
},
|
22330 |
+
{
|
22331 |
+
"epoch": 0.708,
|
22332 |
+
"grad_norm": 0.06422421336174011,
|
22333 |
+
"learning_rate": 5.853006681514477e-05,
|
22334 |
+
"loss": 0.0176,
|
22335 |
+
"step": 3186
|
22336 |
+
},
|
22337 |
+
{
|
22338 |
+
"epoch": 0.7082222222222222,
|
22339 |
+
"grad_norm": 0.067098468542099,
|
22340 |
+
"learning_rate": 5.848552338530067e-05,
|
22341 |
+
"loss": 0.0175,
|
22342 |
+
"step": 3187
|
22343 |
+
},
|
22344 |
+
{
|
22345 |
+
"epoch": 0.7084444444444444,
|
22346 |
+
"grad_norm": 0.06487097591161728,
|
22347 |
+
"learning_rate": 5.8440979955456574e-05,
|
22348 |
+
"loss": 0.0172,
|
22349 |
+
"step": 3188
|
22350 |
+
},
|
22351 |
+
{
|
22352 |
+
"epoch": 0.7086666666666667,
|
22353 |
+
"grad_norm": 0.06535470485687256,
|
22354 |
+
"learning_rate": 5.839643652561247e-05,
|
22355 |
+
"loss": 0.0174,
|
22356 |
+
"step": 3189
|
22357 |
+
},
|
22358 |
+
{
|
22359 |
+
"epoch": 0.7088888888888889,
|
22360 |
+
"grad_norm": 1.172293782234192,
|
22361 |
+
"learning_rate": 5.835189309576837e-05,
|
22362 |
+
"loss": 1.5616,
|
22363 |
+
"step": 3190
|
22364 |
+
},
|
22365 |
+
{
|
22366 |
+
"epoch": 0.7091111111111111,
|
22367 |
+
"grad_norm": 1.1036264896392822,
|
22368 |
+
"learning_rate": 5.830734966592428e-05,
|
22369 |
+
"loss": 1.6201,
|
22370 |
+
"step": 3191
|
22371 |
+
},
|
22372 |
+
{
|
22373 |
+
"epoch": 0.7093333333333334,
|
22374 |
+
"grad_norm": 0.7746077179908752,
|
22375 |
+
"learning_rate": 5.826280623608018e-05,
|
22376 |
+
"loss": 0.8634,
|
22377 |
+
"step": 3192
|
22378 |
+
},
|
22379 |
+
{
|
22380 |
+
"epoch": 0.7095555555555556,
|
22381 |
+
"grad_norm": 0.9545249342918396,
|
22382 |
+
"learning_rate": 5.821826280623608e-05,
|
22383 |
+
"loss": 1.4337,
|
22384 |
+
"step": 3193
|
22385 |
+
},
|
22386 |
+
{
|
22387 |
+
"epoch": 0.7097777777777777,
|
22388 |
+
"grad_norm": 1.0250579118728638,
|
22389 |
+
"learning_rate": 5.8173719376391986e-05,
|
22390 |
+
"loss": 1.6208,
|
22391 |
+
"step": 3194
|
22392 |
+
},
|
22393 |
+
{
|
22394 |
+
"epoch": 0.71,
|
22395 |
+
"grad_norm": 1.0089478492736816,
|
22396 |
+
"learning_rate": 5.8129175946547884e-05,
|
22397 |
+
"loss": 1.2085,
|
22398 |
+
"step": 3195
|
22399 |
+
},
|
22400 |
+
{
|
22401 |
+
"epoch": 0.7102222222222222,
|
22402 |
+
"grad_norm": 1.1248306035995483,
|
22403 |
+
"learning_rate": 5.808463251670378e-05,
|
22404 |
+
"loss": 1.5717,
|
22405 |
+
"step": 3196
|
22406 |
+
},
|
22407 |
+
{
|
22408 |
+
"epoch": 0.7104444444444444,
|
22409 |
+
"grad_norm": 0.6627147793769836,
|
22410 |
+
"learning_rate": 5.8040089086859695e-05,
|
22411 |
+
"loss": 0.6573,
|
22412 |
+
"step": 3197
|
22413 |
+
},
|
22414 |
+
{
|
22415 |
+
"epoch": 0.7106666666666667,
|
22416 |
+
"grad_norm": 1.230597972869873,
|
22417 |
+
"learning_rate": 5.7995545657015594e-05,
|
22418 |
+
"loss": 1.2766,
|
22419 |
+
"step": 3198
|
22420 |
+
},
|
22421 |
+
{
|
22422 |
+
"epoch": 0.7108888888888889,
|
22423 |
+
"grad_norm": 0.1396600902080536,
|
22424 |
+
"learning_rate": 5.795100222717149e-05,
|
22425 |
+
"loss": 0.0322,
|
22426 |
+
"step": 3199
|
22427 |
+
},
|
22428 |
+
{
|
22429 |
+
"epoch": 0.7111111111111111,
|
22430 |
+
"grad_norm": 0.9573265314102173,
|
22431 |
+
"learning_rate": 5.79064587973274e-05,
|
22432 |
+
"loss": 0.507,
|
22433 |
+
"step": 3200
|
22434 |
+
},
|
22435 |
+
{
|
22436 |
+
"epoch": 0.7113333333333334,
|
22437 |
+
"grad_norm": 0.8519662618637085,
|
22438 |
+
"learning_rate": 5.7861915367483296e-05,
|
22439 |
+
"loss": 1.1341,
|
22440 |
+
"step": 3201
|
22441 |
+
},
|
22442 |
+
{
|
22443 |
+
"epoch": 0.7115555555555556,
|
22444 |
+
"grad_norm": 0.04184136167168617,
|
22445 |
+
"learning_rate": 5.7817371937639195e-05,
|
22446 |
+
"loss": 0.0093,
|
22447 |
+
"step": 3202
|
22448 |
+
},
|
22449 |
+
{
|
22450 |
+
"epoch": 0.7117777777777777,
|
22451 |
+
"grad_norm": 0.6974391341209412,
|
22452 |
+
"learning_rate": 5.777282850779511e-05,
|
22453 |
+
"loss": 1.3839,
|
22454 |
+
"step": 3203
|
22455 |
+
},
|
22456 |
+
{
|
22457 |
+
"epoch": 0.712,
|
22458 |
+
"grad_norm": 0.8318896293640137,
|
22459 |
+
"learning_rate": 5.7728285077951005e-05,
|
22460 |
+
"loss": 2.0999,
|
22461 |
+
"step": 3204
|
22462 |
+
},
|
22463 |
+
{
|
22464 |
+
"epoch": 0.7122222222222222,
|
22465 |
+
"grad_norm": 0.5589978694915771,
|
22466 |
+
"learning_rate": 5.7683741648106904e-05,
|
22467 |
+
"loss": 1.1487,
|
22468 |
+
"step": 3205
|
22469 |
+
},
|
22470 |
+
{
|
22471 |
+
"epoch": 0.7124444444444444,
|
22472 |
+
"grad_norm": 0.07977552711963654,
|
22473 |
+
"learning_rate": 5.7639198218262816e-05,
|
22474 |
+
"loss": 0.0123,
|
22475 |
+
"step": 3206
|
22476 |
+
},
|
22477 |
+
{
|
22478 |
+
"epoch": 0.7126666666666667,
|
22479 |
+
"grad_norm": 0.07479345053434372,
|
22480 |
+
"learning_rate": 5.7594654788418715e-05,
|
22481 |
+
"loss": 0.0123,
|
22482 |
+
"step": 3207
|
22483 |
+
},
|
22484 |
+
{
|
22485 |
+
"epoch": 0.7128888888888889,
|
22486 |
+
"grad_norm": 0.7520397305488586,
|
22487 |
+
"learning_rate": 5.7550111358574607e-05,
|
22488 |
+
"loss": 1.8669,
|
22489 |
+
"step": 3208
|
22490 |
+
},
|
22491 |
+
{
|
22492 |
+
"epoch": 0.7131111111111111,
|
22493 |
+
"grad_norm": 0.891527533531189,
|
22494 |
+
"learning_rate": 5.750556792873052e-05,
|
22495 |
+
"loss": 1.8476,
|
22496 |
+
"step": 3209
|
22497 |
+
},
|
22498 |
+
{
|
22499 |
+
"epoch": 0.7133333333333334,
|
22500 |
+
"grad_norm": 0.870412290096283,
|
22501 |
+
"learning_rate": 5.746102449888642e-05,
|
22502 |
+
"loss": 1.9461,
|
22503 |
+
"step": 3210
|
22504 |
+
},
|
22505 |
+
{
|
22506 |
+
"epoch": 0.7135555555555556,
|
22507 |
+
"grad_norm": 0.9231261610984802,
|
22508 |
+
"learning_rate": 5.7416481069042316e-05,
|
22509 |
+
"loss": 2.1436,
|
22510 |
+
"step": 3211
|
22511 |
+
},
|
22512 |
+
{
|
22513 |
+
"epoch": 0.7137777777777777,
|
22514 |
+
"grad_norm": 0.804538369178772,
|
22515 |
+
"learning_rate": 5.737193763919823e-05,
|
22516 |
+
"loss": 1.6058,
|
22517 |
+
"step": 3212
|
22518 |
+
},
|
22519 |
+
{
|
22520 |
+
"epoch": 0.714,
|
22521 |
+
"grad_norm": 0.9710292220115662,
|
22522 |
+
"learning_rate": 5.7327394209354127e-05,
|
22523 |
+
"loss": 1.0738,
|
22524 |
+
"step": 3213
|
22525 |
+
},
|
22526 |
+
{
|
22527 |
+
"epoch": 0.7142222222222222,
|
22528 |
+
"grad_norm": 0.9411685466766357,
|
22529 |
+
"learning_rate": 5.7282850779510025e-05,
|
22530 |
+
"loss": 2.0708,
|
22531 |
+
"step": 3214
|
22532 |
+
},
|
22533 |
+
{
|
22534 |
+
"epoch": 0.7144444444444444,
|
22535 |
+
"grad_norm": 0.9712237119674683,
|
22536 |
+
"learning_rate": 5.723830734966593e-05,
|
22537 |
+
"loss": 2.1416,
|
22538 |
+
"step": 3215
|
22539 |
+
},
|
22540 |
+
{
|
22541 |
+
"epoch": 0.7146666666666667,
|
22542 |
+
"grad_norm": 0.6982542872428894,
|
22543 |
+
"learning_rate": 5.719376391982183e-05,
|
22544 |
+
"loss": 0.7926,
|
22545 |
+
"step": 3216
|
22546 |
+
},
|
22547 |
+
{
|
22548 |
+
"epoch": 0.7148888888888889,
|
22549 |
+
"grad_norm": 0.7483058571815491,
|
22550 |
+
"learning_rate": 5.714922048997773e-05,
|
22551 |
+
"loss": 0.8696,
|
22552 |
+
"step": 3217
|
22553 |
+
},
|
22554 |
+
{
|
22555 |
+
"epoch": 0.7151111111111111,
|
22556 |
+
"grad_norm": 0.6382774114608765,
|
22557 |
+
"learning_rate": 5.710467706013364e-05,
|
22558 |
+
"loss": 0.8758,
|
22559 |
+
"step": 3218
|
22560 |
+
},
|
22561 |
+
{
|
22562 |
+
"epoch": 0.7153333333333334,
|
22563 |
+
"grad_norm": 0.09534616768360138,
|
22564 |
+
"learning_rate": 5.706013363028954e-05,
|
22565 |
+
"loss": 0.0179,
|
22566 |
+
"step": 3219
|
22567 |
+
},
|
22568 |
+
{
|
22569 |
+
"epoch": 0.7155555555555555,
|
22570 |
+
"grad_norm": 0.9931474328041077,
|
22571 |
+
"learning_rate": 5.701559020044544e-05,
|
22572 |
+
"loss": 1.7448,
|
22573 |
+
"step": 3220
|
22574 |
+
},
|
22575 |
+
{
|
22576 |
+
"epoch": 0.7157777777777777,
|
22577 |
+
"grad_norm": 1.051207184791565,
|
22578 |
+
"learning_rate": 5.697104677060134e-05,
|
22579 |
+
"loss": 1.8485,
|
22580 |
+
"step": 3221
|
22581 |
+
},
|
22582 |
+
{
|
22583 |
+
"epoch": 0.716,
|
22584 |
+
"grad_norm": 0.9426413178443909,
|
22585 |
+
"learning_rate": 5.692650334075724e-05,
|
22586 |
+
"loss": 1.6347,
|
22587 |
+
"step": 3222
|
22588 |
+
},
|
22589 |
+
{
|
22590 |
+
"epoch": 0.7162222222222222,
|
22591 |
+
"grad_norm": 0.919272243976593,
|
22592 |
+
"learning_rate": 5.688195991091314e-05,
|
22593 |
+
"loss": 1.7151,
|
22594 |
+
"step": 3223
|
22595 |
+
},
|
22596 |
+
{
|
22597 |
+
"epoch": 0.7164444444444444,
|
22598 |
+
"grad_norm": 0.9655510783195496,
|
22599 |
+
"learning_rate": 5.683741648106905e-05,
|
22600 |
+
"loss": 1.6615,
|
22601 |
+
"step": 3224
|
22602 |
+
},
|
22603 |
+
{
|
22604 |
+
"epoch": 0.7166666666666667,
|
22605 |
+
"grad_norm": 1.2728337049484253,
|
22606 |
+
"learning_rate": 5.679287305122495e-05,
|
22607 |
+
"loss": 1.8277,
|
22608 |
+
"step": 3225
|
22609 |
+
},
|
22610 |
+
{
|
22611 |
+
"epoch": 0.7168888888888889,
|
22612 |
+
"grad_norm": 0.7086578011512756,
|
22613 |
+
"learning_rate": 5.674832962138085e-05,
|
22614 |
+
"loss": 0.8322,
|
22615 |
+
"step": 3226
|
22616 |
+
},
|
22617 |
+
{
|
22618 |
+
"epoch": 0.7171111111111111,
|
22619 |
+
"grad_norm": 0.06795133650302887,
|
22620 |
+
"learning_rate": 5.6703786191536754e-05,
|
22621 |
+
"loss": 0.017,
|
22622 |
+
"step": 3227
|
22623 |
+
},
|
22624 |
+
{
|
22625 |
+
"epoch": 0.7173333333333334,
|
22626 |
+
"grad_norm": 0.06331969052553177,
|
22627 |
+
"learning_rate": 5.665924276169265e-05,
|
22628 |
+
"loss": 0.0171,
|
22629 |
+
"step": 3228
|
22630 |
+
},
|
22631 |
+
{
|
22632 |
+
"epoch": 0.7175555555555555,
|
22633 |
+
"grad_norm": 0.0663456916809082,
|
22634 |
+
"learning_rate": 5.661469933184855e-05,
|
22635 |
+
"loss": 0.0173,
|
22636 |
+
"step": 3229
|
22637 |
+
},
|
22638 |
+
{
|
22639 |
+
"epoch": 0.7177777777777777,
|
22640 |
+
"grad_norm": 0.8989565968513489,
|
22641 |
+
"learning_rate": 5.6570155902004463e-05,
|
22642 |
+
"loss": 1.6765,
|
22643 |
+
"step": 3230
|
22644 |
+
},
|
22645 |
+
{
|
22646 |
+
"epoch": 0.718,
|
22647 |
+
"grad_norm": 0.7637456059455872,
|
22648 |
+
"learning_rate": 5.652561247216036e-05,
|
22649 |
+
"loss": 0.7514,
|
22650 |
+
"step": 3231
|
22651 |
+
},
|
22652 |
+
{
|
22653 |
+
"epoch": 0.7182222222222222,
|
22654 |
+
"grad_norm": 0.08078856021165848,
|
22655 |
+
"learning_rate": 5.648106904231626e-05,
|
22656 |
+
"loss": 0.0163,
|
22657 |
+
"step": 3232
|
22658 |
+
},
|
22659 |
+
{
|
22660 |
+
"epoch": 0.7184444444444444,
|
22661 |
+
"grad_norm": 0.8078843951225281,
|
22662 |
+
"learning_rate": 5.643652561247216e-05,
|
22663 |
+
"loss": 0.8599,
|
22664 |
+
"step": 3233
|
22665 |
+
},
|
22666 |
+
{
|
22667 |
+
"epoch": 0.7186666666666667,
|
22668 |
+
"grad_norm": 1.0271605253219604,
|
22669 |
+
"learning_rate": 5.6391982182628065e-05,
|
22670 |
+
"loss": 1.415,
|
22671 |
+
"step": 3234
|
22672 |
+
},
|
22673 |
+
{
|
22674 |
+
"epoch": 0.7188888888888889,
|
22675 |
+
"grad_norm": 1.2213661670684814,
|
22676 |
+
"learning_rate": 5.634743875278396e-05,
|
22677 |
+
"loss": 1.8322,
|
22678 |
+
"step": 3235
|
22679 |
+
},
|
22680 |
+
{
|
22681 |
+
"epoch": 0.7191111111111111,
|
22682 |
+
"grad_norm": 1.0940077304840088,
|
22683 |
+
"learning_rate": 5.630289532293986e-05,
|
22684 |
+
"loss": 1.5287,
|
22685 |
+
"step": 3236
|
22686 |
+
},
|
22687 |
+
{
|
22688 |
+
"epoch": 0.7193333333333334,
|
22689 |
+
"grad_norm": 1.0005013942718506,
|
22690 |
+
"learning_rate": 5.6258351893095774e-05,
|
22691 |
+
"loss": 1.6254,
|
22692 |
+
"step": 3237
|
22693 |
+
},
|
22694 |
+
{
|
22695 |
+
"epoch": 0.7195555555555555,
|
22696 |
+
"grad_norm": 0.8303656578063965,
|
22697 |
+
"learning_rate": 5.621380846325167e-05,
|
22698 |
+
"loss": 1.027,
|
22699 |
+
"step": 3238
|
22700 |
+
},
|
22701 |
+
{
|
22702 |
+
"epoch": 0.7197777777777777,
|
22703 |
+
"grad_norm": 0.704897403717041,
|
22704 |
+
"learning_rate": 5.616926503340757e-05,
|
22705 |
+
"loss": 0.7988,
|
22706 |
+
"step": 3239
|
22707 |
+
},
|
22708 |
+
{
|
22709 |
+
"epoch": 0.72,
|
22710 |
+
"grad_norm": 1.0700993537902832,
|
22711 |
+
"learning_rate": 5.6124721603563476e-05,
|
22712 |
+
"loss": 1.7471,
|
22713 |
+
"step": 3240
|
22714 |
+
},
|
22715 |
+
{
|
22716 |
+
"epoch": 0.7202222222222222,
|
22717 |
+
"grad_norm": 1.1328794956207275,
|
22718 |
+
"learning_rate": 5.6080178173719375e-05,
|
22719 |
+
"loss": 1.2742,
|
22720 |
+
"step": 3241
|
22721 |
+
},
|
22722 |
+
{
|
22723 |
+
"epoch": 0.7204444444444444,
|
22724 |
+
"grad_norm": 0.9732044339179993,
|
22725 |
+
"learning_rate": 5.6035634743875274e-05,
|
22726 |
+
"loss": 1.3644,
|
22727 |
+
"step": 3242
|
22728 |
+
},
|
22729 |
+
{
|
22730 |
+
"epoch": 0.7206666666666667,
|
22731 |
+
"grad_norm": 1.174729347229004,
|
22732 |
+
"learning_rate": 5.5991091314031186e-05,
|
22733 |
+
"loss": 1.5483,
|
22734 |
+
"step": 3243
|
22735 |
+
},
|
22736 |
+
{
|
22737 |
+
"epoch": 0.7208888888888889,
|
22738 |
+
"grad_norm": 0.78294837474823,
|
22739 |
+
"learning_rate": 5.5946547884187084e-05,
|
22740 |
+
"loss": 0.6034,
|
22741 |
+
"step": 3244
|
22742 |
+
},
|
22743 |
+
{
|
22744 |
+
"epoch": 0.7211111111111111,
|
22745 |
+
"grad_norm": 0.8941324949264526,
|
22746 |
+
"learning_rate": 5.590200445434298e-05,
|
22747 |
+
"loss": 1.0701,
|
22748 |
+
"step": 3245
|
22749 |
+
},
|
22750 |
+
{
|
22751 |
+
"epoch": 0.7213333333333334,
|
22752 |
+
"grad_norm": 1.1886690855026245,
|
22753 |
+
"learning_rate": 5.5857461024498895e-05,
|
22754 |
+
"loss": 1.3666,
|
22755 |
+
"step": 3246
|
22756 |
+
},
|
22757 |
+
{
|
22758 |
+
"epoch": 0.7215555555555555,
|
22759 |
+
"grad_norm": 1.0057522058486938,
|
22760 |
+
"learning_rate": 5.581291759465479e-05,
|
22761 |
+
"loss": 1.1996,
|
22762 |
+
"step": 3247
|
22763 |
+
},
|
22764 |
+
{
|
22765 |
+
"epoch": 0.7217777777777777,
|
22766 |
+
"grad_norm": 0.839670717716217,
|
22767 |
+
"learning_rate": 5.5768374164810685e-05,
|
22768 |
+
"loss": 0.6609,
|
22769 |
+
"step": 3248
|
22770 |
+
},
|
22771 |
+
{
|
22772 |
+
"epoch": 0.722,
|
22773 |
+
"grad_norm": 1.1767035722732544,
|
22774 |
+
"learning_rate": 5.57238307349666e-05,
|
22775 |
+
"loss": 1.0941,
|
22776 |
+
"step": 3249
|
22777 |
+
},
|
22778 |
+
{
|
22779 |
+
"epoch": 0.7222222222222222,
|
22780 |
+
"grad_norm": 1.2154204845428467,
|
22781 |
+
"learning_rate": 5.5679287305122496e-05,
|
22782 |
+
"loss": 0.7415,
|
22783 |
+
"step": 3250
|
22784 |
+
},
|
22785 |
+
{
|
22786 |
+
"epoch": 0.7224444444444444,
|
22787 |
+
"grad_norm": 0.5861397385597229,
|
22788 |
+
"learning_rate": 5.5634743875278395e-05,
|
22789 |
+
"loss": 1.1437,
|
22790 |
+
"step": 3251
|
22791 |
+
},
|
22792 |
+
{
|
22793 |
+
"epoch": 0.7226666666666667,
|
22794 |
+
"grad_norm": 0.041759125888347626,
|
22795 |
+
"learning_rate": 5.559020044543431e-05,
|
22796 |
+
"loss": 0.0097,
|
22797 |
+
"step": 3252
|
22798 |
+
},
|
22799 |
+
{
|
22800 |
+
"epoch": 0.7228888888888889,
|
22801 |
+
"grad_norm": 0.7977886199951172,
|
22802 |
+
"learning_rate": 5.5545657015590205e-05,
|
22803 |
+
"loss": 2.1774,
|
22804 |
+
"step": 3253
|
22805 |
+
},
|
22806 |
+
{
|
22807 |
+
"epoch": 0.7231111111111111,
|
22808 |
+
"grad_norm": 0.571662425994873,
|
22809 |
+
"learning_rate": 5.5501113585746104e-05,
|
22810 |
+
"loss": 1.1858,
|
22811 |
+
"step": 3254
|
22812 |
+
},
|
22813 |
+
{
|
22814 |
+
"epoch": 0.7233333333333334,
|
22815 |
+
"grad_norm": 0.7104848027229309,
|
22816 |
+
"learning_rate": 5.545657015590201e-05,
|
22817 |
+
"loss": 1.0467,
|
22818 |
+
"step": 3255
|
22819 |
+
},
|
22820 |
+
{
|
22821 |
+
"epoch": 0.7235555555555555,
|
22822 |
+
"grad_norm": 0.8153942823410034,
|
22823 |
+
"learning_rate": 5.541202672605791e-05,
|
22824 |
+
"loss": 1.976,
|
22825 |
+
"step": 3256
|
22826 |
+
},
|
22827 |
+
{
|
22828 |
+
"epoch": 0.7237777777777777,
|
22829 |
+
"grad_norm": 0.08071549981832504,
|
22830 |
+
"learning_rate": 5.5367483296213806e-05,
|
22831 |
+
"loss": 0.0125,
|
22832 |
+
"step": 3257
|
22833 |
+
},
|
22834 |
+
{
|
22835 |
+
"epoch": 0.724,
|
22836 |
+
"grad_norm": 0.12843948602676392,
|
22837 |
+
"learning_rate": 5.532293986636972e-05,
|
22838 |
+
"loss": 0.0124,
|
22839 |
+
"step": 3258
|
22840 |
+
},
|
22841 |
+
{
|
22842 |
+
"epoch": 0.7242222222222222,
|
22843 |
+
"grad_norm": 0.07995433360338211,
|
22844 |
+
"learning_rate": 5.527839643652562e-05,
|
22845 |
+
"loss": 0.0122,
|
22846 |
+
"step": 3259
|
22847 |
+
},
|
22848 |
+
{
|
22849 |
+
"epoch": 0.7244444444444444,
|
22850 |
+
"grad_norm": 0.0713566243648529,
|
22851 |
+
"learning_rate": 5.5233853006681516e-05,
|
22852 |
+
"loss": 0.0119,
|
22853 |
+
"step": 3260
|
22854 |
+
},
|
22855 |
+
{
|
22856 |
+
"epoch": 0.7246666666666667,
|
22857 |
+
"grad_norm": 0.07306591421365738,
|
22858 |
+
"learning_rate": 5.518930957683742e-05,
|
22859 |
+
"loss": 0.0118,
|
22860 |
+
"step": 3261
|
22861 |
+
},
|
22862 |
+
{
|
22863 |
+
"epoch": 0.7248888888888889,
|
22864 |
+
"grad_norm": 0.8607704043388367,
|
22865 |
+
"learning_rate": 5.514476614699332e-05,
|
22866 |
+
"loss": 2.1437,
|
22867 |
+
"step": 3262
|
22868 |
+
},
|
22869 |
+
{
|
22870 |
+
"epoch": 0.7251111111111112,
|
22871 |
+
"grad_norm": 0.8772170543670654,
|
22872 |
+
"learning_rate": 5.510022271714922e-05,
|
22873 |
+
"loss": 1.9092,
|
22874 |
+
"step": 3263
|
22875 |
+
},
|
22876 |
+
{
|
22877 |
+
"epoch": 0.7253333333333334,
|
22878 |
+
"grad_norm": 0.9902425408363342,
|
22879 |
+
"learning_rate": 5.505567928730513e-05,
|
22880 |
+
"loss": 2.1999,
|
22881 |
+
"step": 3264
|
22882 |
+
},
|
22883 |
+
{
|
22884 |
+
"epoch": 0.7255555555555555,
|
22885 |
+
"grad_norm": 0.926304817199707,
|
22886 |
+
"learning_rate": 5.501113585746103e-05,
|
22887 |
+
"loss": 2.0622,
|
22888 |
+
"step": 3265
|
22889 |
+
},
|
22890 |
+
{
|
22891 |
+
"epoch": 0.7257777777777777,
|
22892 |
+
"grad_norm": 0.8717379570007324,
|
22893 |
+
"learning_rate": 5.496659242761693e-05,
|
22894 |
+
"loss": 1.6769,
|
22895 |
+
"step": 3266
|
22896 |
+
},
|
22897 |
+
{
|
22898 |
+
"epoch": 0.726,
|
22899 |
+
"grad_norm": 1.0354970693588257,
|
22900 |
+
"learning_rate": 5.492204899777283e-05,
|
22901 |
+
"loss": 1.9093,
|
22902 |
+
"step": 3267
|
22903 |
+
},
|
22904 |
+
{
|
22905 |
+
"epoch": 0.7262222222222222,
|
22906 |
+
"grad_norm": 0.9445512890815735,
|
22907 |
+
"learning_rate": 5.487750556792873e-05,
|
22908 |
+
"loss": 1.9806,
|
22909 |
+
"step": 3268
|
22910 |
+
},
|
22911 |
+
{
|
22912 |
+
"epoch": 0.7264444444444444,
|
22913 |
+
"grad_norm": 0.9720260500907898,
|
22914 |
+
"learning_rate": 5.483296213808463e-05,
|
22915 |
+
"loss": 1.793,
|
22916 |
+
"step": 3269
|
22917 |
+
},
|
22918 |
+
{
|
22919 |
+
"epoch": 0.7266666666666667,
|
22920 |
+
"grad_norm": 0.932304859161377,
|
22921 |
+
"learning_rate": 5.478841870824054e-05,
|
22922 |
+
"loss": 1.8463,
|
22923 |
+
"step": 3270
|
22924 |
+
},
|
22925 |
+
{
|
22926 |
+
"epoch": 0.7268888888888889,
|
22927 |
+
"grad_norm": 0.9925035238265991,
|
22928 |
+
"learning_rate": 5.474387527839644e-05,
|
22929 |
+
"loss": 1.9726,
|
22930 |
+
"step": 3271
|
22931 |
+
},
|
22932 |
+
{
|
22933 |
+
"epoch": 0.7271111111111112,
|
22934 |
+
"grad_norm": 0.5608296990394592,
|
22935 |
+
"learning_rate": 5.469933184855234e-05,
|
22936 |
+
"loss": 0.7764,
|
22937 |
+
"step": 3272
|
22938 |
+
},
|
22939 |
+
{
|
22940 |
+
"epoch": 0.7273333333333334,
|
22941 |
+
"grad_norm": 0.6601234674453735,
|
22942 |
+
"learning_rate": 5.4654788418708245e-05,
|
22943 |
+
"loss": 0.8271,
|
22944 |
+
"step": 3273
|
22945 |
+
},
|
22946 |
+
{
|
22947 |
+
"epoch": 0.7275555555555555,
|
22948 |
+
"grad_norm": 0.6779617071151733,
|
22949 |
+
"learning_rate": 5.461024498886414e-05,
|
22950 |
+
"loss": 0.9032,
|
22951 |
+
"step": 3274
|
22952 |
+
},
|
22953 |
+
{
|
22954 |
+
"epoch": 0.7277777777777777,
|
22955 |
+
"grad_norm": 0.9753432869911194,
|
22956 |
+
"learning_rate": 5.456570155902004e-05,
|
22957 |
+
"loss": 1.7793,
|
22958 |
+
"step": 3275
|
22959 |
+
},
|
22960 |
+
{
|
22961 |
+
"epoch": 0.728,
|
22962 |
+
"grad_norm": 0.9676978588104248,
|
22963 |
+
"learning_rate": 5.4521158129175954e-05,
|
22964 |
+
"loss": 1.6972,
|
22965 |
+
"step": 3276
|
22966 |
+
},
|
22967 |
+
{
|
22968 |
+
"epoch": 0.7282222222222222,
|
22969 |
+
"grad_norm": 1.093235969543457,
|
22970 |
+
"learning_rate": 5.447661469933185e-05,
|
22971 |
+
"loss": 2.0882,
|
22972 |
+
"step": 3277
|
22973 |
+
},
|
22974 |
+
{
|
22975 |
+
"epoch": 0.7284444444444444,
|
22976 |
+
"grad_norm": 1.0347819328308105,
|
22977 |
+
"learning_rate": 5.443207126948775e-05,
|
22978 |
+
"loss": 2.039,
|
22979 |
+
"step": 3278
|
22980 |
+
},
|
22981 |
+
{
|
22982 |
+
"epoch": 0.7286666666666667,
|
22983 |
+
"grad_norm": 0.071097731590271,
|
22984 |
+
"learning_rate": 5.4387527839643657e-05,
|
22985 |
+
"loss": 0.0174,
|
22986 |
+
"step": 3279
|
22987 |
+
},
|
22988 |
+
{
|
22989 |
+
"epoch": 0.7288888888888889,
|
22990 |
+
"grad_norm": 0.9010851383209229,
|
22991 |
+
"learning_rate": 5.4342984409799555e-05,
|
22992 |
+
"loss": 1.0428,
|
22993 |
+
"step": 3280
|
22994 |
+
},
|
22995 |
+
{
|
22996 |
+
"epoch": 0.7291111111111112,
|
22997 |
+
"grad_norm": 0.07293925434350967,
|
22998 |
+
"learning_rate": 5.4298440979955454e-05,
|
22999 |
+
"loss": 0.0173,
|
23000 |
+
"step": 3281
|
23001 |
+
},
|
23002 |
+
{
|
23003 |
+
"epoch": 0.7293333333333333,
|
23004 |
+
"grad_norm": 1.1432619094848633,
|
23005 |
+
"learning_rate": 5.4253897550111366e-05,
|
23006 |
+
"loss": 1.8629,
|
23007 |
+
"step": 3282
|
23008 |
+
},
|
23009 |
+
{
|
23010 |
+
"epoch": 0.7295555555555555,
|
23011 |
+
"grad_norm": 1.1886756420135498,
|
23012 |
+
"learning_rate": 5.4209354120267264e-05,
|
23013 |
+
"loss": 1.6837,
|
23014 |
+
"step": 3283
|
23015 |
+
},
|
23016 |
+
{
|
23017 |
+
"epoch": 0.7297777777777777,
|
23018 |
+
"grad_norm": 1.0832699537277222,
|
23019 |
+
"learning_rate": 5.416481069042316e-05,
|
23020 |
+
"loss": 1.536,
|
23021 |
+
"step": 3284
|
23022 |
+
},
|
23023 |
+
{
|
23024 |
+
"epoch": 0.73,
|
23025 |
+
"grad_norm": 0.6643537878990173,
|
23026 |
+
"learning_rate": 5.412026726057907e-05,
|
23027 |
+
"loss": 0.7855,
|
23028 |
+
"step": 3285
|
23029 |
+
},
|
23030 |
+
{
|
23031 |
+
"epoch": 0.7302222222222222,
|
23032 |
+
"grad_norm": 1.0094225406646729,
|
23033 |
+
"learning_rate": 5.407572383073497e-05,
|
23034 |
+
"loss": 1.4139,
|
23035 |
+
"step": 3286
|
23036 |
+
},
|
23037 |
+
{
|
23038 |
+
"epoch": 0.7304444444444445,
|
23039 |
+
"grad_norm": 1.14029860496521,
|
23040 |
+
"learning_rate": 5.4031180400890866e-05,
|
23041 |
+
"loss": 1.5854,
|
23042 |
+
"step": 3287
|
23043 |
+
},
|
23044 |
+
{
|
23045 |
+
"epoch": 0.7306666666666667,
|
23046 |
+
"grad_norm": 0.9698799848556519,
|
23047 |
+
"learning_rate": 5.398663697104678e-05,
|
23048 |
+
"loss": 1.4863,
|
23049 |
+
"step": 3288
|
23050 |
+
},
|
23051 |
+
{
|
23052 |
+
"epoch": 0.7308888888888889,
|
23053 |
+
"grad_norm": 1.1054226160049438,
|
23054 |
+
"learning_rate": 5.3942093541202676e-05,
|
23055 |
+
"loss": 1.3324,
|
23056 |
+
"step": 3289
|
23057 |
+
},
|
23058 |
+
{
|
23059 |
+
"epoch": 0.7311111111111112,
|
23060 |
+
"grad_norm": 1.1010569334030151,
|
23061 |
+
"learning_rate": 5.3897550111358575e-05,
|
23062 |
+
"loss": 1.4656,
|
23063 |
+
"step": 3290
|
23064 |
+
},
|
23065 |
+
{
|
23066 |
+
"epoch": 0.7313333333333333,
|
23067 |
+
"grad_norm": 1.315499186515808,
|
23068 |
+
"learning_rate": 5.385300668151449e-05,
|
23069 |
+
"loss": 1.4048,
|
23070 |
+
"step": 3291
|
23071 |
+
},
|
23072 |
+
{
|
23073 |
+
"epoch": 0.7315555555555555,
|
23074 |
+
"grad_norm": 1.108127474784851,
|
23075 |
+
"learning_rate": 5.3808463251670386e-05,
|
23076 |
+
"loss": 1.327,
|
23077 |
+
"step": 3292
|
23078 |
+
},
|
23079 |
+
{
|
23080 |
+
"epoch": 0.7317777777777777,
|
23081 |
+
"grad_norm": 0.6661926507949829,
|
23082 |
+
"learning_rate": 5.376391982182628e-05,
|
23083 |
+
"loss": 0.6108,
|
23084 |
+
"step": 3293
|
23085 |
+
},
|
23086 |
+
{
|
23087 |
+
"epoch": 0.732,
|
23088 |
+
"grad_norm": 0.9805776476860046,
|
23089 |
+
"learning_rate": 5.371937639198219e-05,
|
23090 |
+
"loss": 1.1752,
|
23091 |
+
"step": 3294
|
23092 |
+
},
|
23093 |
+
{
|
23094 |
+
"epoch": 0.7322222222222222,
|
23095 |
+
"grad_norm": 1.0693986415863037,
|
23096 |
+
"learning_rate": 5.367483296213809e-05,
|
23097 |
+
"loss": 1.3078,
|
23098 |
+
"step": 3295
|
23099 |
+
},
|
23100 |
+
{
|
23101 |
+
"epoch": 0.7324444444444445,
|
23102 |
+
"grad_norm": 1.078148603439331,
|
23103 |
+
"learning_rate": 5.363028953229399e-05,
|
23104 |
+
"loss": 1.2446,
|
23105 |
+
"step": 3296
|
23106 |
+
},
|
23107 |
+
{
|
23108 |
+
"epoch": 0.7326666666666667,
|
23109 |
+
"grad_norm": 1.1625440120697021,
|
23110 |
+
"learning_rate": 5.35857461024499e-05,
|
23111 |
+
"loss": 1.2387,
|
23112 |
+
"step": 3297
|
23113 |
+
},
|
23114 |
+
{
|
23115 |
+
"epoch": 0.7328888888888889,
|
23116 |
+
"grad_norm": 1.1278488636016846,
|
23117 |
+
"learning_rate": 5.35412026726058e-05,
|
23118 |
+
"loss": 1.1962,
|
23119 |
+
"step": 3298
|
23120 |
+
},
|
23121 |
+
{
|
23122 |
+
"epoch": 0.7331111111111112,
|
23123 |
+
"grad_norm": 1.182511806488037,
|
23124 |
+
"learning_rate": 5.3496659242761696e-05,
|
23125 |
+
"loss": 1.1573,
|
23126 |
+
"step": 3299
|
23127 |
+
},
|
23128 |
+
{
|
23129 |
+
"epoch": 0.7333333333333333,
|
23130 |
+
"grad_norm": 1.1381057500839233,
|
23131 |
+
"learning_rate": 5.34521158129176e-05,
|
23132 |
+
"loss": 0.7817,
|
23133 |
+
"step": 3300
|
23134 |
+
},
|
23135 |
+
{
|
23136 |
+
"epoch": 0.7335555555555555,
|
23137 |
+
"grad_norm": 0.5531929135322571,
|
23138 |
+
"learning_rate": 5.34075723830735e-05,
|
23139 |
+
"loss": 0.8331,
|
23140 |
+
"step": 3301
|
23141 |
+
},
|
23142 |
+
{
|
23143 |
+
"epoch": 0.7337777777777778,
|
23144 |
+
"grad_norm": 0.8333101868629456,
|
23145 |
+
"learning_rate": 5.33630289532294e-05,
|
23146 |
+
"loss": 1.9768,
|
23147 |
+
"step": 3302
|
23148 |
+
},
|
23149 |
+
{
|
23150 |
+
"epoch": 0.734,
|
23151 |
+
"grad_norm": 0.6918635964393616,
|
23152 |
+
"learning_rate": 5.331848552338531e-05,
|
23153 |
+
"loss": 1.0828,
|
23154 |
+
"step": 3303
|
23155 |
+
},
|
23156 |
+
{
|
23157 |
+
"epoch": 0.7342222222222222,
|
23158 |
+
"grad_norm": 0.9859722256660461,
|
23159 |
+
"learning_rate": 5.327394209354121e-05,
|
23160 |
+
"loss": 2.2754,
|
23161 |
+
"step": 3304
|
23162 |
+
},
|
23163 |
+
{
|
23164 |
+
"epoch": 0.7344444444444445,
|
23165 |
+
"grad_norm": 0.6960622072219849,
|
23166 |
+
"learning_rate": 5.322939866369711e-05,
|
23167 |
+
"loss": 1.0663,
|
23168 |
+
"step": 3305
|
23169 |
+
},
|
23170 |
+
{
|
23171 |
+
"epoch": 0.7346666666666667,
|
23172 |
+
"grad_norm": 1.1575109958648682,
|
23173 |
+
"learning_rate": 5.3184855233853006e-05,
|
23174 |
+
"loss": 2.2622,
|
23175 |
+
"step": 3306
|
23176 |
+
},
|
23177 |
+
{
|
23178 |
+
"epoch": 0.7348888888888889,
|
23179 |
+
"grad_norm": 0.5985379219055176,
|
23180 |
+
"learning_rate": 5.314031180400891e-05,
|
23181 |
+
"loss": 1.0319,
|
23182 |
+
"step": 3307
|
23183 |
+
},
|
23184 |
+
{
|
23185 |
+
"epoch": 0.7351111111111112,
|
23186 |
+
"grad_norm": 0.06290951371192932,
|
23187 |
+
"learning_rate": 5.309576837416481e-05,
|
23188 |
+
"loss": 0.0109,
|
23189 |
+
"step": 3308
|
23190 |
+
},
|
23191 |
+
{
|
23192 |
+
"epoch": 0.7353333333333333,
|
23193 |
+
"grad_norm": 0.06811843812465668,
|
23194 |
+
"learning_rate": 5.305122494432071e-05,
|
23195 |
+
"loss": 0.0109,
|
23196 |
+
"step": 3309
|
23197 |
+
},
|
23198 |
+
{
|
23199 |
+
"epoch": 0.7355555555555555,
|
23200 |
+
"grad_norm": 0.06429023295640945,
|
23201 |
+
"learning_rate": 5.300668151447662e-05,
|
23202 |
+
"loss": 0.0107,
|
23203 |
+
"step": 3310
|
23204 |
+
},
|
23205 |
+
{
|
23206 |
+
"epoch": 0.7357777777777778,
|
23207 |
+
"grad_norm": 0.06323552876710892,
|
23208 |
+
"learning_rate": 5.296213808463252e-05,
|
23209 |
+
"loss": 0.0107,
|
23210 |
+
"step": 3311
|
23211 |
+
},
|
23212 |
+
{
|
23213 |
+
"epoch": 0.736,
|
23214 |
+
"grad_norm": 0.6487092971801758,
|
23215 |
+
"learning_rate": 5.291759465478842e-05,
|
23216 |
+
"loss": 0.9286,
|
23217 |
+
"step": 3312
|
23218 |
+
},
|
23219 |
+
{
|
23220 |
+
"epoch": 0.7362222222222222,
|
23221 |
+
"grad_norm": 0.8638578653335571,
|
23222 |
+
"learning_rate": 5.2873051224944324e-05,
|
23223 |
+
"loss": 1.8427,
|
23224 |
+
"step": 3313
|
23225 |
+
},
|
23226 |
+
{
|
23227 |
+
"epoch": 0.7364444444444445,
|
23228 |
+
"grad_norm": 0.9095218181610107,
|
23229 |
+
"learning_rate": 5.282850779510022e-05,
|
23230 |
+
"loss": 2.0546,
|
23231 |
+
"step": 3314
|
23232 |
+
},
|
23233 |
+
{
|
23234 |
+
"epoch": 0.7366666666666667,
|
23235 |
+
"grad_norm": 0.87845379114151,
|
23236 |
+
"learning_rate": 5.278396436525612e-05,
|
23237 |
+
"loss": 1.9648,
|
23238 |
+
"step": 3315
|
23239 |
+
},
|
23240 |
+
{
|
23241 |
+
"epoch": 0.7368888888888889,
|
23242 |
+
"grad_norm": 0.8854038119316101,
|
23243 |
+
"learning_rate": 5.273942093541203e-05,
|
23244 |
+
"loss": 1.8114,
|
23245 |
+
"step": 3316
|
23246 |
+
},
|
23247 |
+
{
|
23248 |
+
"epoch": 0.7371111111111112,
|
23249 |
+
"grad_norm": 0.5725350379943848,
|
23250 |
+
"learning_rate": 5.269487750556793e-05,
|
23251 |
+
"loss": 1.0721,
|
23252 |
+
"step": 3317
|
23253 |
+
},
|
23254 |
+
{
|
23255 |
+
"epoch": 0.7373333333333333,
|
23256 |
+
"grad_norm": 0.6683716177940369,
|
23257 |
+
"learning_rate": 5.265033407572383e-05,
|
23258 |
+
"loss": 0.9192,
|
23259 |
+
"step": 3318
|
23260 |
+
},
|
23261 |
+
{
|
23262 |
+
"epoch": 0.7375555555555555,
|
23263 |
+
"grad_norm": 0.9927780628204346,
|
23264 |
+
"learning_rate": 5.2605790645879735e-05,
|
23265 |
+
"loss": 1.8748,
|
23266 |
+
"step": 3319
|
23267 |
+
},
|
23268 |
+
{
|
23269 |
+
"epoch": 0.7377777777777778,
|
23270 |
+
"grad_norm": 0.8612250685691833,
|
23271 |
+
"learning_rate": 5.2561247216035634e-05,
|
23272 |
+
"loss": 1.8307,
|
23273 |
+
"step": 3320
|
23274 |
+
},
|
23275 |
+
{
|
23276 |
+
"epoch": 0.738,
|
23277 |
+
"grad_norm": 0.9024035930633545,
|
23278 |
+
"learning_rate": 5.251670378619153e-05,
|
23279 |
+
"loss": 1.8448,
|
23280 |
+
"step": 3321
|
23281 |
+
},
|
23282 |
+
{
|
23283 |
+
"epoch": 0.7382222222222222,
|
23284 |
+
"grad_norm": 0.969914436340332,
|
23285 |
+
"learning_rate": 5.2472160356347445e-05,
|
23286 |
+
"loss": 1.866,
|
23287 |
+
"step": 3322
|
23288 |
+
},
|
23289 |
+
{
|
23290 |
+
"epoch": 0.7384444444444445,
|
23291 |
+
"grad_norm": 0.6315984129905701,
|
23292 |
+
"learning_rate": 5.242761692650334e-05,
|
23293 |
+
"loss": 0.9124,
|
23294 |
+
"step": 3323
|
23295 |
+
},
|
23296 |
+
{
|
23297 |
+
"epoch": 0.7386666666666667,
|
23298 |
+
"grad_norm": 0.07167524099349976,
|
23299 |
+
"learning_rate": 5.238307349665924e-05,
|
23300 |
+
"loss": 0.0158,
|
23301 |
+
"step": 3324
|
23302 |
+
},
|
23303 |
+
{
|
23304 |
+
"epoch": 0.7388888888888889,
|
23305 |
+
"grad_norm": 0.07736406475305557,
|
23306 |
+
"learning_rate": 5.233853006681515e-05,
|
23307 |
+
"loss": 0.0161,
|
23308 |
+
"step": 3325
|
23309 |
+
},
|
23310 |
+
{
|
23311 |
+
"epoch": 0.7391111111111112,
|
23312 |
+
"grad_norm": 0.07857107371091843,
|
23313 |
+
"learning_rate": 5.2293986636971046e-05,
|
23314 |
+
"loss": 0.0164,
|
23315 |
+
"step": 3326
|
23316 |
+
},
|
23317 |
+
{
|
23318 |
+
"epoch": 0.7393333333333333,
|
23319 |
+
"grad_norm": 0.0633215382695198,
|
23320 |
+
"learning_rate": 5.2249443207126944e-05,
|
23321 |
+
"loss": 0.0173,
|
23322 |
+
"step": 3327
|
23323 |
+
},
|
23324 |
+
{
|
23325 |
+
"epoch": 0.7395555555555555,
|
23326 |
+
"grad_norm": 0.7630808353424072,
|
23327 |
+
"learning_rate": 5.2204899777282857e-05,
|
23328 |
+
"loss": 0.9757,
|
23329 |
+
"step": 3328
|
23330 |
+
},
|
23331 |
+
{
|
23332 |
+
"epoch": 0.7397777777777778,
|
23333 |
+
"grad_norm": 0.8969722986221313,
|
23334 |
+
"learning_rate": 5.2160356347438755e-05,
|
23335 |
+
"loss": 1.6171,
|
23336 |
+
"step": 3329
|
23337 |
+
},
|
23338 |
+
{
|
23339 |
+
"epoch": 0.74,
|
23340 |
+
"grad_norm": 0.9955383539199829,
|
23341 |
+
"learning_rate": 5.2115812917594654e-05,
|
23342 |
+
"loss": 1.6627,
|
23343 |
+
"step": 3330
|
23344 |
+
},
|
23345 |
+
{
|
23346 |
+
"epoch": 0.7402222222222222,
|
23347 |
+
"grad_norm": 1.0531073808670044,
|
23348 |
+
"learning_rate": 5.2071269487750566e-05,
|
23349 |
+
"loss": 1.7925,
|
23350 |
+
"step": 3331
|
23351 |
+
},
|
23352 |
+
{
|
23353 |
+
"epoch": 0.7404444444444445,
|
23354 |
+
"grad_norm": 1.1096101999282837,
|
23355 |
+
"learning_rate": 5.202672605790646e-05,
|
23356 |
+
"loss": 1.4716,
|
23357 |
+
"step": 3332
|
23358 |
+
},
|
23359 |
+
{
|
23360 |
+
"epoch": 0.7406666666666667,
|
23361 |
+
"grad_norm": 0.06471211463212967,
|
23362 |
+
"learning_rate": 5.1982182628062356e-05,
|
23363 |
+
"loss": 0.0184,
|
23364 |
+
"step": 3333
|
23365 |
+
},
|
23366 |
+
{
|
23367 |
+
"epoch": 0.7408888888888889,
|
23368 |
+
"grad_norm": 0.07156452536582947,
|
23369 |
+
"learning_rate": 5.193763919821827e-05,
|
23370 |
+
"loss": 0.0183,
|
23371 |
+
"step": 3334
|
23372 |
+
},
|
23373 |
+
{
|
23374 |
+
"epoch": 0.7411111111111112,
|
23375 |
+
"grad_norm": 0.7111669182777405,
|
23376 |
+
"learning_rate": 5.189309576837417e-05,
|
23377 |
+
"loss": 0.8435,
|
23378 |
+
"step": 3335
|
23379 |
+
},
|
23380 |
+
{
|
23381 |
+
"epoch": 0.7413333333333333,
|
23382 |
+
"grad_norm": 0.784017026424408,
|
23383 |
+
"learning_rate": 5.1848552338530066e-05,
|
23384 |
+
"loss": 0.9037,
|
23385 |
+
"step": 3336
|
23386 |
+
},
|
23387 |
+
{
|
23388 |
+
"epoch": 0.7415555555555555,
|
23389 |
+
"grad_norm": 0.07464414834976196,
|
23390 |
+
"learning_rate": 5.180400890868598e-05,
|
23391 |
+
"loss": 0.0173,
|
23392 |
+
"step": 3337
|
23393 |
+
},
|
23394 |
+
{
|
23395 |
+
"epoch": 0.7417777777777778,
|
23396 |
+
"grad_norm": 0.7238468527793884,
|
23397 |
+
"learning_rate": 5.1759465478841876e-05,
|
23398 |
+
"loss": 0.8807,
|
23399 |
+
"step": 3338
|
23400 |
+
},
|
23401 |
+
{
|
23402 |
+
"epoch": 0.742,
|
23403 |
+
"grad_norm": 0.07420375943183899,
|
23404 |
+
"learning_rate": 5.1714922048997775e-05,
|
23405 |
+
"loss": 0.0192,
|
23406 |
+
"step": 3339
|
23407 |
+
},
|
23408 |
+
{
|
23409 |
+
"epoch": 0.7422222222222222,
|
23410 |
+
"grad_norm": 0.07133994251489639,
|
23411 |
+
"learning_rate": 5.167037861915368e-05,
|
23412 |
+
"loss": 0.0189,
|
23413 |
+
"step": 3340
|
23414 |
+
},
|
23415 |
+
{
|
23416 |
+
"epoch": 0.7424444444444445,
|
23417 |
+
"grad_norm": 0.0961189940571785,
|
23418 |
+
"learning_rate": 5.162583518930958e-05,
|
23419 |
+
"loss": 0.0194,
|
23420 |
+
"step": 3341
|
23421 |
+
},
|
23422 |
+
{
|
23423 |
+
"epoch": 0.7426666666666667,
|
23424 |
+
"grad_norm": 1.0209311246871948,
|
23425 |
+
"learning_rate": 5.158129175946548e-05,
|
23426 |
+
"loss": 1.7523,
|
23427 |
+
"step": 3342
|
23428 |
+
},
|
23429 |
+
{
|
23430 |
+
"epoch": 0.7428888888888889,
|
23431 |
+
"grad_norm": 1.067814588546753,
|
23432 |
+
"learning_rate": 5.153674832962139e-05,
|
23433 |
+
"loss": 1.7394,
|
23434 |
+
"step": 3343
|
23435 |
+
},
|
23436 |
+
{
|
23437 |
+
"epoch": 0.7431111111111111,
|
23438 |
+
"grad_norm": 0.10426237434148788,
|
23439 |
+
"learning_rate": 5.149220489977729e-05,
|
23440 |
+
"loss": 0.0266,
|
23441 |
+
"step": 3344
|
23442 |
+
},
|
23443 |
+
{
|
23444 |
+
"epoch": 0.7433333333333333,
|
23445 |
+
"grad_norm": 1.1256235837936401,
|
23446 |
+
"learning_rate": 5.144766146993319e-05,
|
23447 |
+
"loss": 1.5493,
|
23448 |
+
"step": 3345
|
23449 |
+
},
|
23450 |
+
{
|
23451 |
+
"epoch": 0.7435555555555555,
|
23452 |
+
"grad_norm": 1.0838463306427002,
|
23453 |
+
"learning_rate": 5.140311804008909e-05,
|
23454 |
+
"loss": 1.508,
|
23455 |
+
"step": 3346
|
23456 |
+
},
|
23457 |
+
{
|
23458 |
+
"epoch": 0.7437777777777778,
|
23459 |
+
"grad_norm": 1.0034325122833252,
|
23460 |
+
"learning_rate": 5.135857461024499e-05,
|
23461 |
+
"loss": 1.3716,
|
23462 |
+
"step": 3347
|
23463 |
+
},
|
23464 |
+
{
|
23465 |
+
"epoch": 0.744,
|
23466 |
+
"grad_norm": 1.1057904958724976,
|
23467 |
+
"learning_rate": 5.131403118040089e-05,
|
23468 |
+
"loss": 0.9587,
|
23469 |
+
"step": 3348
|
23470 |
+
},
|
23471 |
+
{
|
23472 |
+
"epoch": 0.7442222222222222,
|
23473 |
+
"grad_norm": 0.19667142629623413,
|
23474 |
+
"learning_rate": 5.12694877505568e-05,
|
23475 |
+
"loss": 0.0377,
|
23476 |
+
"step": 3349
|
23477 |
+
},
|
23478 |
+
{
|
23479 |
+
"epoch": 0.7444444444444445,
|
23480 |
+
"grad_norm": 1.0404895544052124,
|
23481 |
+
"learning_rate": 5.12249443207127e-05,
|
23482 |
+
"loss": 1.0799,
|
23483 |
+
"step": 3350
|
23484 |
+
},
|
23485 |
+
{
|
23486 |
+
"epoch": 0.7446666666666667,
|
23487 |
+
"grad_norm": 0.8521629571914673,
|
23488 |
+
"learning_rate": 5.11804008908686e-05,
|
23489 |
+
"loss": 2.0826,
|
23490 |
+
"step": 3351
|
23491 |
+
},
|
23492 |
+
{
|
23493 |
+
"epoch": 0.7448888888888889,
|
23494 |
+
"grad_norm": 0.046493686735630035,
|
23495 |
+
"learning_rate": 5.1135857461024504e-05,
|
23496 |
+
"loss": 0.0101,
|
23497 |
+
"step": 3352
|
23498 |
+
},
|
23499 |
+
{
|
23500 |
+
"epoch": 0.7451111111111111,
|
23501 |
+
"grad_norm": 0.04533799737691879,
|
23502 |
+
"learning_rate": 5.10913140311804e-05,
|
23503 |
+
"loss": 0.0102,
|
23504 |
+
"step": 3353
|
23505 |
+
},
|
23506 |
+
{
|
23507 |
+
"epoch": 0.7453333333333333,
|
23508 |
+
"grad_norm": 0.6256393194198608,
|
23509 |
+
"learning_rate": 5.10467706013363e-05,
|
23510 |
+
"loss": 1.2161,
|
23511 |
+
"step": 3354
|
23512 |
+
},
|
23513 |
+
{
|
23514 |
+
"epoch": 0.7455555555555555,
|
23515 |
+
"grad_norm": 0.5878841280937195,
|
23516 |
+
"learning_rate": 5.100222717149221e-05,
|
23517 |
+
"loss": 1.1603,
|
23518 |
+
"step": 3355
|
23519 |
+
},
|
23520 |
+
{
|
23521 |
+
"epoch": 0.7457777777777778,
|
23522 |
+
"grad_norm": 0.04651748016476631,
|
23523 |
+
"learning_rate": 5.095768374164811e-05,
|
23524 |
+
"loss": 0.01,
|
23525 |
+
"step": 3356
|
23526 |
+
},
|
23527 |
+
{
|
23528 |
+
"epoch": 0.746,
|
23529 |
+
"grad_norm": 0.03794243186712265,
|
23530 |
+
"learning_rate": 5.091314031180401e-05,
|
23531 |
+
"loss": 0.01,
|
23532 |
+
"step": 3357
|
23533 |
+
},
|
23534 |
+
{
|
23535 |
+
"epoch": 0.7462222222222222,
|
23536 |
+
"grad_norm": 0.04922659322619438,
|
23537 |
+
"learning_rate": 5.0868596881959916e-05,
|
23538 |
+
"loss": 0.0097,
|
23539 |
+
"step": 3358
|
23540 |
+
},
|
23541 |
+
{
|
23542 |
+
"epoch": 0.7464444444444445,
|
23543 |
+
"grad_norm": 0.8625622391700745,
|
23544 |
+
"learning_rate": 5.0824053452115814e-05,
|
23545 |
+
"loss": 1.8859,
|
23546 |
+
"step": 3359
|
23547 |
+
},
|
23548 |
+
{
|
23549 |
+
"epoch": 0.7466666666666667,
|
23550 |
+
"grad_norm": 0.8704177141189575,
|
23551 |
+
"learning_rate": 5.077951002227171e-05,
|
23552 |
+
"loss": 1.9087,
|
23553 |
+
"step": 3360
|
23554 |
+
},
|
23555 |
+
{
|
23556 |
+
"epoch": 0.7468888888888889,
|
23557 |
+
"grad_norm": 0.9514003992080688,
|
23558 |
+
"learning_rate": 5.0734966592427625e-05,
|
23559 |
+
"loss": 2.1152,
|
23560 |
+
"step": 3361
|
23561 |
+
},
|
23562 |
+
{
|
23563 |
+
"epoch": 0.7471111111111111,
|
23564 |
+
"grad_norm": 0.9952490925788879,
|
23565 |
+
"learning_rate": 5.0690423162583524e-05,
|
23566 |
+
"loss": 2.237,
|
23567 |
+
"step": 3362
|
23568 |
+
},
|
23569 |
+
{
|
23570 |
+
"epoch": 0.7473333333333333,
|
23571 |
+
"grad_norm": 1.0425519943237305,
|
23572 |
+
"learning_rate": 5.064587973273942e-05,
|
23573 |
+
"loss": 2.1412,
|
23574 |
+
"step": 3363
|
23575 |
+
},
|
23576 |
+
{
|
23577 |
+
"epoch": 0.7475555555555555,
|
23578 |
+
"grad_norm": 0.7753322124481201,
|
23579 |
+
"learning_rate": 5.060133630289533e-05,
|
23580 |
+
"loss": 1.7639,
|
23581 |
+
"step": 3364
|
23582 |
+
},
|
23583 |
+
{
|
23584 |
+
"epoch": 0.7477777777777778,
|
23585 |
+
"grad_norm": 0.9439111351966858,
|
23586 |
+
"learning_rate": 5.0556792873051226e-05,
|
23587 |
+
"loss": 1.7622,
|
23588 |
+
"step": 3365
|
23589 |
+
},
|
23590 |
+
{
|
23591 |
+
"epoch": 0.748,
|
23592 |
+
"grad_norm": 0.9274625778198242,
|
23593 |
+
"learning_rate": 5.0512249443207125e-05,
|
23594 |
+
"loss": 2.1017,
|
23595 |
+
"step": 3366
|
23596 |
+
},
|
23597 |
+
{
|
23598 |
+
"epoch": 0.7482222222222222,
|
23599 |
+
"grad_norm": 0.9550508856773376,
|
23600 |
+
"learning_rate": 5.046770601336304e-05,
|
23601 |
+
"loss": 1.8416,
|
23602 |
+
"step": 3367
|
23603 |
+
},
|
23604 |
+
{
|
23605 |
+
"epoch": 0.7484444444444445,
|
23606 |
+
"grad_norm": 0.8628423810005188,
|
23607 |
+
"learning_rate": 5.0423162583518935e-05,
|
23608 |
+
"loss": 1.9227,
|
23609 |
+
"step": 3368
|
23610 |
+
},
|
23611 |
+
{
|
23612 |
+
"epoch": 0.7486666666666667,
|
23613 |
+
"grad_norm": 1.0649088621139526,
|
23614 |
+
"learning_rate": 5.0378619153674834e-05,
|
23615 |
+
"loss": 2.1865,
|
23616 |
+
"step": 3369
|
23617 |
+
},
|
23618 |
+
{
|
23619 |
+
"epoch": 0.7488888888888889,
|
23620 |
+
"grad_norm": 0.9452845454216003,
|
23621 |
+
"learning_rate": 5.033407572383074e-05,
|
23622 |
+
"loss": 1.9341,
|
23623 |
+
"step": 3370
|
23624 |
+
},
|
23625 |
+
{
|
23626 |
+
"epoch": 0.7491111111111111,
|
23627 |
+
"grad_norm": 0.9852356910705566,
|
23628 |
+
"learning_rate": 5.028953229398664e-05,
|
23629 |
+
"loss": 1.6767,
|
23630 |
+
"step": 3371
|
23631 |
+
},
|
23632 |
+
{
|
23633 |
+
"epoch": 0.7493333333333333,
|
23634 |
+
"grad_norm": 0.9458546042442322,
|
23635 |
+
"learning_rate": 5.0244988864142536e-05,
|
23636 |
+
"loss": 1.786,
|
23637 |
+
"step": 3372
|
23638 |
+
},
|
23639 |
+
{
|
23640 |
+
"epoch": 0.7495555555555555,
|
23641 |
+
"grad_norm": 0.07178652286529541,
|
23642 |
+
"learning_rate": 5.020044543429845e-05,
|
23643 |
+
"loss": 0.015,
|
23644 |
+
"step": 3373
|
23645 |
+
},
|
23646 |
+
{
|
23647 |
+
"epoch": 0.7497777777777778,
|
23648 |
+
"grad_norm": 0.07055787742137909,
|
23649 |
+
"learning_rate": 5.015590200445435e-05,
|
23650 |
+
"loss": 0.0153,
|
23651 |
+
"step": 3374
|
23652 |
+
},
|
23653 |
+
{
|
23654 |
+
"epoch": 0.75,
|
23655 |
+
"grad_norm": 0.6104269027709961,
|
23656 |
+
"learning_rate": 5.0111358574610246e-05,
|
23657 |
+
"loss": 0.8618,
|
23658 |
+
"step": 3375
|
23659 |
+
},
|
23660 |
+
{
|
23661 |
+
"epoch": 0.7502222222222222,
|
23662 |
+
"grad_norm": 0.6599386930465698,
|
23663 |
+
"learning_rate": 5.006681514476616e-05,
|
23664 |
+
"loss": 0.8642,
|
23665 |
+
"step": 3376
|
23666 |
+
},
|
23667 |
+
{
|
23668 |
+
"epoch": 0.7504444444444445,
|
23669 |
+
"grad_norm": 0.6750035881996155,
|
23670 |
+
"learning_rate": 5.0022271714922056e-05,
|
23671 |
+
"loss": 0.8647,
|
23672 |
+
"step": 3377
|
23673 |
+
},
|
23674 |
+
{
|
23675 |
+
"epoch": 0.7506666666666667,
|
23676 |
+
"grad_norm": 0.9692963361740112,
|
23677 |
+
"learning_rate": 4.997772828507795e-05,
|
23678 |
+
"loss": 1.8036,
|
23679 |
+
"step": 3378
|
23680 |
+
},
|
23681 |
+
{
|
23682 |
+
"epoch": 0.7508888888888889,
|
23683 |
+
"grad_norm": 1.0836691856384277,
|
23684 |
+
"learning_rate": 4.9933184855233854e-05,
|
23685 |
+
"loss": 2.039,
|
23686 |
+
"step": 3379
|
23687 |
+
},
|
23688 |
+
{
|
23689 |
+
"epoch": 0.7511111111111111,
|
23690 |
+
"grad_norm": 0.06479348987340927,
|
23691 |
+
"learning_rate": 4.988864142538976e-05,
|
23692 |
+
"loss": 0.0173,
|
23693 |
+
"step": 3380
|
23694 |
+
},
|
23695 |
+
{
|
23696 |
+
"epoch": 0.7513333333333333,
|
23697 |
+
"grad_norm": 0.06957981735467911,
|
23698 |
+
"learning_rate": 4.984409799554566e-05,
|
23699 |
+
"loss": 0.0166,
|
23700 |
+
"step": 3381
|
23701 |
+
},
|
23702 |
+
{
|
23703 |
+
"epoch": 0.7515555555555555,
|
23704 |
+
"grad_norm": 0.666901707649231,
|
23705 |
+
"learning_rate": 4.979955456570156e-05,
|
23706 |
+
"loss": 0.7578,
|
23707 |
+
"step": 3382
|
23708 |
+
},
|
23709 |
+
{
|
23710 |
+
"epoch": 0.7517777777777778,
|
23711 |
+
"grad_norm": 1.0305155515670776,
|
23712 |
+
"learning_rate": 4.975501113585747e-05,
|
23713 |
+
"loss": 1.6703,
|
23714 |
+
"step": 3383
|
23715 |
+
},
|
23716 |
+
{
|
23717 |
+
"epoch": 0.752,
|
23718 |
+
"grad_norm": 0.9969210624694824,
|
23719 |
+
"learning_rate": 4.971046770601337e-05,
|
23720 |
+
"loss": 1.7831,
|
23721 |
+
"step": 3384
|
23722 |
+
},
|
23723 |
+
{
|
23724 |
+
"epoch": 0.7522222222222222,
|
23725 |
+
"grad_norm": 0.068308025598526,
|
23726 |
+
"learning_rate": 4.9665924276169265e-05,
|
23727 |
+
"loss": 0.018,
|
23728 |
+
"step": 3385
|
23729 |
+
},
|
23730 |
+
{
|
23731 |
+
"epoch": 0.7524444444444445,
|
23732 |
+
"grad_norm": 0.06835668534040451,
|
23733 |
+
"learning_rate": 4.962138084632517e-05,
|
23734 |
+
"loss": 0.0171,
|
23735 |
+
"step": 3386
|
23736 |
+
},
|
23737 |
+
{
|
23738 |
+
"epoch": 0.7526666666666667,
|
23739 |
+
"grad_norm": 0.562114417552948,
|
23740 |
+
"learning_rate": 4.957683741648107e-05,
|
23741 |
+
"loss": 0.8015,
|
23742 |
+
"step": 3387
|
23743 |
+
},
|
23744 |
+
{
|
23745 |
+
"epoch": 0.7528888888888889,
|
23746 |
+
"grad_norm": 0.9326373338699341,
|
23747 |
+
"learning_rate": 4.9532293986636975e-05,
|
23748 |
+
"loss": 1.7364,
|
23749 |
+
"step": 3388
|
23750 |
+
},
|
23751 |
+
{
|
23752 |
+
"epoch": 0.7531111111111111,
|
23753 |
+
"grad_norm": 1.0560567378997803,
|
23754 |
+
"learning_rate": 4.948775055679288e-05,
|
23755 |
+
"loss": 1.3854,
|
23756 |
+
"step": 3389
|
23757 |
+
},
|
23758 |
+
{
|
23759 |
+
"epoch": 0.7533333333333333,
|
23760 |
+
"grad_norm": 1.0617526769638062,
|
23761 |
+
"learning_rate": 4.944320712694878e-05,
|
23762 |
+
"loss": 1.3826,
|
23763 |
+
"step": 3390
|
23764 |
+
},
|
23765 |
+
{
|
23766 |
+
"epoch": 0.7535555555555555,
|
23767 |
+
"grad_norm": 0.6773163080215454,
|
23768 |
+
"learning_rate": 4.939866369710468e-05,
|
23769 |
+
"loss": 0.9724,
|
23770 |
+
"step": 3391
|
23771 |
+
},
|
23772 |
+
{
|
23773 |
+
"epoch": 0.7537777777777778,
|
23774 |
+
"grad_norm": 0.8919631838798523,
|
23775 |
+
"learning_rate": 4.935412026726058e-05,
|
23776 |
+
"loss": 1.4029,
|
23777 |
+
"step": 3392
|
23778 |
+
},
|
23779 |
+
{
|
23780 |
+
"epoch": 0.754,
|
23781 |
+
"grad_norm": 1.0007896423339844,
|
23782 |
+
"learning_rate": 4.930957683741648e-05,
|
23783 |
+
"loss": 1.3675,
|
23784 |
+
"step": 3393
|
23785 |
+
},
|
23786 |
+
{
|
23787 |
+
"epoch": 0.7542222222222222,
|
23788 |
+
"grad_norm": 1.1181669235229492,
|
23789 |
+
"learning_rate": 4.9265033407572387e-05,
|
23790 |
+
"loss": 1.5695,
|
23791 |
+
"step": 3394
|
23792 |
+
},
|
23793 |
+
{
|
23794 |
+
"epoch": 0.7544444444444445,
|
23795 |
+
"grad_norm": 1.058223843574524,
|
23796 |
+
"learning_rate": 4.922048997772829e-05,
|
23797 |
+
"loss": 1.444,
|
23798 |
+
"step": 3395
|
23799 |
+
},
|
23800 |
+
{
|
23801 |
+
"epoch": 0.7546666666666667,
|
23802 |
+
"grad_norm": 1.0917662382125854,
|
23803 |
+
"learning_rate": 4.917594654788419e-05,
|
23804 |
+
"loss": 1.5776,
|
23805 |
+
"step": 3396
|
23806 |
+
},
|
23807 |
+
{
|
23808 |
+
"epoch": 0.7548888888888889,
|
23809 |
+
"grad_norm": 1.2129132747650146,
|
23810 |
+
"learning_rate": 4.913140311804009e-05,
|
23811 |
+
"loss": 1.5378,
|
23812 |
+
"step": 3397
|
23813 |
+
},
|
23814 |
+
{
|
23815 |
+
"epoch": 0.7551111111111111,
|
23816 |
+
"grad_norm": 0.7757513523101807,
|
23817 |
+
"learning_rate": 4.908685968819599e-05,
|
23818 |
+
"loss": 0.7143,
|
23819 |
+
"step": 3398
|
23820 |
+
},
|
23821 |
+
{
|
23822 |
+
"epoch": 0.7553333333333333,
|
23823 |
+
"grad_norm": 1.0675660371780396,
|
23824 |
+
"learning_rate": 4.904231625835189e-05,
|
23825 |
+
"loss": 1.2625,
|
23826 |
+
"step": 3399
|
23827 |
+
},
|
23828 |
+
{
|
23829 |
+
"epoch": 0.7555555555555555,
|
23830 |
+
"grad_norm": 0.7911191582679749,
|
23831 |
+
"learning_rate": 4.89977728285078e-05,
|
23832 |
+
"loss": 0.6726,
|
23833 |
+
"step": 3400
|
23834 |
+
},
|
23835 |
+
{
|
23836 |
+
"epoch": 0.7557777777777778,
|
23837 |
+
"grad_norm": 0.936028003692627,
|
23838 |
+
"learning_rate": 4.89532293986637e-05,
|
23839 |
+
"loss": 2.5741,
|
23840 |
+
"step": 3401
|
23841 |
+
},
|
23842 |
+
{
|
23843 |
+
"epoch": 0.756,
|
23844 |
+
"grad_norm": 0.04625101387500763,
|
23845 |
+
"learning_rate": 4.89086859688196e-05,
|
23846 |
+
"loss": 0.0098,
|
23847 |
+
"step": 3402
|
23848 |
+
},
|
23849 |
+
{
|
23850 |
+
"epoch": 0.7562222222222222,
|
23851 |
+
"grad_norm": 0.5739651918411255,
|
23852 |
+
"learning_rate": 4.886414253897551e-05,
|
23853 |
+
"loss": 1.0021,
|
23854 |
+
"step": 3403
|
23855 |
+
},
|
23856 |
+
{
|
23857 |
+
"epoch": 0.7564444444444445,
|
23858 |
+
"grad_norm": 0.874405562877655,
|
23859 |
+
"learning_rate": 4.8819599109131406e-05,
|
23860 |
+
"loss": 2.1036,
|
23861 |
+
"step": 3404
|
23862 |
+
},
|
23863 |
+
{
|
23864 |
+
"epoch": 0.7566666666666667,
|
23865 |
+
"grad_norm": 0.5654922723770142,
|
23866 |
+
"learning_rate": 4.8775055679287305e-05,
|
23867 |
+
"loss": 0.9892,
|
23868 |
+
"step": 3405
|
23869 |
+
},
|
23870 |
+
{
|
23871 |
+
"epoch": 0.7568888888888889,
|
23872 |
+
"grad_norm": 0.6591737866401672,
|
23873 |
+
"learning_rate": 4.873051224944321e-05,
|
23874 |
+
"loss": 0.9575,
|
23875 |
+
"step": 3406
|
23876 |
+
},
|
23877 |
+
{
|
23878 |
+
"epoch": 0.7571111111111111,
|
23879 |
+
"grad_norm": 0.05461383983492851,
|
23880 |
+
"learning_rate": 4.868596881959911e-05,
|
23881 |
+
"loss": 0.011,
|
23882 |
+
"step": 3407
|
23883 |
+
},
|
23884 |
+
{
|
23885 |
+
"epoch": 0.7573333333333333,
|
23886 |
+
"grad_norm": 0.0622735358774662,
|
23887 |
+
"learning_rate": 4.8641425389755014e-05,
|
23888 |
+
"loss": 0.0112,
|
23889 |
+
"step": 3408
|
23890 |
+
},
|
23891 |
+
{
|
23892 |
+
"epoch": 0.7575555555555555,
|
23893 |
+
"grad_norm": 0.059408292174339294,
|
23894 |
+
"learning_rate": 4.859688195991092e-05,
|
23895 |
+
"loss": 0.011,
|
23896 |
+
"step": 3409
|
23897 |
+
},
|
23898 |
+
{
|
23899 |
+
"epoch": 0.7577777777777778,
|
23900 |
+
"grad_norm": 0.6495372653007507,
|
23901 |
+
"learning_rate": 4.855233853006682e-05,
|
23902 |
+
"loss": 0.8378,
|
23903 |
+
"step": 3410
|
23904 |
+
},
|
23905 |
+
{
|
23906 |
+
"epoch": 0.758,
|
23907 |
+
"grad_norm": 0.9061746001243591,
|
23908 |
+
"learning_rate": 4.850779510022272e-05,
|
23909 |
+
"loss": 2.2088,
|
23910 |
+
"step": 3411
|
23911 |
+
},
|
23912 |
+
{
|
23913 |
+
"epoch": 0.7582222222222222,
|
23914 |
+
"grad_norm": 0.8633875846862793,
|
23915 |
+
"learning_rate": 4.846325167037862e-05,
|
23916 |
+
"loss": 1.9511,
|
23917 |
+
"step": 3412
|
23918 |
+
},
|
23919 |
+
{
|
23920 |
+
"epoch": 0.7584444444444445,
|
23921 |
+
"grad_norm": 1.055767297744751,
|
23922 |
+
"learning_rate": 4.841870824053452e-05,
|
23923 |
+
"loss": 1.9755,
|
23924 |
+
"step": 3413
|
23925 |
+
},
|
23926 |
+
{
|
23927 |
+
"epoch": 0.7586666666666667,
|
23928 |
+
"grad_norm": 0.8679887056350708,
|
23929 |
+
"learning_rate": 4.8374164810690426e-05,
|
23930 |
+
"loss": 1.8676,
|
23931 |
+
"step": 3414
|
23932 |
+
},
|
23933 |
+
{
|
23934 |
+
"epoch": 0.7588888888888888,
|
23935 |
+
"grad_norm": 0.9158828258514404,
|
23936 |
+
"learning_rate": 4.832962138084633e-05,
|
23937 |
+
"loss": 2.0772,
|
23938 |
+
"step": 3415
|
23939 |
+
},
|
23940 |
+
{
|
23941 |
+
"epoch": 0.7591111111111111,
|
23942 |
+
"grad_norm": 0.6672974228858948,
|
23943 |
+
"learning_rate": 4.828507795100223e-05,
|
23944 |
+
"loss": 1.1813,
|
23945 |
+
"step": 3416
|
23946 |
+
},
|
23947 |
+
{
|
23948 |
+
"epoch": 0.7593333333333333,
|
23949 |
+
"grad_norm": 0.9546223282814026,
|
23950 |
+
"learning_rate": 4.824053452115813e-05,
|
23951 |
+
"loss": 1.9467,
|
23952 |
+
"step": 3417
|
23953 |
+
},
|
23954 |
+
{
|
23955 |
+
"epoch": 0.7595555555555555,
|
23956 |
+
"grad_norm": 1.0391935110092163,
|
23957 |
+
"learning_rate": 4.8195991091314034e-05,
|
23958 |
+
"loss": 2.041,
|
23959 |
+
"step": 3418
|
23960 |
+
},
|
23961 |
+
{
|
23962 |
+
"epoch": 0.7597777777777778,
|
23963 |
+
"grad_norm": 1.0147621631622314,
|
23964 |
+
"learning_rate": 4.815144766146993e-05,
|
23965 |
+
"loss": 2.0473,
|
23966 |
+
"step": 3419
|
23967 |
+
},
|
23968 |
+
{
|
23969 |
+
"epoch": 0.76,
|
23970 |
+
"grad_norm": 0.6334058046340942,
|
23971 |
+
"learning_rate": 4.810690423162584e-05,
|
23972 |
+
"loss": 0.8882,
|
23973 |
+
"step": 3420
|
23974 |
+
},
|
23975 |
+
{
|
23976 |
+
"epoch": 0.7602222222222222,
|
23977 |
+
"grad_norm": 0.06809257715940475,
|
23978 |
+
"learning_rate": 4.806236080178174e-05,
|
23979 |
+
"loss": 0.0153,
|
23980 |
+
"step": 3421
|
23981 |
+
},
|
23982 |
+
{
|
23983 |
+
"epoch": 0.7604444444444445,
|
23984 |
+
"grad_norm": 0.06833475828170776,
|
23985 |
+
"learning_rate": 4.801781737193764e-05,
|
23986 |
+
"loss": 0.0156,
|
23987 |
+
"step": 3422
|
23988 |
+
},
|
23989 |
+
{
|
23990 |
+
"epoch": 0.7606666666666667,
|
23991 |
+
"grad_norm": 0.09722508490085602,
|
23992 |
+
"learning_rate": 4.797327394209355e-05,
|
23993 |
+
"loss": 0.0179,
|
23994 |
+
"step": 3423
|
23995 |
+
},
|
23996 |
+
{
|
23997 |
+
"epoch": 0.7608888888888888,
|
23998 |
+
"grad_norm": 0.92330402135849,
|
23999 |
+
"learning_rate": 4.7928730512249446e-05,
|
24000 |
+
"loss": 1.7349,
|
24001 |
+
"step": 3424
|
24002 |
+
},
|
24003 |
+
{
|
24004 |
+
"epoch": 0.7611111111111111,
|
24005 |
+
"grad_norm": 1.0066584348678589,
|
24006 |
+
"learning_rate": 4.7884187082405344e-05,
|
24007 |
+
"loss": 1.6615,
|
24008 |
+
"step": 3425
|
24009 |
+
},
|
24010 |
+
{
|
24011 |
+
"epoch": 0.7613333333333333,
|
24012 |
+
"grad_norm": 0.9122890830039978,
|
24013 |
+
"learning_rate": 4.783964365256125e-05,
|
24014 |
+
"loss": 1.9283,
|
24015 |
+
"step": 3426
|
24016 |
+
},
|
24017 |
+
{
|
24018 |
+
"epoch": 0.7615555555555555,
|
24019 |
+
"grad_norm": 1.0834369659423828,
|
24020 |
+
"learning_rate": 4.7795100222717155e-05,
|
24021 |
+
"loss": 1.8457,
|
24022 |
+
"step": 3427
|
24023 |
+
},
|
24024 |
+
{
|
24025 |
+
"epoch": 0.7617777777777778,
|
24026 |
+
"grad_norm": 0.9122326970100403,
|
24027 |
+
"learning_rate": 4.7750556792873054e-05,
|
24028 |
+
"loss": 1.5779,
|
24029 |
+
"step": 3428
|
24030 |
+
},
|
24031 |
+
{
|
24032 |
+
"epoch": 0.762,
|
24033 |
+
"grad_norm": 0.6459372639656067,
|
24034 |
+
"learning_rate": 4.770601336302896e-05,
|
24035 |
+
"loss": 0.8526,
|
24036 |
+
"step": 3429
|
24037 |
+
},
|
24038 |
+
{
|
24039 |
+
"epoch": 0.7622222222222222,
|
24040 |
+
"grad_norm": 0.06661590933799744,
|
24041 |
+
"learning_rate": 4.766146993318486e-05,
|
24042 |
+
"loss": 0.018,
|
24043 |
+
"step": 3430
|
24044 |
+
},
|
24045 |
+
{
|
24046 |
+
"epoch": 0.7624444444444445,
|
24047 |
+
"grad_norm": 0.06595264375209808,
|
24048 |
+
"learning_rate": 4.7616926503340756e-05,
|
24049 |
+
"loss": 0.0176,
|
24050 |
+
"step": 3431
|
24051 |
+
},
|
24052 |
+
{
|
24053 |
+
"epoch": 0.7626666666666667,
|
24054 |
+
"grad_norm": 0.06258884072303772,
|
24055 |
+
"learning_rate": 4.757238307349666e-05,
|
24056 |
+
"loss": 0.0175,
|
24057 |
+
"step": 3432
|
24058 |
+
},
|
24059 |
+
{
|
24060 |
+
"epoch": 0.7628888888888888,
|
24061 |
+
"grad_norm": 0.9908372163772583,
|
24062 |
+
"learning_rate": 4.752783964365256e-05,
|
24063 |
+
"loss": 1.5601,
|
24064 |
+
"step": 3433
|
24065 |
+
},
|
24066 |
+
{
|
24067 |
+
"epoch": 0.7631111111111111,
|
24068 |
+
"grad_norm": 1.1008018255233765,
|
24069 |
+
"learning_rate": 4.7483296213808465e-05,
|
24070 |
+
"loss": 1.9175,
|
24071 |
+
"step": 3434
|
24072 |
+
},
|
24073 |
+
{
|
24074 |
+
"epoch": 0.7633333333333333,
|
24075 |
+
"grad_norm": 0.06766713410615921,
|
24076 |
+
"learning_rate": 4.743875278396437e-05,
|
24077 |
+
"loss": 0.0185,
|
24078 |
+
"step": 3435
|
24079 |
+
},
|
24080 |
+
{
|
24081 |
+
"epoch": 0.7635555555555555,
|
24082 |
+
"grad_norm": 0.06862013787031174,
|
24083 |
+
"learning_rate": 4.739420935412027e-05,
|
24084 |
+
"loss": 0.018,
|
24085 |
+
"step": 3436
|
24086 |
+
},
|
24087 |
+
{
|
24088 |
+
"epoch": 0.7637777777777778,
|
24089 |
+
"grad_norm": 0.995215654373169,
|
24090 |
+
"learning_rate": 4.734966592427617e-05,
|
24091 |
+
"loss": 1.6609,
|
24092 |
+
"step": 3437
|
24093 |
+
},
|
24094 |
+
{
|
24095 |
+
"epoch": 0.764,
|
24096 |
+
"grad_norm": 1.1150976419448853,
|
24097 |
+
"learning_rate": 4.730512249443207e-05,
|
24098 |
+
"loss": 1.5773,
|
24099 |
+
"step": 3438
|
24100 |
+
},
|
24101 |
+
{
|
24102 |
+
"epoch": 0.7642222222222222,
|
24103 |
+
"grad_norm": 0.10110121965408325,
|
24104 |
+
"learning_rate": 4.726057906458797e-05,
|
24105 |
+
"loss": 0.0254,
|
24106 |
+
"step": 3439
|
24107 |
+
},
|
24108 |
+
{
|
24109 |
+
"epoch": 0.7644444444444445,
|
24110 |
+
"grad_norm": 0.8509777188301086,
|
24111 |
+
"learning_rate": 4.721603563474388e-05,
|
24112 |
+
"loss": 0.8449,
|
24113 |
+
"step": 3440
|
24114 |
+
},
|
24115 |
+
{
|
24116 |
+
"epoch": 0.7646666666666667,
|
24117 |
+
"grad_norm": 1.163260579109192,
|
24118 |
+
"learning_rate": 4.717149220489978e-05,
|
24119 |
+
"loss": 1.6988,
|
24120 |
+
"step": 3441
|
24121 |
+
},
|
24122 |
+
{
|
24123 |
+
"epoch": 0.7648888888888888,
|
24124 |
+
"grad_norm": 1.1963449716567993,
|
24125 |
+
"learning_rate": 4.712694877505568e-05,
|
24126 |
+
"loss": 1.6756,
|
24127 |
+
"step": 3442
|
24128 |
+
},
|
24129 |
+
{
|
24130 |
+
"epoch": 0.7651111111111111,
|
24131 |
+
"grad_norm": 1.1867884397506714,
|
24132 |
+
"learning_rate": 4.7082405345211587e-05,
|
24133 |
+
"loss": 1.6131,
|
24134 |
+
"step": 3443
|
24135 |
+
},
|
24136 |
+
{
|
24137 |
+
"epoch": 0.7653333333333333,
|
24138 |
+
"grad_norm": 1.0478819608688354,
|
24139 |
+
"learning_rate": 4.7037861915367485e-05,
|
24140 |
+
"loss": 1.4666,
|
24141 |
+
"step": 3444
|
24142 |
+
},
|
24143 |
+
{
|
24144 |
+
"epoch": 0.7655555555555555,
|
24145 |
+
"grad_norm": 1.076615571975708,
|
24146 |
+
"learning_rate": 4.6993318485523384e-05,
|
24147 |
+
"loss": 1.3148,
|
24148 |
+
"step": 3445
|
24149 |
+
},
|
24150 |
+
{
|
24151 |
+
"epoch": 0.7657777777777778,
|
24152 |
+
"grad_norm": 0.7551054954528809,
|
24153 |
+
"learning_rate": 4.694877505567929e-05,
|
24154 |
+
"loss": 0.7423,
|
24155 |
+
"step": 3446
|
24156 |
+
},
|
24157 |
+
{
|
24158 |
+
"epoch": 0.766,
|
24159 |
+
"grad_norm": 0.7709291577339172,
|
24160 |
+
"learning_rate": 4.6904231625835194e-05,
|
24161 |
+
"loss": 0.749,
|
24162 |
+
"step": 3447
|
24163 |
+
},
|
24164 |
+
{
|
24165 |
+
"epoch": 0.7662222222222222,
|
24166 |
+
"grad_norm": 0.9779494404792786,
|
24167 |
+
"learning_rate": 4.685968819599109e-05,
|
24168 |
+
"loss": 1.1534,
|
24169 |
+
"step": 3448
|
24170 |
+
},
|
24171 |
+
{
|
24172 |
+
"epoch": 0.7664444444444445,
|
24173 |
+
"grad_norm": 0.7176189422607422,
|
24174 |
+
"learning_rate": 4.6815144766147e-05,
|
24175 |
+
"loss": 0.5791,
|
24176 |
+
"step": 3449
|
24177 |
+
},
|
24178 |
+
{
|
24179 |
+
"epoch": 0.7666666666666667,
|
24180 |
+
"grad_norm": 2.1151397228240967,
|
24181 |
+
"learning_rate": 4.67706013363029e-05,
|
24182 |
+
"loss": 1.2452,
|
24183 |
+
"step": 3450
|
24184 |
+
},
|
24185 |
+
{
|
24186 |
+
"epoch": 0.7668888888888888,
|
24187 |
+
"grad_norm": 0.04698283597826958,
|
24188 |
+
"learning_rate": 4.6726057906458796e-05,
|
24189 |
+
"loss": 0.0105,
|
24190 |
+
"step": 3451
|
24191 |
+
},
|
24192 |
+
{
|
24193 |
+
"epoch": 0.7671111111111111,
|
24194 |
+
"grad_norm": 0.806088387966156,
|
24195 |
+
"learning_rate": 4.66815144766147e-05,
|
24196 |
+
"loss": 2.1148,
|
24197 |
+
"step": 3452
|
24198 |
+
},
|
24199 |
+
{
|
24200 |
+
"epoch": 0.7673333333333333,
|
24201 |
+
"grad_norm": 0.9896338582038879,
|
24202 |
+
"learning_rate": 4.6636971046770606e-05,
|
24203 |
+
"loss": 2.4891,
|
24204 |
+
"step": 3453
|
24205 |
+
},
|
24206 |
+
{
|
24207 |
+
"epoch": 0.7675555555555555,
|
24208 |
+
"grad_norm": 0.6359859704971313,
|
24209 |
+
"learning_rate": 4.6592427616926505e-05,
|
24210 |
+
"loss": 0.8817,
|
24211 |
+
"step": 3454
|
24212 |
+
},
|
24213 |
+
{
|
24214 |
+
"epoch": 0.7677777777777778,
|
24215 |
+
"grad_norm": 0.6366167068481445,
|
24216 |
+
"learning_rate": 4.654788418708241e-05,
|
24217 |
+
"loss": 1.1016,
|
24218 |
+
"step": 3455
|
24219 |
+
},
|
24220 |
+
{
|
24221 |
+
"epoch": 0.768,
|
24222 |
+
"grad_norm": 0.6625463366508484,
|
24223 |
+
"learning_rate": 4.650334075723831e-05,
|
24224 |
+
"loss": 1.2164,
|
24225 |
+
"step": 3456
|
24226 |
+
},
|
24227 |
+
{
|
24228 |
+
"epoch": 0.7682222222222223,
|
24229 |
+
"grad_norm": 0.6121510863304138,
|
24230 |
+
"learning_rate": 4.645879732739421e-05,
|
24231 |
+
"loss": 0.8625,
|
24232 |
+
"step": 3457
|
24233 |
+
},
|
24234 |
+
{
|
24235 |
+
"epoch": 0.7684444444444445,
|
24236 |
+
"grad_norm": 1.0105525255203247,
|
24237 |
+
"learning_rate": 4.641425389755011e-05,
|
24238 |
+
"loss": 2.7758,
|
24239 |
+
"step": 3458
|
24240 |
+
},
|
24241 |
+
{
|
24242 |
+
"epoch": 0.7686666666666667,
|
24243 |
+
"grad_norm": 0.8283724188804626,
|
24244 |
+
"learning_rate": 4.636971046770602e-05,
|
24245 |
+
"loss": 2.0447,
|
24246 |
+
"step": 3459
|
24247 |
+
},
|
24248 |
+
{
|
24249 |
+
"epoch": 0.7688888888888888,
|
24250 |
+
"grad_norm": 0.06813201308250427,
|
24251 |
+
"learning_rate": 4.632516703786192e-05,
|
24252 |
+
"loss": 0.0111,
|
24253 |
+
"step": 3460
|
24254 |
+
},
|
24255 |
+
{
|
24256 |
+
"epoch": 0.7691111111111111,
|
24257 |
+
"grad_norm": 0.0665576308965683,
|
24258 |
+
"learning_rate": 4.628062360801782e-05,
|
24259 |
+
"loss": 0.011,
|
24260 |
+
"step": 3461
|
24261 |
+
},
|
24262 |
+
{
|
24263 |
+
"epoch": 0.7693333333333333,
|
24264 |
+
"grad_norm": 0.06463496387004852,
|
24265 |
+
"learning_rate": 4.623608017817373e-05,
|
24266 |
+
"loss": 0.0111,
|
24267 |
+
"step": 3462
|
24268 |
+
},
|
24269 |
+
{
|
24270 |
+
"epoch": 0.7695555555555555,
|
24271 |
+
"grad_norm": 0.8166987895965576,
|
24272 |
+
"learning_rate": 4.619153674832962e-05,
|
24273 |
+
"loss": 2.0366,
|
24274 |
+
"step": 3463
|
24275 |
+
},
|
24276 |
+
{
|
24277 |
+
"epoch": 0.7697777777777778,
|
24278 |
+
"grad_norm": 0.9549795985221863,
|
24279 |
+
"learning_rate": 4.6146993318485525e-05,
|
24280 |
+
"loss": 1.8478,
|
24281 |
+
"step": 3464
|
24282 |
+
},
|
24283 |
+
{
|
24284 |
+
"epoch": 0.77,
|
24285 |
+
"grad_norm": 0.8335583209991455,
|
24286 |
+
"learning_rate": 4.610244988864143e-05,
|
24287 |
+
"loss": 1.8226,
|
24288 |
+
"step": 3465
|
24289 |
+
},
|
24290 |
+
{
|
24291 |
+
"epoch": 0.7702222222222223,
|
24292 |
+
"grad_norm": 0.9823237657546997,
|
24293 |
+
"learning_rate": 4.605790645879733e-05,
|
24294 |
+
"loss": 2.114,
|
24295 |
+
"step": 3466
|
24296 |
+
},
|
24297 |
+
{
|
24298 |
+
"epoch": 0.7704444444444445,
|
24299 |
+
"grad_norm": 0.9316264986991882,
|
24300 |
+
"learning_rate": 4.6013363028953234e-05,
|
24301 |
+
"loss": 2.0765,
|
24302 |
+
"step": 3467
|
24303 |
+
},
|
24304 |
+
{
|
24305 |
+
"epoch": 0.7706666666666667,
|
24306 |
+
"grad_norm": 0.8862332701683044,
|
24307 |
+
"learning_rate": 4.596881959910914e-05,
|
24308 |
+
"loss": 1.8899,
|
24309 |
+
"step": 3468
|
24310 |
+
},
|
24311 |
+
{
|
24312 |
+
"epoch": 0.7708888888888888,
|
24313 |
+
"grad_norm": 0.9615729451179504,
|
24314 |
+
"learning_rate": 4.592427616926504e-05,
|
24315 |
+
"loss": 1.9076,
|
24316 |
+
"step": 3469
|
24317 |
+
},
|
24318 |
+
{
|
24319 |
+
"epoch": 0.7711111111111111,
|
24320 |
+
"grad_norm": 0.107745461165905,
|
24321 |
+
"learning_rate": 4.5879732739420936e-05,
|
24322 |
+
"loss": 0.0184,
|
24323 |
+
"step": 3470
|
24324 |
+
},
|
24325 |
+
{
|
24326 |
+
"epoch": 0.7713333333333333,
|
24327 |
+
"grad_norm": 0.9588910341262817,
|
24328 |
+
"learning_rate": 4.5835189309576835e-05,
|
24329 |
+
"loss": 1.7689,
|
24330 |
+
"step": 3471
|
24331 |
+
},
|
24332 |
+
{
|
24333 |
+
"epoch": 0.7715555555555556,
|
24334 |
+
"grad_norm": 0.8808805346488953,
|
24335 |
+
"learning_rate": 4.579064587973274e-05,
|
24336 |
+
"loss": 2.204,
|
24337 |
+
"step": 3472
|
24338 |
+
},
|
24339 |
+
{
|
24340 |
+
"epoch": 0.7717777777777778,
|
24341 |
+
"grad_norm": 1.1614326238632202,
|
24342 |
+
"learning_rate": 4.5746102449888646e-05,
|
24343 |
+
"loss": 1.9956,
|
24344 |
+
"step": 3473
|
24345 |
+
},
|
24346 |
+
{
|
24347 |
+
"epoch": 0.772,
|
24348 |
+
"grad_norm": 0.8884471654891968,
|
24349 |
+
"learning_rate": 4.5701559020044544e-05,
|
24350 |
+
"loss": 1.875,
|
24351 |
+
"step": 3474
|
24352 |
+
},
|
24353 |
+
{
|
24354 |
+
"epoch": 0.7722222222222223,
|
24355 |
+
"grad_norm": 0.9541723728179932,
|
24356 |
+
"learning_rate": 4.565701559020045e-05,
|
24357 |
+
"loss": 1.8806,
|
24358 |
+
"step": 3475
|
24359 |
+
},
|
24360 |
+
{
|
24361 |
+
"epoch": 0.7724444444444445,
|
24362 |
+
"grad_norm": 0.07305742055177689,
|
24363 |
+
"learning_rate": 4.561247216035635e-05,
|
24364 |
+
"loss": 0.0169,
|
24365 |
+
"step": 3476
|
24366 |
+
},
|
24367 |
+
{
|
24368 |
+
"epoch": 0.7726666666666666,
|
24369 |
+
"grad_norm": 0.6821660399436951,
|
24370 |
+
"learning_rate": 4.556792873051225e-05,
|
24371 |
+
"loss": 1.079,
|
24372 |
+
"step": 3477
|
24373 |
+
},
|
24374 |
+
{
|
24375 |
+
"epoch": 0.7728888888888888,
|
24376 |
+
"grad_norm": 0.9830121994018555,
|
24377 |
+
"learning_rate": 4.552338530066815e-05,
|
24378 |
+
"loss": 1.7285,
|
24379 |
+
"step": 3478
|
24380 |
+
},
|
24381 |
+
{
|
24382 |
+
"epoch": 0.7731111111111111,
|
24383 |
+
"grad_norm": 0.6831437945365906,
|
24384 |
+
"learning_rate": 4.547884187082406e-05,
|
24385 |
+
"loss": 0.8884,
|
24386 |
+
"step": 3479
|
24387 |
+
},
|
24388 |
+
{
|
24389 |
+
"epoch": 0.7733333333333333,
|
24390 |
+
"grad_norm": 1.0051524639129639,
|
24391 |
+
"learning_rate": 4.5434298440979956e-05,
|
24392 |
+
"loss": 1.8973,
|
24393 |
+
"step": 3480
|
24394 |
+
},
|
24395 |
+
{
|
24396 |
+
"epoch": 0.7735555555555556,
|
24397 |
+
"grad_norm": 0.9727129340171814,
|
24398 |
+
"learning_rate": 4.538975501113586e-05,
|
24399 |
+
"loss": 2.042,
|
24400 |
+
"step": 3481
|
24401 |
+
},
|
24402 |
+
{
|
24403 |
+
"epoch": 0.7737777777777778,
|
24404 |
+
"grad_norm": 1.0296839475631714,
|
24405 |
+
"learning_rate": 4.534521158129176e-05,
|
24406 |
+
"loss": 1.7367,
|
24407 |
+
"step": 3482
|
24408 |
+
},
|
24409 |
+
{
|
24410 |
+
"epoch": 0.774,
|
24411 |
+
"grad_norm": 0.9972522258758545,
|
24412 |
+
"learning_rate": 4.530066815144766e-05,
|
24413 |
+
"loss": 1.8867,
|
24414 |
+
"step": 3483
|
24415 |
+
},
|
24416 |
+
{
|
24417 |
+
"epoch": 0.7742222222222223,
|
24418 |
+
"grad_norm": 1.0227113962173462,
|
24419 |
+
"learning_rate": 4.5256124721603564e-05,
|
24420 |
+
"loss": 1.8279,
|
24421 |
+
"step": 3484
|
24422 |
+
},
|
24423 |
+
{
|
24424 |
+
"epoch": 0.7744444444444445,
|
24425 |
+
"grad_norm": 1.061448335647583,
|
24426 |
+
"learning_rate": 4.521158129175947e-05,
|
24427 |
+
"loss": 1.9245,
|
24428 |
+
"step": 3485
|
24429 |
+
},
|
24430 |
+
{
|
24431 |
+
"epoch": 0.7746666666666666,
|
24432 |
+
"grad_norm": 0.6845740675926208,
|
24433 |
+
"learning_rate": 4.516703786191537e-05,
|
24434 |
+
"loss": 0.9532,
|
24435 |
+
"step": 3486
|
24436 |
+
},
|
24437 |
+
{
|
24438 |
+
"epoch": 0.7748888888888888,
|
24439 |
+
"grad_norm": 1.010504961013794,
|
24440 |
+
"learning_rate": 4.512249443207127e-05,
|
24441 |
+
"loss": 1.6469,
|
24442 |
+
"step": 3487
|
24443 |
+
},
|
24444 |
+
{
|
24445 |
+
"epoch": 0.7751111111111111,
|
24446 |
+
"grad_norm": 1.15483820438385,
|
24447 |
+
"learning_rate": 4.507795100222718e-05,
|
24448 |
+
"loss": 1.735,
|
24449 |
+
"step": 3488
|
24450 |
+
},
|
24451 |
+
{
|
24452 |
+
"epoch": 0.7753333333333333,
|
24453 |
+
"grad_norm": 0.06943599879741669,
|
24454 |
+
"learning_rate": 4.503340757238308e-05,
|
24455 |
+
"loss": 0.0192,
|
24456 |
+
"step": 3489
|
24457 |
+
},
|
24458 |
+
{
|
24459 |
+
"epoch": 0.7755555555555556,
|
24460 |
+
"grad_norm": 0.06758453696966171,
|
24461 |
+
"learning_rate": 4.4988864142538976e-05,
|
24462 |
+
"loss": 0.0185,
|
24463 |
+
"step": 3490
|
24464 |
+
},
|
24465 |
+
{
|
24466 |
+
"epoch": 0.7757777777777778,
|
24467 |
+
"grad_norm": 0.06915237754583359,
|
24468 |
+
"learning_rate": 4.494432071269488e-05,
|
24469 |
+
"loss": 0.0189,
|
24470 |
+
"step": 3491
|
24471 |
+
},
|
24472 |
+
{
|
24473 |
+
"epoch": 0.776,
|
24474 |
+
"grad_norm": 0.7292212843894958,
|
24475 |
+
"learning_rate": 4.489977728285078e-05,
|
24476 |
+
"loss": 0.8598,
|
24477 |
+
"step": 3492
|
24478 |
+
},
|
24479 |
+
{
|
24480 |
+
"epoch": 0.7762222222222223,
|
24481 |
+
"grad_norm": 0.9773833751678467,
|
24482 |
+
"learning_rate": 4.4855233853006685e-05,
|
24483 |
+
"loss": 1.5498,
|
24484 |
+
"step": 3493
|
24485 |
+
},
|
24486 |
+
{
|
24487 |
+
"epoch": 0.7764444444444445,
|
24488 |
+
"grad_norm": 1.0763559341430664,
|
24489 |
+
"learning_rate": 4.481069042316259e-05,
|
24490 |
+
"loss": 1.6527,
|
24491 |
+
"step": 3494
|
24492 |
+
},
|
24493 |
+
{
|
24494 |
+
"epoch": 0.7766666666666666,
|
24495 |
+
"grad_norm": 1.0425339937210083,
|
24496 |
+
"learning_rate": 4.476614699331849e-05,
|
24497 |
+
"loss": 1.272,
|
24498 |
+
"step": 3495
|
24499 |
+
},
|
24500 |
+
{
|
24501 |
+
"epoch": 0.7768888888888889,
|
24502 |
+
"grad_norm": 1.1225720643997192,
|
24503 |
+
"learning_rate": 4.472160356347439e-05,
|
24504 |
+
"loss": 1.3202,
|
24505 |
+
"step": 3496
|
24506 |
+
},
|
24507 |
+
{
|
24508 |
+
"epoch": 0.7771111111111111,
|
24509 |
+
"grad_norm": 1.2557756900787354,
|
24510 |
+
"learning_rate": 4.467706013363029e-05,
|
24511 |
+
"loss": 1.378,
|
24512 |
+
"step": 3497
|
24513 |
+
},
|
24514 |
+
{
|
24515 |
+
"epoch": 0.7773333333333333,
|
24516 |
+
"grad_norm": 0.17433112859725952,
|
24517 |
+
"learning_rate": 4.463251670378619e-05,
|
24518 |
+
"loss": 0.038,
|
24519 |
+
"step": 3498
|
24520 |
+
},
|
24521 |
+
{
|
24522 |
+
"epoch": 0.7775555555555556,
|
24523 |
+
"grad_norm": 1.008841633796692,
|
24524 |
+
"learning_rate": 4.45879732739421e-05,
|
24525 |
+
"loss": 0.8367,
|
24526 |
+
"step": 3499
|
24527 |
+
},
|
24528 |
+
{
|
24529 |
+
"epoch": 0.7777777777777778,
|
24530 |
+
"grad_norm": 0.9712222814559937,
|
24531 |
+
"learning_rate": 4.4543429844098e-05,
|
24532 |
+
"loss": 0.6577,
|
24533 |
+
"step": 3500
|
24534 |
}
|
24535 |
],
|
24536 |
"logging_steps": 1,
|
|
|
24550 |
"attributes": {}
|
24551 |
}
|
24552 |
},
|
24553 |
+
"total_flos": 3.773762935084032e+16,
|
24554 |
"train_batch_size": 1,
|
24555 |
"trial_name": null,
|
24556 |
"trial_params": null
|