Training in progress, step 4000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1673342072
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbfc8aad35faaf64abb513505043c047584775cadd7245019a5fe079204913c5
|
3 |
size 1673342072
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 194745274
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80b9fc16e20c7c3a4a8a32c70b58d54425e979f182d679aa58671e3ec7106544
|
3 |
size 194745274
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec8840281322a0a6eb80ad374fa65fffd4d0c70b4f8fda772212d4d71b733b12
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f809221ad225608f3be6038571a48fb2004d7ccafc2e2b8f01c991135b7010d6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 900,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -24531,6 +24531,3514 @@
|
|
24531 |
"learning_rate": 4.4543429844098e-05,
|
24532 |
"loss": 0.6577,
|
24533 |
"step": 3500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24534 |
}
|
24535 |
],
|
24536 |
"logging_steps": 1,
|
@@ -24550,7 +28058,7 @@
|
|
24550 |
"attributes": {}
|
24551 |
}
|
24552 |
},
|
24553 |
-
"total_flos":
|
24554 |
"train_batch_size": 1,
|
24555 |
"trial_name": null,
|
24556 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8888888888888888,
|
5 |
"eval_steps": 900,
|
6 |
+
"global_step": 4000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
24531 |
"learning_rate": 4.4543429844098e-05,
|
24532 |
"loss": 0.6577,
|
24533 |
"step": 3500
|
24534 |
+
},
|
24535 |
+
{
|
24536 |
+
"epoch": 0.778,
|
24537 |
+
"grad_norm": 0.6440428495407104,
|
24538 |
+
"learning_rate": 4.44988864142539e-05,
|
24539 |
+
"loss": 0.9956,
|
24540 |
+
"step": 3501
|
24541 |
+
},
|
24542 |
+
{
|
24543 |
+
"epoch": 0.7782222222222223,
|
24544 |
+
"grad_norm": 0.5025835633277893,
|
24545 |
+
"learning_rate": 4.44543429844098e-05,
|
24546 |
+
"loss": 0.9893,
|
24547 |
+
"step": 3502
|
24548 |
+
},
|
24549 |
+
{
|
24550 |
+
"epoch": 0.7784444444444445,
|
24551 |
+
"grad_norm": 0.04510605335235596,
|
24552 |
+
"learning_rate": 4.4409799554565705e-05,
|
24553 |
+
"loss": 0.0106,
|
24554 |
+
"step": 3503
|
24555 |
+
},
|
24556 |
+
{
|
24557 |
+
"epoch": 0.7786666666666666,
|
24558 |
+
"grad_norm": 0.9423682689666748,
|
24559 |
+
"learning_rate": 4.43652561247216e-05,
|
24560 |
+
"loss": 2.3701,
|
24561 |
+
"step": 3504
|
24562 |
+
},
|
24563 |
+
{
|
24564 |
+
"epoch": 0.7788888888888889,
|
24565 |
+
"grad_norm": 0.9391410946846008,
|
24566 |
+
"learning_rate": 4.432071269487751e-05,
|
24567 |
+
"loss": 2.172,
|
24568 |
+
"step": 3505
|
24569 |
+
},
|
24570 |
+
{
|
24571 |
+
"epoch": 0.7791111111111111,
|
24572 |
+
"grad_norm": 0.9086732864379883,
|
24573 |
+
"learning_rate": 4.427616926503341e-05,
|
24574 |
+
"loss": 2.217,
|
24575 |
+
"step": 3506
|
24576 |
+
},
|
24577 |
+
{
|
24578 |
+
"epoch": 0.7793333333333333,
|
24579 |
+
"grad_norm": 0.9702697396278381,
|
24580 |
+
"learning_rate": 4.423162583518931e-05,
|
24581 |
+
"loss": 2.1024,
|
24582 |
+
"step": 3507
|
24583 |
+
},
|
24584 |
+
{
|
24585 |
+
"epoch": 0.7795555555555556,
|
24586 |
+
"grad_norm": 0.9364957809448242,
|
24587 |
+
"learning_rate": 4.418708240534522e-05,
|
24588 |
+
"loss": 1.9179,
|
24589 |
+
"step": 3508
|
24590 |
+
},
|
24591 |
+
{
|
24592 |
+
"epoch": 0.7797777777777778,
|
24593 |
+
"grad_norm": 0.9006823301315308,
|
24594 |
+
"learning_rate": 4.414253897550111e-05,
|
24595 |
+
"loss": 1.9431,
|
24596 |
+
"step": 3509
|
24597 |
+
},
|
24598 |
+
{
|
24599 |
+
"epoch": 0.78,
|
24600 |
+
"grad_norm": 0.8712829947471619,
|
24601 |
+
"learning_rate": 4.4097995545657015e-05,
|
24602 |
+
"loss": 1.7075,
|
24603 |
+
"step": 3510
|
24604 |
+
},
|
24605 |
+
{
|
24606 |
+
"epoch": 0.7802222222222223,
|
24607 |
+
"grad_norm": 0.8921668529510498,
|
24608 |
+
"learning_rate": 4.405345211581292e-05,
|
24609 |
+
"loss": 2.0187,
|
24610 |
+
"step": 3511
|
24611 |
+
},
|
24612 |
+
{
|
24613 |
+
"epoch": 0.7804444444444445,
|
24614 |
+
"grad_norm": 1.2319942712783813,
|
24615 |
+
"learning_rate": 4.400890868596882e-05,
|
24616 |
+
"loss": 2.2116,
|
24617 |
+
"step": 3512
|
24618 |
+
},
|
24619 |
+
{
|
24620 |
+
"epoch": 0.7806666666666666,
|
24621 |
+
"grad_norm": 1.0687848329544067,
|
24622 |
+
"learning_rate": 4.3964365256124724e-05,
|
24623 |
+
"loss": 1.827,
|
24624 |
+
"step": 3513
|
24625 |
+
},
|
24626 |
+
{
|
24627 |
+
"epoch": 0.7808888888888889,
|
24628 |
+
"grad_norm": 0.06872207671403885,
|
24629 |
+
"learning_rate": 4.391982182628063e-05,
|
24630 |
+
"loss": 0.016,
|
24631 |
+
"step": 3514
|
24632 |
+
},
|
24633 |
+
{
|
24634 |
+
"epoch": 0.7811111111111111,
|
24635 |
+
"grad_norm": 0.06946699321269989,
|
24636 |
+
"learning_rate": 4.387527839643653e-05,
|
24637 |
+
"loss": 0.016,
|
24638 |
+
"step": 3515
|
24639 |
+
},
|
24640 |
+
{
|
24641 |
+
"epoch": 0.7813333333333333,
|
24642 |
+
"grad_norm": 0.06873323768377304,
|
24643 |
+
"learning_rate": 4.383073496659243e-05,
|
24644 |
+
"loss": 0.0157,
|
24645 |
+
"step": 3516
|
24646 |
+
},
|
24647 |
+
{
|
24648 |
+
"epoch": 0.7815555555555556,
|
24649 |
+
"grad_norm": 0.7765884399414062,
|
24650 |
+
"learning_rate": 4.378619153674833e-05,
|
24651 |
+
"loss": 0.974,
|
24652 |
+
"step": 3517
|
24653 |
+
},
|
24654 |
+
{
|
24655 |
+
"epoch": 0.7817777777777778,
|
24656 |
+
"grad_norm": 0.7765089869499207,
|
24657 |
+
"learning_rate": 4.374164810690423e-05,
|
24658 |
+
"loss": 0.9251,
|
24659 |
+
"step": 3518
|
24660 |
+
},
|
24661 |
+
{
|
24662 |
+
"epoch": 0.782,
|
24663 |
+
"grad_norm": 0.6271977424621582,
|
24664 |
+
"learning_rate": 4.3697104677060136e-05,
|
24665 |
+
"loss": 0.8018,
|
24666 |
+
"step": 3519
|
24667 |
+
},
|
24668 |
+
{
|
24669 |
+
"epoch": 0.7822222222222223,
|
24670 |
+
"grad_norm": 0.726948618888855,
|
24671 |
+
"learning_rate": 4.365256124721604e-05,
|
24672 |
+
"loss": 0.8888,
|
24673 |
+
"step": 3520
|
24674 |
+
},
|
24675 |
+
{
|
24676 |
+
"epoch": 0.7824444444444445,
|
24677 |
+
"grad_norm": 0.9243329167366028,
|
24678 |
+
"learning_rate": 4.360801781737194e-05,
|
24679 |
+
"loss": 1.6066,
|
24680 |
+
"step": 3521
|
24681 |
+
},
|
24682 |
+
{
|
24683 |
+
"epoch": 0.7826666666666666,
|
24684 |
+
"grad_norm": 1.2513469457626343,
|
24685 |
+
"learning_rate": 4.356347438752784e-05,
|
24686 |
+
"loss": 1.9953,
|
24687 |
+
"step": 3522
|
24688 |
+
},
|
24689 |
+
{
|
24690 |
+
"epoch": 0.7828888888888889,
|
24691 |
+
"grad_norm": 0.9244915246963501,
|
24692 |
+
"learning_rate": 4.3518930957683744e-05,
|
24693 |
+
"loss": 1.8274,
|
24694 |
+
"step": 3523
|
24695 |
+
},
|
24696 |
+
{
|
24697 |
+
"epoch": 0.7831111111111111,
|
24698 |
+
"grad_norm": 1.0428435802459717,
|
24699 |
+
"learning_rate": 4.347438752783964e-05,
|
24700 |
+
"loss": 1.6283,
|
24701 |
+
"step": 3524
|
24702 |
+
},
|
24703 |
+
{
|
24704 |
+
"epoch": 0.7833333333333333,
|
24705 |
+
"grad_norm": 0.8906724452972412,
|
24706 |
+
"learning_rate": 4.342984409799555e-05,
|
24707 |
+
"loss": 1.6417,
|
24708 |
+
"step": 3525
|
24709 |
+
},
|
24710 |
+
{
|
24711 |
+
"epoch": 0.7835555555555556,
|
24712 |
+
"grad_norm": 1.084955096244812,
|
24713 |
+
"learning_rate": 4.3385300668151454e-05,
|
24714 |
+
"loss": 1.6931,
|
24715 |
+
"step": 3526
|
24716 |
+
},
|
24717 |
+
{
|
24718 |
+
"epoch": 0.7837777777777778,
|
24719 |
+
"grad_norm": 0.945055365562439,
|
24720 |
+
"learning_rate": 4.334075723830735e-05,
|
24721 |
+
"loss": 1.0096,
|
24722 |
+
"step": 3527
|
24723 |
+
},
|
24724 |
+
{
|
24725 |
+
"epoch": 0.784,
|
24726 |
+
"grad_norm": 0.0929998904466629,
|
24727 |
+
"learning_rate": 4.329621380846325e-05,
|
24728 |
+
"loss": 0.0173,
|
24729 |
+
"step": 3528
|
24730 |
+
},
|
24731 |
+
{
|
24732 |
+
"epoch": 0.7842222222222223,
|
24733 |
+
"grad_norm": 1.0231083631515503,
|
24734 |
+
"learning_rate": 4.3251670378619156e-05,
|
24735 |
+
"loss": 1.729,
|
24736 |
+
"step": 3529
|
24737 |
+
},
|
24738 |
+
{
|
24739 |
+
"epoch": 0.7844444444444445,
|
24740 |
+
"grad_norm": 1.0118030309677124,
|
24741 |
+
"learning_rate": 4.3207126948775055e-05,
|
24742 |
+
"loss": 1.6239,
|
24743 |
+
"step": 3530
|
24744 |
+
},
|
24745 |
+
{
|
24746 |
+
"epoch": 0.7846666666666666,
|
24747 |
+
"grad_norm": 0.712536096572876,
|
24748 |
+
"learning_rate": 4.316258351893096e-05,
|
24749 |
+
"loss": 0.7916,
|
24750 |
+
"step": 3531
|
24751 |
+
},
|
24752 |
+
{
|
24753 |
+
"epoch": 0.7848888888888889,
|
24754 |
+
"grad_norm": 0.0708150640130043,
|
24755 |
+
"learning_rate": 4.3118040089086865e-05,
|
24756 |
+
"loss": 0.0182,
|
24757 |
+
"step": 3532
|
24758 |
+
},
|
24759 |
+
{
|
24760 |
+
"epoch": 0.7851111111111111,
|
24761 |
+
"grad_norm": 0.08458317071199417,
|
24762 |
+
"learning_rate": 4.3073496659242764e-05,
|
24763 |
+
"loss": 0.0185,
|
24764 |
+
"step": 3533
|
24765 |
+
},
|
24766 |
+
{
|
24767 |
+
"epoch": 0.7853333333333333,
|
24768 |
+
"grad_norm": 1.0665457248687744,
|
24769 |
+
"learning_rate": 4.302895322939867e-05,
|
24770 |
+
"loss": 1.6411,
|
24771 |
+
"step": 3534
|
24772 |
+
},
|
24773 |
+
{
|
24774 |
+
"epoch": 0.7855555555555556,
|
24775 |
+
"grad_norm": 1.1385186910629272,
|
24776 |
+
"learning_rate": 4.298440979955457e-05,
|
24777 |
+
"loss": 1.3018,
|
24778 |
+
"step": 3535
|
24779 |
+
},
|
24780 |
+
{
|
24781 |
+
"epoch": 0.7857777777777778,
|
24782 |
+
"grad_norm": 1.0706120729446411,
|
24783 |
+
"learning_rate": 4.2939866369710466e-05,
|
24784 |
+
"loss": 1.5641,
|
24785 |
+
"step": 3536
|
24786 |
+
},
|
24787 |
+
{
|
24788 |
+
"epoch": 0.786,
|
24789 |
+
"grad_norm": 1.1300500631332397,
|
24790 |
+
"learning_rate": 4.289532293986637e-05,
|
24791 |
+
"loss": 1.8248,
|
24792 |
+
"step": 3537
|
24793 |
+
},
|
24794 |
+
{
|
24795 |
+
"epoch": 0.7862222222222223,
|
24796 |
+
"grad_norm": 1.3996295928955078,
|
24797 |
+
"learning_rate": 4.285077951002228e-05,
|
24798 |
+
"loss": 1.5244,
|
24799 |
+
"step": 3538
|
24800 |
+
},
|
24801 |
+
{
|
24802 |
+
"epoch": 0.7864444444444444,
|
24803 |
+
"grad_norm": 1.079788088798523,
|
24804 |
+
"learning_rate": 4.2806236080178176e-05,
|
24805 |
+
"loss": 1.6515,
|
24806 |
+
"step": 3539
|
24807 |
+
},
|
24808 |
+
{
|
24809 |
+
"epoch": 0.7866666666666666,
|
24810 |
+
"grad_norm": 1.3016208410263062,
|
24811 |
+
"learning_rate": 4.276169265033408e-05,
|
24812 |
+
"loss": 1.5737,
|
24813 |
+
"step": 3540
|
24814 |
+
},
|
24815 |
+
{
|
24816 |
+
"epoch": 0.7868888888888889,
|
24817 |
+
"grad_norm": 1.1378430128097534,
|
24818 |
+
"learning_rate": 4.271714922048998e-05,
|
24819 |
+
"loss": 1.4334,
|
24820 |
+
"step": 3541
|
24821 |
+
},
|
24822 |
+
{
|
24823 |
+
"epoch": 0.7871111111111111,
|
24824 |
+
"grad_norm": 1.0300368070602417,
|
24825 |
+
"learning_rate": 4.267260579064588e-05,
|
24826 |
+
"loss": 1.4333,
|
24827 |
+
"step": 3542
|
24828 |
+
},
|
24829 |
+
{
|
24830 |
+
"epoch": 0.7873333333333333,
|
24831 |
+
"grad_norm": 1.0914349555969238,
|
24832 |
+
"learning_rate": 4.2628062360801784e-05,
|
24833 |
+
"loss": 1.3632,
|
24834 |
+
"step": 3543
|
24835 |
+
},
|
24836 |
+
{
|
24837 |
+
"epoch": 0.7875555555555556,
|
24838 |
+
"grad_norm": 1.0840858221054077,
|
24839 |
+
"learning_rate": 4.258351893095768e-05,
|
24840 |
+
"loss": 1.471,
|
24841 |
+
"step": 3544
|
24842 |
+
},
|
24843 |
+
{
|
24844 |
+
"epoch": 0.7877777777777778,
|
24845 |
+
"grad_norm": 0.19576247036457062,
|
24846 |
+
"learning_rate": 4.253897550111359e-05,
|
24847 |
+
"loss": 0.0362,
|
24848 |
+
"step": 3545
|
24849 |
+
},
|
24850 |
+
{
|
24851 |
+
"epoch": 0.788,
|
24852 |
+
"grad_norm": 0.19378961622714996,
|
24853 |
+
"learning_rate": 4.249443207126949e-05,
|
24854 |
+
"loss": 0.035,
|
24855 |
+
"step": 3546
|
24856 |
+
},
|
24857 |
+
{
|
24858 |
+
"epoch": 0.7882222222222223,
|
24859 |
+
"grad_norm": 0.13323235511779785,
|
24860 |
+
"learning_rate": 4.244988864142539e-05,
|
24861 |
+
"loss": 0.0349,
|
24862 |
+
"step": 3547
|
24863 |
+
},
|
24864 |
+
{
|
24865 |
+
"epoch": 0.7884444444444444,
|
24866 |
+
"grad_norm": 0.1556854248046875,
|
24867 |
+
"learning_rate": 4.240534521158129e-05,
|
24868 |
+
"loss": 0.0353,
|
24869 |
+
"step": 3548
|
24870 |
+
},
|
24871 |
+
{
|
24872 |
+
"epoch": 0.7886666666666666,
|
24873 |
+
"grad_norm": 1.26119065284729,
|
24874 |
+
"learning_rate": 4.2360801781737195e-05,
|
24875 |
+
"loss": 1.1486,
|
24876 |
+
"step": 3549
|
24877 |
+
},
|
24878 |
+
{
|
24879 |
+
"epoch": 0.7888888888888889,
|
24880 |
+
"grad_norm": 0.8363838791847229,
|
24881 |
+
"learning_rate": 4.2316258351893094e-05,
|
24882 |
+
"loss": 0.5973,
|
24883 |
+
"step": 3550
|
24884 |
+
},
|
24885 |
+
{
|
24886 |
+
"epoch": 0.7891111111111111,
|
24887 |
+
"grad_norm": 0.6089571118354797,
|
24888 |
+
"learning_rate": 4.2271714922049e-05,
|
24889 |
+
"loss": 1.1064,
|
24890 |
+
"step": 3551
|
24891 |
+
},
|
24892 |
+
{
|
24893 |
+
"epoch": 0.7893333333333333,
|
24894 |
+
"grad_norm": 0.04548042267560959,
|
24895 |
+
"learning_rate": 4.2227171492204905e-05,
|
24896 |
+
"loss": 0.0104,
|
24897 |
+
"step": 3552
|
24898 |
+
},
|
24899 |
+
{
|
24900 |
+
"epoch": 0.7895555555555556,
|
24901 |
+
"grad_norm": 0.6471196413040161,
|
24902 |
+
"learning_rate": 4.21826280623608e-05,
|
24903 |
+
"loss": 1.1893,
|
24904 |
+
"step": 3553
|
24905 |
+
},
|
24906 |
+
{
|
24907 |
+
"epoch": 0.7897777777777778,
|
24908 |
+
"grad_norm": 0.046915166079998016,
|
24909 |
+
"learning_rate": 4.213808463251671e-05,
|
24910 |
+
"loss": 0.0107,
|
24911 |
+
"step": 3554
|
24912 |
+
},
|
24913 |
+
{
|
24914 |
+
"epoch": 0.79,
|
24915 |
+
"grad_norm": 0.5992786884307861,
|
24916 |
+
"learning_rate": 4.209354120267261e-05,
|
24917 |
+
"loss": 0.9432,
|
24918 |
+
"step": 3555
|
24919 |
+
},
|
24920 |
+
{
|
24921 |
+
"epoch": 0.7902222222222223,
|
24922 |
+
"grad_norm": 0.08186815679073334,
|
24923 |
+
"learning_rate": 4.2048997772828506e-05,
|
24924 |
+
"loss": 0.0118,
|
24925 |
+
"step": 3556
|
24926 |
+
},
|
24927 |
+
{
|
24928 |
+
"epoch": 0.7904444444444444,
|
24929 |
+
"grad_norm": 0.9210121035575867,
|
24930 |
+
"learning_rate": 4.200445434298441e-05,
|
24931 |
+
"loss": 2.1219,
|
24932 |
+
"step": 3557
|
24933 |
+
},
|
24934 |
+
{
|
24935 |
+
"epoch": 0.7906666666666666,
|
24936 |
+
"grad_norm": 1.006956696510315,
|
24937 |
+
"learning_rate": 4.1959910913140317e-05,
|
24938 |
+
"loss": 1.8555,
|
24939 |
+
"step": 3558
|
24940 |
+
},
|
24941 |
+
{
|
24942 |
+
"epoch": 0.7908888888888889,
|
24943 |
+
"grad_norm": 0.9316953420639038,
|
24944 |
+
"learning_rate": 4.1915367483296215e-05,
|
24945 |
+
"loss": 2.0922,
|
24946 |
+
"step": 3559
|
24947 |
+
},
|
24948 |
+
{
|
24949 |
+
"epoch": 0.7911111111111111,
|
24950 |
+
"grad_norm": 0.9348610639572144,
|
24951 |
+
"learning_rate": 4.187082405345212e-05,
|
24952 |
+
"loss": 2.2086,
|
24953 |
+
"step": 3560
|
24954 |
+
},
|
24955 |
+
{
|
24956 |
+
"epoch": 0.7913333333333333,
|
24957 |
+
"grad_norm": 0.884831964969635,
|
24958 |
+
"learning_rate": 4.182628062360802e-05,
|
24959 |
+
"loss": 1.712,
|
24960 |
+
"step": 3561
|
24961 |
+
},
|
24962 |
+
{
|
24963 |
+
"epoch": 0.7915555555555556,
|
24964 |
+
"grad_norm": 0.9388924837112427,
|
24965 |
+
"learning_rate": 4.178173719376392e-05,
|
24966 |
+
"loss": 1.9738,
|
24967 |
+
"step": 3562
|
24968 |
+
},
|
24969 |
+
{
|
24970 |
+
"epoch": 0.7917777777777778,
|
24971 |
+
"grad_norm": 0.8886390924453735,
|
24972 |
+
"learning_rate": 4.173719376391982e-05,
|
24973 |
+
"loss": 2.0557,
|
24974 |
+
"step": 3563
|
24975 |
+
},
|
24976 |
+
{
|
24977 |
+
"epoch": 0.792,
|
24978 |
+
"grad_norm": 0.9279087781906128,
|
24979 |
+
"learning_rate": 4.169265033407573e-05,
|
24980 |
+
"loss": 1.8811,
|
24981 |
+
"step": 3564
|
24982 |
+
},
|
24983 |
+
{
|
24984 |
+
"epoch": 0.7922222222222223,
|
24985 |
+
"grad_norm": 0.8624377250671387,
|
24986 |
+
"learning_rate": 4.164810690423163e-05,
|
24987 |
+
"loss": 1.7352,
|
24988 |
+
"step": 3565
|
24989 |
+
},
|
24990 |
+
{
|
24991 |
+
"epoch": 0.7924444444444444,
|
24992 |
+
"grad_norm": 0.9816845059394836,
|
24993 |
+
"learning_rate": 4.160356347438753e-05,
|
24994 |
+
"loss": 2.084,
|
24995 |
+
"step": 3566
|
24996 |
+
},
|
24997 |
+
{
|
24998 |
+
"epoch": 0.7926666666666666,
|
24999 |
+
"grad_norm": 1.113109827041626,
|
25000 |
+
"learning_rate": 4.155902004454343e-05,
|
25001 |
+
"loss": 1.8477,
|
25002 |
+
"step": 3567
|
25003 |
+
},
|
25004 |
+
{
|
25005 |
+
"epoch": 0.7928888888888889,
|
25006 |
+
"grad_norm": 0.9390388131141663,
|
25007 |
+
"learning_rate": 4.151447661469933e-05,
|
25008 |
+
"loss": 2.0081,
|
25009 |
+
"step": 3568
|
25010 |
+
},
|
25011 |
+
{
|
25012 |
+
"epoch": 0.7931111111111111,
|
25013 |
+
"grad_norm": 0.06967326253652573,
|
25014 |
+
"learning_rate": 4.1469933184855235e-05,
|
25015 |
+
"loss": 0.0158,
|
25016 |
+
"step": 3569
|
25017 |
+
},
|
25018 |
+
{
|
25019 |
+
"epoch": 0.7933333333333333,
|
25020 |
+
"grad_norm": 0.7333217859268188,
|
25021 |
+
"learning_rate": 4.142538975501114e-05,
|
25022 |
+
"loss": 0.959,
|
25023 |
+
"step": 3570
|
25024 |
+
},
|
25025 |
+
{
|
25026 |
+
"epoch": 0.7935555555555556,
|
25027 |
+
"grad_norm": 0.9319103360176086,
|
25028 |
+
"learning_rate": 4.138084632516704e-05,
|
25029 |
+
"loss": 1.7261,
|
25030 |
+
"step": 3571
|
25031 |
+
},
|
25032 |
+
{
|
25033 |
+
"epoch": 0.7937777777777778,
|
25034 |
+
"grad_norm": 1.187261700630188,
|
25035 |
+
"learning_rate": 4.1336302895322944e-05,
|
25036 |
+
"loss": 2.1856,
|
25037 |
+
"step": 3572
|
25038 |
+
},
|
25039 |
+
{
|
25040 |
+
"epoch": 0.794,
|
25041 |
+
"grad_norm": 0.9051704406738281,
|
25042 |
+
"learning_rate": 4.129175946547885e-05,
|
25043 |
+
"loss": 1.5193,
|
25044 |
+
"step": 3573
|
25045 |
+
},
|
25046 |
+
{
|
25047 |
+
"epoch": 0.7942222222222223,
|
25048 |
+
"grad_norm": 1.0516455173492432,
|
25049 |
+
"learning_rate": 4.124721603563475e-05,
|
25050 |
+
"loss": 1.8372,
|
25051 |
+
"step": 3574
|
25052 |
+
},
|
25053 |
+
{
|
25054 |
+
"epoch": 0.7944444444444444,
|
25055 |
+
"grad_norm": 0.9026862978935242,
|
25056 |
+
"learning_rate": 4.120267260579065e-05,
|
25057 |
+
"loss": 1.7081,
|
25058 |
+
"step": 3575
|
25059 |
+
},
|
25060 |
+
{
|
25061 |
+
"epoch": 0.7946666666666666,
|
25062 |
+
"grad_norm": 1.0022499561309814,
|
25063 |
+
"learning_rate": 4.115812917594655e-05,
|
25064 |
+
"loss": 1.8029,
|
25065 |
+
"step": 3576
|
25066 |
+
},
|
25067 |
+
{
|
25068 |
+
"epoch": 0.7948888888888889,
|
25069 |
+
"grad_norm": 1.0183892250061035,
|
25070 |
+
"learning_rate": 4.111358574610245e-05,
|
25071 |
+
"loss": 1.8359,
|
25072 |
+
"step": 3577
|
25073 |
+
},
|
25074 |
+
{
|
25075 |
+
"epoch": 0.7951111111111111,
|
25076 |
+
"grad_norm": 0.06487128883600235,
|
25077 |
+
"learning_rate": 4.1069042316258356e-05,
|
25078 |
+
"loss": 0.018,
|
25079 |
+
"step": 3578
|
25080 |
+
},
|
25081 |
+
{
|
25082 |
+
"epoch": 0.7953333333333333,
|
25083 |
+
"grad_norm": 0.07973368465900421,
|
25084 |
+
"learning_rate": 4.1024498886414255e-05,
|
25085 |
+
"loss": 0.0177,
|
25086 |
+
"step": 3579
|
25087 |
+
},
|
25088 |
+
{
|
25089 |
+
"epoch": 0.7955555555555556,
|
25090 |
+
"grad_norm": 0.6818245053291321,
|
25091 |
+
"learning_rate": 4.097995545657016e-05,
|
25092 |
+
"loss": 0.7042,
|
25093 |
+
"step": 3580
|
25094 |
+
},
|
25095 |
+
{
|
25096 |
+
"epoch": 0.7957777777777778,
|
25097 |
+
"grad_norm": 0.7157871127128601,
|
25098 |
+
"learning_rate": 4.093541202672606e-05,
|
25099 |
+
"loss": 0.9386,
|
25100 |
+
"step": 3581
|
25101 |
+
},
|
25102 |
+
{
|
25103 |
+
"epoch": 0.796,
|
25104 |
+
"grad_norm": 0.0813339352607727,
|
25105 |
+
"learning_rate": 4.089086859688196e-05,
|
25106 |
+
"loss": 0.0178,
|
25107 |
+
"step": 3582
|
25108 |
+
},
|
25109 |
+
{
|
25110 |
+
"epoch": 0.7962222222222223,
|
25111 |
+
"grad_norm": 0.07369329035282135,
|
25112 |
+
"learning_rate": 4.084632516703786e-05,
|
25113 |
+
"loss": 0.0175,
|
25114 |
+
"step": 3583
|
25115 |
+
},
|
25116 |
+
{
|
25117 |
+
"epoch": 0.7964444444444444,
|
25118 |
+
"grad_norm": 0.07479511946439743,
|
25119 |
+
"learning_rate": 4.080178173719377e-05,
|
25120 |
+
"loss": 0.0181,
|
25121 |
+
"step": 3584
|
25122 |
+
},
|
25123 |
+
{
|
25124 |
+
"epoch": 0.7966666666666666,
|
25125 |
+
"grad_norm": 0.07840964198112488,
|
25126 |
+
"learning_rate": 4.0757238307349666e-05,
|
25127 |
+
"loss": 0.0169,
|
25128 |
+
"step": 3585
|
25129 |
+
},
|
25130 |
+
{
|
25131 |
+
"epoch": 0.7968888888888889,
|
25132 |
+
"grad_norm": 1.3525023460388184,
|
25133 |
+
"learning_rate": 4.071269487750557e-05,
|
25134 |
+
"loss": 1.9031,
|
25135 |
+
"step": 3586
|
25136 |
+
},
|
25137 |
+
{
|
25138 |
+
"epoch": 0.7971111111111111,
|
25139 |
+
"grad_norm": 1.6226398944854736,
|
25140 |
+
"learning_rate": 4.066815144766147e-05,
|
25141 |
+
"loss": 1.763,
|
25142 |
+
"step": 3587
|
25143 |
+
},
|
25144 |
+
{
|
25145 |
+
"epoch": 0.7973333333333333,
|
25146 |
+
"grad_norm": 0.9485257267951965,
|
25147 |
+
"learning_rate": 4.062360801781737e-05,
|
25148 |
+
"loss": 1.4717,
|
25149 |
+
"step": 3588
|
25150 |
+
},
|
25151 |
+
{
|
25152 |
+
"epoch": 0.7975555555555556,
|
25153 |
+
"grad_norm": 0.9942083358764648,
|
25154 |
+
"learning_rate": 4.0579064587973274e-05,
|
25155 |
+
"loss": 1.61,
|
25156 |
+
"step": 3589
|
25157 |
+
},
|
25158 |
+
{
|
25159 |
+
"epoch": 0.7977777777777778,
|
25160 |
+
"grad_norm": 0.10811223834753036,
|
25161 |
+
"learning_rate": 4.053452115812918e-05,
|
25162 |
+
"loss": 0.0255,
|
25163 |
+
"step": 3590
|
25164 |
+
},
|
25165 |
+
{
|
25166 |
+
"epoch": 0.798,
|
25167 |
+
"grad_norm": 0.9328152537345886,
|
25168 |
+
"learning_rate": 4.048997772828508e-05,
|
25169 |
+
"loss": 1.6944,
|
25170 |
+
"step": 3591
|
25171 |
+
},
|
25172 |
+
{
|
25173 |
+
"epoch": 0.7982222222222223,
|
25174 |
+
"grad_norm": 1.1752344369888306,
|
25175 |
+
"learning_rate": 4.0445434298440984e-05,
|
25176 |
+
"loss": 1.5285,
|
25177 |
+
"step": 3592
|
25178 |
+
},
|
25179 |
+
{
|
25180 |
+
"epoch": 0.7984444444444444,
|
25181 |
+
"grad_norm": 1.2680740356445312,
|
25182 |
+
"learning_rate": 4.040089086859689e-05,
|
25183 |
+
"loss": 1.4009,
|
25184 |
+
"step": 3593
|
25185 |
+
},
|
25186 |
+
{
|
25187 |
+
"epoch": 0.7986666666666666,
|
25188 |
+
"grad_norm": 1.1118732690811157,
|
25189 |
+
"learning_rate": 4.035634743875278e-05,
|
25190 |
+
"loss": 1.3982,
|
25191 |
+
"step": 3594
|
25192 |
+
},
|
25193 |
+
{
|
25194 |
+
"epoch": 0.7988888888888889,
|
25195 |
+
"grad_norm": 1.389929175376892,
|
25196 |
+
"learning_rate": 4.0311804008908686e-05,
|
25197 |
+
"loss": 1.3998,
|
25198 |
+
"step": 3595
|
25199 |
+
},
|
25200 |
+
{
|
25201 |
+
"epoch": 0.7991111111111111,
|
25202 |
+
"grad_norm": 0.8431739807128906,
|
25203 |
+
"learning_rate": 4.026726057906459e-05,
|
25204 |
+
"loss": 0.8518,
|
25205 |
+
"step": 3596
|
25206 |
+
},
|
25207 |
+
{
|
25208 |
+
"epoch": 0.7993333333333333,
|
25209 |
+
"grad_norm": 1.3290144205093384,
|
25210 |
+
"learning_rate": 4.022271714922049e-05,
|
25211 |
+
"loss": 1.4607,
|
25212 |
+
"step": 3597
|
25213 |
+
},
|
25214 |
+
{
|
25215 |
+
"epoch": 0.7995555555555556,
|
25216 |
+
"grad_norm": 0.15491686761379242,
|
25217 |
+
"learning_rate": 4.0178173719376395e-05,
|
25218 |
+
"loss": 0.0335,
|
25219 |
+
"step": 3598
|
25220 |
+
},
|
25221 |
+
{
|
25222 |
+
"epoch": 0.7997777777777778,
|
25223 |
+
"grad_norm": 0.7583034038543701,
|
25224 |
+
"learning_rate": 4.01336302895323e-05,
|
25225 |
+
"loss": 0.4833,
|
25226 |
+
"step": 3599
|
25227 |
+
},
|
25228 |
+
{
|
25229 |
+
"epoch": 0.8,
|
25230 |
+
"grad_norm": 0.900272786617279,
|
25231 |
+
"learning_rate": 4.00890868596882e-05,
|
25232 |
+
"loss": 0.8159,
|
25233 |
+
"step": 3600
|
25234 |
+
},
|
25235 |
+
{
|
25236 |
+
"epoch": 0.8,
|
25237 |
+
"eval_loss": 1.1739096641540527,
|
25238 |
+
"eval_runtime": 239.8403,
|
25239 |
+
"eval_samples_per_second": 4.169,
|
25240 |
+
"eval_steps_per_second": 4.169,
|
25241 |
+
"step": 3600
|
25242 |
+
},
|
25243 |
+
{
|
25244 |
+
"epoch": 0.8002222222222222,
|
25245 |
+
"grad_norm": 0.5315160155296326,
|
25246 |
+
"learning_rate": 4.00445434298441e-05,
|
25247 |
+
"loss": 1.0474,
|
25248 |
+
"step": 3601
|
25249 |
+
},
|
25250 |
+
{
|
25251 |
+
"epoch": 0.8004444444444444,
|
25252 |
+
"grad_norm": 0.5535997152328491,
|
25253 |
+
"learning_rate": 4e-05,
|
25254 |
+
"loss": 0.9027,
|
25255 |
+
"step": 3602
|
25256 |
+
},
|
25257 |
+
{
|
25258 |
+
"epoch": 0.8006666666666666,
|
25259 |
+
"grad_norm": 0.7079357504844666,
|
25260 |
+
"learning_rate": 3.99554565701559e-05,
|
25261 |
+
"loss": 1.1432,
|
25262 |
+
"step": 3603
|
25263 |
+
},
|
25264 |
+
{
|
25265 |
+
"epoch": 0.8008888888888889,
|
25266 |
+
"grad_norm": 0.6734809875488281,
|
25267 |
+
"learning_rate": 3.991091314031181e-05,
|
25268 |
+
"loss": 1.0644,
|
25269 |
+
"step": 3604
|
25270 |
+
},
|
25271 |
+
{
|
25272 |
+
"epoch": 0.8011111111111111,
|
25273 |
+
"grad_norm": 0.6071887016296387,
|
25274 |
+
"learning_rate": 3.986636971046771e-05,
|
25275 |
+
"loss": 1.101,
|
25276 |
+
"step": 3605
|
25277 |
+
},
|
25278 |
+
{
|
25279 |
+
"epoch": 0.8013333333333333,
|
25280 |
+
"grad_norm": 0.5823980569839478,
|
25281 |
+
"learning_rate": 3.982182628062361e-05,
|
25282 |
+
"loss": 0.8761,
|
25283 |
+
"step": 3606
|
25284 |
+
},
|
25285 |
+
{
|
25286 |
+
"epoch": 0.8015555555555556,
|
25287 |
+
"grad_norm": 0.09482403099536896,
|
25288 |
+
"learning_rate": 3.977728285077951e-05,
|
25289 |
+
"loss": 0.0129,
|
25290 |
+
"step": 3607
|
25291 |
+
},
|
25292 |
+
{
|
25293 |
+
"epoch": 0.8017777777777778,
|
25294 |
+
"grad_norm": 0.588431179523468,
|
25295 |
+
"learning_rate": 3.9732739420935415e-05,
|
25296 |
+
"loss": 1.0989,
|
25297 |
+
"step": 3608
|
25298 |
+
},
|
25299 |
+
{
|
25300 |
+
"epoch": 0.802,
|
25301 |
+
"grad_norm": 0.9978165626525879,
|
25302 |
+
"learning_rate": 3.9688195991091314e-05,
|
25303 |
+
"loss": 1.6576,
|
25304 |
+
"step": 3609
|
25305 |
+
},
|
25306 |
+
{
|
25307 |
+
"epoch": 0.8022222222222222,
|
25308 |
+
"grad_norm": 0.9243869185447693,
|
25309 |
+
"learning_rate": 3.964365256124722e-05,
|
25310 |
+
"loss": 2.1462,
|
25311 |
+
"step": 3610
|
25312 |
+
},
|
25313 |
+
{
|
25314 |
+
"epoch": 0.8024444444444444,
|
25315 |
+
"grad_norm": 0.8666023015975952,
|
25316 |
+
"learning_rate": 3.9599109131403124e-05,
|
25317 |
+
"loss": 2.0262,
|
25318 |
+
"step": 3611
|
25319 |
+
},
|
25320 |
+
{
|
25321 |
+
"epoch": 0.8026666666666666,
|
25322 |
+
"grad_norm": 0.9008248448371887,
|
25323 |
+
"learning_rate": 3.955456570155902e-05,
|
25324 |
+
"loss": 2.0854,
|
25325 |
+
"step": 3612
|
25326 |
+
},
|
25327 |
+
{
|
25328 |
+
"epoch": 0.8028888888888889,
|
25329 |
+
"grad_norm": 0.6247386336326599,
|
25330 |
+
"learning_rate": 3.951002227171492e-05,
|
25331 |
+
"loss": 0.8506,
|
25332 |
+
"step": 3613
|
25333 |
+
},
|
25334 |
+
{
|
25335 |
+
"epoch": 0.8031111111111111,
|
25336 |
+
"grad_norm": 0.13101428747177124,
|
25337 |
+
"learning_rate": 3.946547884187082e-05,
|
25338 |
+
"loss": 0.0223,
|
25339 |
+
"step": 3614
|
25340 |
+
},
|
25341 |
+
{
|
25342 |
+
"epoch": 0.8033333333333333,
|
25343 |
+
"grad_norm": 0.12358218431472778,
|
25344 |
+
"learning_rate": 3.9420935412026726e-05,
|
25345 |
+
"loss": 0.0211,
|
25346 |
+
"step": 3615
|
25347 |
+
},
|
25348 |
+
{
|
25349 |
+
"epoch": 0.8035555555555556,
|
25350 |
+
"grad_norm": 0.9996263980865479,
|
25351 |
+
"learning_rate": 3.937639198218263e-05,
|
25352 |
+
"loss": 2.0538,
|
25353 |
+
"step": 3616
|
25354 |
+
},
|
25355 |
+
{
|
25356 |
+
"epoch": 0.8037777777777778,
|
25357 |
+
"grad_norm": 1.048120379447937,
|
25358 |
+
"learning_rate": 3.933184855233853e-05,
|
25359 |
+
"loss": 1.6907,
|
25360 |
+
"step": 3617
|
25361 |
+
},
|
25362 |
+
{
|
25363 |
+
"epoch": 0.804,
|
25364 |
+
"grad_norm": 0.9704152941703796,
|
25365 |
+
"learning_rate": 3.9287305122494435e-05,
|
25366 |
+
"loss": 2.025,
|
25367 |
+
"step": 3618
|
25368 |
+
},
|
25369 |
+
{
|
25370 |
+
"epoch": 0.8042222222222222,
|
25371 |
+
"grad_norm": 0.9544731378555298,
|
25372 |
+
"learning_rate": 3.924276169265034e-05,
|
25373 |
+
"loss": 1.8855,
|
25374 |
+
"step": 3619
|
25375 |
+
},
|
25376 |
+
{
|
25377 |
+
"epoch": 0.8044444444444444,
|
25378 |
+
"grad_norm": 0.869174599647522,
|
25379 |
+
"learning_rate": 3.919821826280624e-05,
|
25380 |
+
"loss": 2.1548,
|
25381 |
+
"step": 3620
|
25382 |
+
},
|
25383 |
+
{
|
25384 |
+
"epoch": 0.8046666666666666,
|
25385 |
+
"grad_norm": 0.7145273685455322,
|
25386 |
+
"learning_rate": 3.915367483296214e-05,
|
25387 |
+
"loss": 1.0897,
|
25388 |
+
"step": 3621
|
25389 |
+
},
|
25390 |
+
{
|
25391 |
+
"epoch": 0.8048888888888889,
|
25392 |
+
"grad_norm": 0.06968183070421219,
|
25393 |
+
"learning_rate": 3.910913140311804e-05,
|
25394 |
+
"loss": 0.0157,
|
25395 |
+
"step": 3622
|
25396 |
+
},
|
25397 |
+
{
|
25398 |
+
"epoch": 0.8051111111111111,
|
25399 |
+
"grad_norm": 0.636101484298706,
|
25400 |
+
"learning_rate": 3.906458797327394e-05,
|
25401 |
+
"loss": 0.8512,
|
25402 |
+
"step": 3623
|
25403 |
+
},
|
25404 |
+
{
|
25405 |
+
"epoch": 0.8053333333333333,
|
25406 |
+
"grad_norm": 1.1181496381759644,
|
25407 |
+
"learning_rate": 3.902004454342985e-05,
|
25408 |
+
"loss": 1.5164,
|
25409 |
+
"step": 3624
|
25410 |
+
},
|
25411 |
+
{
|
25412 |
+
"epoch": 0.8055555555555556,
|
25413 |
+
"grad_norm": 0.6359825730323792,
|
25414 |
+
"learning_rate": 3.897550111358575e-05,
|
25415 |
+
"loss": 1.0128,
|
25416 |
+
"step": 3625
|
25417 |
+
},
|
25418 |
+
{
|
25419 |
+
"epoch": 0.8057777777777778,
|
25420 |
+
"grad_norm": 0.943658173084259,
|
25421 |
+
"learning_rate": 3.893095768374165e-05,
|
25422 |
+
"loss": 1.582,
|
25423 |
+
"step": 3626
|
25424 |
+
},
|
25425 |
+
{
|
25426 |
+
"epoch": 0.806,
|
25427 |
+
"grad_norm": 1.047963261604309,
|
25428 |
+
"learning_rate": 3.888641425389755e-05,
|
25429 |
+
"loss": 1.704,
|
25430 |
+
"step": 3627
|
25431 |
+
},
|
25432 |
+
{
|
25433 |
+
"epoch": 0.8062222222222222,
|
25434 |
+
"grad_norm": 0.9852431416511536,
|
25435 |
+
"learning_rate": 3.8841870824053455e-05,
|
25436 |
+
"loss": 1.6806,
|
25437 |
+
"step": 3628
|
25438 |
+
},
|
25439 |
+
{
|
25440 |
+
"epoch": 0.8064444444444444,
|
25441 |
+
"grad_norm": 0.8574654459953308,
|
25442 |
+
"learning_rate": 3.879732739420935e-05,
|
25443 |
+
"loss": 0.916,
|
25444 |
+
"step": 3629
|
25445 |
+
},
|
25446 |
+
{
|
25447 |
+
"epoch": 0.8066666666666666,
|
25448 |
+
"grad_norm": 0.9380385279655457,
|
25449 |
+
"learning_rate": 3.875278396436526e-05,
|
25450 |
+
"loss": 0.9947,
|
25451 |
+
"step": 3630
|
25452 |
+
},
|
25453 |
+
{
|
25454 |
+
"epoch": 0.8068888888888889,
|
25455 |
+
"grad_norm": 0.06696880608797073,
|
25456 |
+
"learning_rate": 3.8708240534521164e-05,
|
25457 |
+
"loss": 0.017,
|
25458 |
+
"step": 3631
|
25459 |
+
},
|
25460 |
+
{
|
25461 |
+
"epoch": 0.8071111111111111,
|
25462 |
+
"grad_norm": 0.0664907768368721,
|
25463 |
+
"learning_rate": 3.866369710467706e-05,
|
25464 |
+
"loss": 0.0173,
|
25465 |
+
"step": 3632
|
25466 |
+
},
|
25467 |
+
{
|
25468 |
+
"epoch": 0.8073333333333333,
|
25469 |
+
"grad_norm": 0.06398806720972061,
|
25470 |
+
"learning_rate": 3.861915367483296e-05,
|
25471 |
+
"loss": 0.0171,
|
25472 |
+
"step": 3633
|
25473 |
+
},
|
25474 |
+
{
|
25475 |
+
"epoch": 0.8075555555555556,
|
25476 |
+
"grad_norm": 1.21707022190094,
|
25477 |
+
"learning_rate": 3.8574610244988866e-05,
|
25478 |
+
"loss": 2.0207,
|
25479 |
+
"step": 3634
|
25480 |
+
},
|
25481 |
+
{
|
25482 |
+
"epoch": 0.8077777777777778,
|
25483 |
+
"grad_norm": 1.04575777053833,
|
25484 |
+
"learning_rate": 3.8530066815144765e-05,
|
25485 |
+
"loss": 1.8143,
|
25486 |
+
"step": 3635
|
25487 |
+
},
|
25488 |
+
{
|
25489 |
+
"epoch": 0.808,
|
25490 |
+
"grad_norm": 0.0705951601266861,
|
25491 |
+
"learning_rate": 3.848552338530067e-05,
|
25492 |
+
"loss": 0.0166,
|
25493 |
+
"step": 3636
|
25494 |
+
},
|
25495 |
+
{
|
25496 |
+
"epoch": 0.8082222222222222,
|
25497 |
+
"grad_norm": 0.076121024787426,
|
25498 |
+
"learning_rate": 3.8440979955456576e-05,
|
25499 |
+
"loss": 0.0167,
|
25500 |
+
"step": 3637
|
25501 |
+
},
|
25502 |
+
{
|
25503 |
+
"epoch": 0.8084444444444444,
|
25504 |
+
"grad_norm": 1.0178598165512085,
|
25505 |
+
"learning_rate": 3.8396436525612474e-05,
|
25506 |
+
"loss": 1.6507,
|
25507 |
+
"step": 3638
|
25508 |
+
},
|
25509 |
+
{
|
25510 |
+
"epoch": 0.8086666666666666,
|
25511 |
+
"grad_norm": 1.102067232131958,
|
25512 |
+
"learning_rate": 3.835189309576838e-05,
|
25513 |
+
"loss": 1.7494,
|
25514 |
+
"step": 3639
|
25515 |
+
},
|
25516 |
+
{
|
25517 |
+
"epoch": 0.8088888888888889,
|
25518 |
+
"grad_norm": 0.10027281194925308,
|
25519 |
+
"learning_rate": 3.830734966592428e-05,
|
25520 |
+
"loss": 0.0251,
|
25521 |
+
"step": 3640
|
25522 |
+
},
|
25523 |
+
{
|
25524 |
+
"epoch": 0.8091111111111111,
|
25525 |
+
"grad_norm": 0.6546877026557922,
|
25526 |
+
"learning_rate": 3.826280623608018e-05,
|
25527 |
+
"loss": 0.6345,
|
25528 |
+
"step": 3641
|
25529 |
+
},
|
25530 |
+
{
|
25531 |
+
"epoch": 0.8093333333333333,
|
25532 |
+
"grad_norm": 1.25735342502594,
|
25533 |
+
"learning_rate": 3.821826280623608e-05,
|
25534 |
+
"loss": 1.7112,
|
25535 |
+
"step": 3642
|
25536 |
+
},
|
25537 |
+
{
|
25538 |
+
"epoch": 0.8095555555555556,
|
25539 |
+
"grad_norm": 1.1318788528442383,
|
25540 |
+
"learning_rate": 3.817371937639199e-05,
|
25541 |
+
"loss": 1.5739,
|
25542 |
+
"step": 3643
|
25543 |
+
},
|
25544 |
+
{
|
25545 |
+
"epoch": 0.8097777777777778,
|
25546 |
+
"grad_norm": 1.0459527969360352,
|
25547 |
+
"learning_rate": 3.8129175946547886e-05,
|
25548 |
+
"loss": 1.3376,
|
25549 |
+
"step": 3644
|
25550 |
+
},
|
25551 |
+
{
|
25552 |
+
"epoch": 0.81,
|
25553 |
+
"grad_norm": 1.0099126100540161,
|
25554 |
+
"learning_rate": 3.808463251670379e-05,
|
25555 |
+
"loss": 1.2814,
|
25556 |
+
"step": 3645
|
25557 |
+
},
|
25558 |
+
{
|
25559 |
+
"epoch": 0.8102222222222222,
|
25560 |
+
"grad_norm": 0.82170170545578,
|
25561 |
+
"learning_rate": 3.804008908685969e-05,
|
25562 |
+
"loss": 0.9743,
|
25563 |
+
"step": 3646
|
25564 |
+
},
|
25565 |
+
{
|
25566 |
+
"epoch": 0.8104444444444444,
|
25567 |
+
"grad_norm": 1.0343072414398193,
|
25568 |
+
"learning_rate": 3.799554565701559e-05,
|
25569 |
+
"loss": 1.4283,
|
25570 |
+
"step": 3647
|
25571 |
+
},
|
25572 |
+
{
|
25573 |
+
"epoch": 0.8106666666666666,
|
25574 |
+
"grad_norm": 0.944911777973175,
|
25575 |
+
"learning_rate": 3.7951002227171494e-05,
|
25576 |
+
"loss": 1.05,
|
25577 |
+
"step": 3648
|
25578 |
+
},
|
25579 |
+
{
|
25580 |
+
"epoch": 0.8108888888888889,
|
25581 |
+
"grad_norm": 0.7252945303916931,
|
25582 |
+
"learning_rate": 3.79064587973274e-05,
|
25583 |
+
"loss": 0.6662,
|
25584 |
+
"step": 3649
|
25585 |
+
},
|
25586 |
+
{
|
25587 |
+
"epoch": 0.8111111111111111,
|
25588 |
+
"grad_norm": 1.005825400352478,
|
25589 |
+
"learning_rate": 3.78619153674833e-05,
|
25590 |
+
"loss": 0.7958,
|
25591 |
+
"step": 3650
|
25592 |
+
},
|
25593 |
+
{
|
25594 |
+
"epoch": 0.8113333333333334,
|
25595 |
+
"grad_norm": 0.82235187292099,
|
25596 |
+
"learning_rate": 3.78173719376392e-05,
|
25597 |
+
"loss": 2.2468,
|
25598 |
+
"step": 3651
|
25599 |
+
},
|
25600 |
+
{
|
25601 |
+
"epoch": 0.8115555555555556,
|
25602 |
+
"grad_norm": 0.6531208157539368,
|
25603 |
+
"learning_rate": 3.77728285077951e-05,
|
25604 |
+
"loss": 0.9914,
|
25605 |
+
"step": 3652
|
25606 |
+
},
|
25607 |
+
{
|
25608 |
+
"epoch": 0.8117777777777778,
|
25609 |
+
"grad_norm": 0.8134424686431885,
|
25610 |
+
"learning_rate": 3.7728285077951e-05,
|
25611 |
+
"loss": 2.2763,
|
25612 |
+
"step": 3653
|
25613 |
+
},
|
25614 |
+
{
|
25615 |
+
"epoch": 0.812,
|
25616 |
+
"grad_norm": 0.043821610510349274,
|
25617 |
+
"learning_rate": 3.7683741648106906e-05,
|
25618 |
+
"loss": 0.0101,
|
25619 |
+
"step": 3654
|
25620 |
+
},
|
25621 |
+
{
|
25622 |
+
"epoch": 0.8122222222222222,
|
25623 |
+
"grad_norm": 0.6952782273292542,
|
25624 |
+
"learning_rate": 3.7639198218262804e-05,
|
25625 |
+
"loss": 1.234,
|
25626 |
+
"step": 3655
|
25627 |
+
},
|
25628 |
+
{
|
25629 |
+
"epoch": 0.8124444444444444,
|
25630 |
+
"grad_norm": 0.6162470579147339,
|
25631 |
+
"learning_rate": 3.759465478841871e-05,
|
25632 |
+
"loss": 1.0527,
|
25633 |
+
"step": 3656
|
25634 |
+
},
|
25635 |
+
{
|
25636 |
+
"epoch": 0.8126666666666666,
|
25637 |
+
"grad_norm": 1.0366730690002441,
|
25638 |
+
"learning_rate": 3.7550111358574615e-05,
|
25639 |
+
"loss": 2.3824,
|
25640 |
+
"step": 3657
|
25641 |
+
},
|
25642 |
+
{
|
25643 |
+
"epoch": 0.8128888888888889,
|
25644 |
+
"grad_norm": 0.08013699948787689,
|
25645 |
+
"learning_rate": 3.7505567928730514e-05,
|
25646 |
+
"loss": 0.0114,
|
25647 |
+
"step": 3658
|
25648 |
+
},
|
25649 |
+
{
|
25650 |
+
"epoch": 0.8131111111111111,
|
25651 |
+
"grad_norm": 0.07211296260356903,
|
25652 |
+
"learning_rate": 3.746102449888642e-05,
|
25653 |
+
"loss": 0.0114,
|
25654 |
+
"step": 3659
|
25655 |
+
},
|
25656 |
+
{
|
25657 |
+
"epoch": 0.8133333333333334,
|
25658 |
+
"grad_norm": 0.07523675262928009,
|
25659 |
+
"learning_rate": 3.741648106904232e-05,
|
25660 |
+
"loss": 0.0114,
|
25661 |
+
"step": 3660
|
25662 |
+
},
|
25663 |
+
{
|
25664 |
+
"epoch": 0.8135555555555556,
|
25665 |
+
"grad_norm": 0.4986688494682312,
|
25666 |
+
"learning_rate": 3.7371937639198216e-05,
|
25667 |
+
"loss": 0.946,
|
25668 |
+
"step": 3661
|
25669 |
+
},
|
25670 |
+
{
|
25671 |
+
"epoch": 0.8137777777777778,
|
25672 |
+
"grad_norm": 0.9343963265419006,
|
25673 |
+
"learning_rate": 3.732739420935412e-05,
|
25674 |
+
"loss": 2.0871,
|
25675 |
+
"step": 3662
|
25676 |
+
},
|
25677 |
+
{
|
25678 |
+
"epoch": 0.814,
|
25679 |
+
"grad_norm": 0.881712794303894,
|
25680 |
+
"learning_rate": 3.728285077951003e-05,
|
25681 |
+
"loss": 2.0004,
|
25682 |
+
"step": 3663
|
25683 |
+
},
|
25684 |
+
{
|
25685 |
+
"epoch": 0.8142222222222222,
|
25686 |
+
"grad_norm": 1.054946780204773,
|
25687 |
+
"learning_rate": 3.7238307349665925e-05,
|
25688 |
+
"loss": 2.1722,
|
25689 |
+
"step": 3664
|
25690 |
+
},
|
25691 |
+
{
|
25692 |
+
"epoch": 0.8144444444444444,
|
25693 |
+
"grad_norm": 0.9730517268180847,
|
25694 |
+
"learning_rate": 3.719376391982183e-05,
|
25695 |
+
"loss": 1.7299,
|
25696 |
+
"step": 3665
|
25697 |
+
},
|
25698 |
+
{
|
25699 |
+
"epoch": 0.8146666666666667,
|
25700 |
+
"grad_norm": 0.8659468293190002,
|
25701 |
+
"learning_rate": 3.714922048997773e-05,
|
25702 |
+
"loss": 2.1389,
|
25703 |
+
"step": 3666
|
25704 |
+
},
|
25705 |
+
{
|
25706 |
+
"epoch": 0.8148888888888889,
|
25707 |
+
"grad_norm": 0.6216636896133423,
|
25708 |
+
"learning_rate": 3.710467706013363e-05,
|
25709 |
+
"loss": 1.0111,
|
25710 |
+
"step": 3667
|
25711 |
+
},
|
25712 |
+
{
|
25713 |
+
"epoch": 0.8151111111111111,
|
25714 |
+
"grad_norm": 0.1096658706665039,
|
25715 |
+
"learning_rate": 3.706013363028953e-05,
|
25716 |
+
"loss": 0.0178,
|
25717 |
+
"step": 3668
|
25718 |
+
},
|
25719 |
+
{
|
25720 |
+
"epoch": 0.8153333333333334,
|
25721 |
+
"grad_norm": 0.6512637138366699,
|
25722 |
+
"learning_rate": 3.701559020044544e-05,
|
25723 |
+
"loss": 1.0441,
|
25724 |
+
"step": 3669
|
25725 |
+
},
|
25726 |
+
{
|
25727 |
+
"epoch": 0.8155555555555556,
|
25728 |
+
"grad_norm": 1.0497286319732666,
|
25729 |
+
"learning_rate": 3.697104677060134e-05,
|
25730 |
+
"loss": 1.7955,
|
25731 |
+
"step": 3670
|
25732 |
+
},
|
25733 |
+
{
|
25734 |
+
"epoch": 0.8157777777777778,
|
25735 |
+
"grad_norm": 0.9653757214546204,
|
25736 |
+
"learning_rate": 3.692650334075724e-05,
|
25737 |
+
"loss": 1.8275,
|
25738 |
+
"step": 3671
|
25739 |
+
},
|
25740 |
+
{
|
25741 |
+
"epoch": 0.816,
|
25742 |
+
"grad_norm": 0.6577117443084717,
|
25743 |
+
"learning_rate": 3.688195991091314e-05,
|
25744 |
+
"loss": 0.9564,
|
25745 |
+
"step": 3672
|
25746 |
+
},
|
25747 |
+
{
|
25748 |
+
"epoch": 0.8162222222222222,
|
25749 |
+
"grad_norm": 0.07206307351589203,
|
25750 |
+
"learning_rate": 3.683741648106904e-05,
|
25751 |
+
"loss": 0.0158,
|
25752 |
+
"step": 3673
|
25753 |
+
},
|
25754 |
+
{
|
25755 |
+
"epoch": 0.8164444444444444,
|
25756 |
+
"grad_norm": 0.06910723447799683,
|
25757 |
+
"learning_rate": 3.6792873051224945e-05,
|
25758 |
+
"loss": 0.0159,
|
25759 |
+
"step": 3674
|
25760 |
+
},
|
25761 |
+
{
|
25762 |
+
"epoch": 0.8166666666666667,
|
25763 |
+
"grad_norm": 0.9517031908035278,
|
25764 |
+
"learning_rate": 3.674832962138085e-05,
|
25765 |
+
"loss": 1.9874,
|
25766 |
+
"step": 3675
|
25767 |
+
},
|
25768 |
+
{
|
25769 |
+
"epoch": 0.8168888888888889,
|
25770 |
+
"grad_norm": 0.1267796903848648,
|
25771 |
+
"learning_rate": 3.670378619153675e-05,
|
25772 |
+
"loss": 0.0203,
|
25773 |
+
"step": 3676
|
25774 |
+
},
|
25775 |
+
{
|
25776 |
+
"epoch": 0.8171111111111111,
|
25777 |
+
"grad_norm": 0.6326009631156921,
|
25778 |
+
"learning_rate": 3.6659242761692654e-05,
|
25779 |
+
"loss": 0.8404,
|
25780 |
+
"step": 3677
|
25781 |
+
},
|
25782 |
+
{
|
25783 |
+
"epoch": 0.8173333333333334,
|
25784 |
+
"grad_norm": 0.950645387172699,
|
25785 |
+
"learning_rate": 3.661469933184856e-05,
|
25786 |
+
"loss": 1.7112,
|
25787 |
+
"step": 3678
|
25788 |
+
},
|
25789 |
+
{
|
25790 |
+
"epoch": 0.8175555555555556,
|
25791 |
+
"grad_norm": 1.134836196899414,
|
25792 |
+
"learning_rate": 3.657015590200445e-05,
|
25793 |
+
"loss": 1.8671,
|
25794 |
+
"step": 3679
|
25795 |
+
},
|
25796 |
+
{
|
25797 |
+
"epoch": 0.8177777777777778,
|
25798 |
+
"grad_norm": 0.9950535893440247,
|
25799 |
+
"learning_rate": 3.652561247216036e-05,
|
25800 |
+
"loss": 1.9121,
|
25801 |
+
"step": 3680
|
25802 |
+
},
|
25803 |
+
{
|
25804 |
+
"epoch": 0.818,
|
25805 |
+
"grad_norm": 0.06296125799417496,
|
25806 |
+
"learning_rate": 3.648106904231626e-05,
|
25807 |
+
"loss": 0.0175,
|
25808 |
+
"step": 3681
|
25809 |
+
},
|
25810 |
+
{
|
25811 |
+
"epoch": 0.8182222222222222,
|
25812 |
+
"grad_norm": 0.9528807401657104,
|
25813 |
+
"learning_rate": 3.643652561247216e-05,
|
25814 |
+
"loss": 1.7288,
|
25815 |
+
"step": 3682
|
25816 |
+
},
|
25817 |
+
{
|
25818 |
+
"epoch": 0.8184444444444444,
|
25819 |
+
"grad_norm": 1.0748895406723022,
|
25820 |
+
"learning_rate": 3.6391982182628066e-05,
|
25821 |
+
"loss": 1.3809,
|
25822 |
+
"step": 3683
|
25823 |
+
},
|
25824 |
+
{
|
25825 |
+
"epoch": 0.8186666666666667,
|
25826 |
+
"grad_norm": 0.07369447499513626,
|
25827 |
+
"learning_rate": 3.634743875278397e-05,
|
25828 |
+
"loss": 0.0173,
|
25829 |
+
"step": 3684
|
25830 |
+
},
|
25831 |
+
{
|
25832 |
+
"epoch": 0.8188888888888889,
|
25833 |
+
"grad_norm": 0.07020772248506546,
|
25834 |
+
"learning_rate": 3.630289532293987e-05,
|
25835 |
+
"loss": 0.0172,
|
25836 |
+
"step": 3685
|
25837 |
+
},
|
25838 |
+
{
|
25839 |
+
"epoch": 0.8191111111111111,
|
25840 |
+
"grad_norm": 0.7225638628005981,
|
25841 |
+
"learning_rate": 3.625835189309577e-05,
|
25842 |
+
"loss": 0.9078,
|
25843 |
+
"step": 3686
|
25844 |
+
},
|
25845 |
+
{
|
25846 |
+
"epoch": 0.8193333333333334,
|
25847 |
+
"grad_norm": 1.205776333808899,
|
25848 |
+
"learning_rate": 3.621380846325167e-05,
|
25849 |
+
"loss": 2.0067,
|
25850 |
+
"step": 3687
|
25851 |
+
},
|
25852 |
+
{
|
25853 |
+
"epoch": 0.8195555555555556,
|
25854 |
+
"grad_norm": 1.0239263772964478,
|
25855 |
+
"learning_rate": 3.616926503340757e-05,
|
25856 |
+
"loss": 1.3173,
|
25857 |
+
"step": 3688
|
25858 |
+
},
|
25859 |
+
{
|
25860 |
+
"epoch": 0.8197777777777778,
|
25861 |
+
"grad_norm": 0.09923997521400452,
|
25862 |
+
"learning_rate": 3.612472160356348e-05,
|
25863 |
+
"loss": 0.0243,
|
25864 |
+
"step": 3689
|
25865 |
+
},
|
25866 |
+
{
|
25867 |
+
"epoch": 0.82,
|
25868 |
+
"grad_norm": 1.150796890258789,
|
25869 |
+
"learning_rate": 3.608017817371938e-05,
|
25870 |
+
"loss": 1.5561,
|
25871 |
+
"step": 3690
|
25872 |
+
},
|
25873 |
+
{
|
25874 |
+
"epoch": 0.8202222222222222,
|
25875 |
+
"grad_norm": 1.0021523237228394,
|
25876 |
+
"learning_rate": 3.603563474387528e-05,
|
25877 |
+
"loss": 1.6326,
|
25878 |
+
"step": 3691
|
25879 |
+
},
|
25880 |
+
{
|
25881 |
+
"epoch": 0.8204444444444444,
|
25882 |
+
"grad_norm": 0.9547725319862366,
|
25883 |
+
"learning_rate": 3.599109131403118e-05,
|
25884 |
+
"loss": 1.2198,
|
25885 |
+
"step": 3692
|
25886 |
+
},
|
25887 |
+
{
|
25888 |
+
"epoch": 0.8206666666666667,
|
25889 |
+
"grad_norm": 0.9803183674812317,
|
25890 |
+
"learning_rate": 3.594654788418708e-05,
|
25891 |
+
"loss": 1.4813,
|
25892 |
+
"step": 3693
|
25893 |
+
},
|
25894 |
+
{
|
25895 |
+
"epoch": 0.8208888888888889,
|
25896 |
+
"grad_norm": 1.140236258506775,
|
25897 |
+
"learning_rate": 3.5902004454342985e-05,
|
25898 |
+
"loss": 1.5703,
|
25899 |
+
"step": 3694
|
25900 |
+
},
|
25901 |
+
{
|
25902 |
+
"epoch": 0.8211111111111111,
|
25903 |
+
"grad_norm": 0.1800074428319931,
|
25904 |
+
"learning_rate": 3.585746102449889e-05,
|
25905 |
+
"loss": 0.0296,
|
25906 |
+
"step": 3695
|
25907 |
+
},
|
25908 |
+
{
|
25909 |
+
"epoch": 0.8213333333333334,
|
25910 |
+
"grad_norm": 0.744806170463562,
|
25911 |
+
"learning_rate": 3.581291759465479e-05,
|
25912 |
+
"loss": 0.7234,
|
25913 |
+
"step": 3696
|
25914 |
+
},
|
25915 |
+
{
|
25916 |
+
"epoch": 0.8215555555555556,
|
25917 |
+
"grad_norm": 1.0382987260818481,
|
25918 |
+
"learning_rate": 3.5768374164810694e-05,
|
25919 |
+
"loss": 1.0165,
|
25920 |
+
"step": 3697
|
25921 |
+
},
|
25922 |
+
{
|
25923 |
+
"epoch": 0.8217777777777778,
|
25924 |
+
"grad_norm": 1.01142156124115,
|
25925 |
+
"learning_rate": 3.572383073496659e-05,
|
25926 |
+
"loss": 1.2093,
|
25927 |
+
"step": 3698
|
25928 |
+
},
|
25929 |
+
{
|
25930 |
+
"epoch": 0.822,
|
25931 |
+
"grad_norm": 0.575733482837677,
|
25932 |
+
"learning_rate": 3.567928730512249e-05,
|
25933 |
+
"loss": 0.4409,
|
25934 |
+
"step": 3699
|
25935 |
+
},
|
25936 |
+
{
|
25937 |
+
"epoch": 0.8222222222222222,
|
25938 |
+
"grad_norm": 0.9521045684814453,
|
25939 |
+
"learning_rate": 3.5634743875278396e-05,
|
25940 |
+
"loss": 0.6805,
|
25941 |
+
"step": 3700
|
25942 |
+
},
|
25943 |
+
{
|
25944 |
+
"epoch": 0.8224444444444444,
|
25945 |
+
"grad_norm": 0.9057009220123291,
|
25946 |
+
"learning_rate": 3.55902004454343e-05,
|
25947 |
+
"loss": 2.3256,
|
25948 |
+
"step": 3701
|
25949 |
+
},
|
25950 |
+
{
|
25951 |
+
"epoch": 0.8226666666666667,
|
25952 |
+
"grad_norm": 0.8539500832557678,
|
25953 |
+
"learning_rate": 3.55456570155902e-05,
|
25954 |
+
"loss": 2.1458,
|
25955 |
+
"step": 3702
|
25956 |
+
},
|
25957 |
+
{
|
25958 |
+
"epoch": 0.8228888888888889,
|
25959 |
+
"grad_norm": 0.598690390586853,
|
25960 |
+
"learning_rate": 3.5501113585746106e-05,
|
25961 |
+
"loss": 1.164,
|
25962 |
+
"step": 3703
|
25963 |
+
},
|
25964 |
+
{
|
25965 |
+
"epoch": 0.8231111111111111,
|
25966 |
+
"grad_norm": 1.0174680948257446,
|
25967 |
+
"learning_rate": 3.545657015590201e-05,
|
25968 |
+
"loss": 2.2128,
|
25969 |
+
"step": 3704
|
25970 |
+
},
|
25971 |
+
{
|
25972 |
+
"epoch": 0.8233333333333334,
|
25973 |
+
"grad_norm": 0.5973488092422485,
|
25974 |
+
"learning_rate": 3.541202672605791e-05,
|
25975 |
+
"loss": 1.1449,
|
25976 |
+
"step": 3705
|
25977 |
+
},
|
25978 |
+
{
|
25979 |
+
"epoch": 0.8235555555555556,
|
25980 |
+
"grad_norm": 0.07344137132167816,
|
25981 |
+
"learning_rate": 3.536748329621381e-05,
|
25982 |
+
"loss": 0.0108,
|
25983 |
+
"step": 3706
|
25984 |
+
},
|
25985 |
+
{
|
25986 |
+
"epoch": 0.8237777777777778,
|
25987 |
+
"grad_norm": 0.08576471358537674,
|
25988 |
+
"learning_rate": 3.5322939866369714e-05,
|
25989 |
+
"loss": 0.0111,
|
25990 |
+
"step": 3707
|
25991 |
+
},
|
25992 |
+
{
|
25993 |
+
"epoch": 0.824,
|
25994 |
+
"grad_norm": 0.0801275447010994,
|
25995 |
+
"learning_rate": 3.527839643652561e-05,
|
25996 |
+
"loss": 0.0111,
|
25997 |
+
"step": 3708
|
25998 |
+
},
|
25999 |
+
{
|
26000 |
+
"epoch": 0.8242222222222222,
|
26001 |
+
"grad_norm": 0.08194973319768906,
|
26002 |
+
"learning_rate": 3.523385300668152e-05,
|
26003 |
+
"loss": 0.0109,
|
26004 |
+
"step": 3709
|
26005 |
+
},
|
26006 |
+
{
|
26007 |
+
"epoch": 0.8244444444444444,
|
26008 |
+
"grad_norm": 1.0708433389663696,
|
26009 |
+
"learning_rate": 3.518930957683742e-05,
|
26010 |
+
"loss": 2.033,
|
26011 |
+
"step": 3710
|
26012 |
+
},
|
26013 |
+
{
|
26014 |
+
"epoch": 0.8246666666666667,
|
26015 |
+
"grad_norm": 1.263612985610962,
|
26016 |
+
"learning_rate": 3.514476614699332e-05,
|
26017 |
+
"loss": 2.6208,
|
26018 |
+
"step": 3711
|
26019 |
+
},
|
26020 |
+
{
|
26021 |
+
"epoch": 0.8248888888888889,
|
26022 |
+
"grad_norm": 0.9804373383522034,
|
26023 |
+
"learning_rate": 3.510022271714922e-05,
|
26024 |
+
"loss": 1.8539,
|
26025 |
+
"step": 3712
|
26026 |
+
},
|
26027 |
+
{
|
26028 |
+
"epoch": 0.8251111111111111,
|
26029 |
+
"grad_norm": 0.8642978668212891,
|
26030 |
+
"learning_rate": 3.5055679287305125e-05,
|
26031 |
+
"loss": 1.7259,
|
26032 |
+
"step": 3713
|
26033 |
+
},
|
26034 |
+
{
|
26035 |
+
"epoch": 0.8253333333333334,
|
26036 |
+
"grad_norm": 0.9090456962585449,
|
26037 |
+
"learning_rate": 3.5011135857461024e-05,
|
26038 |
+
"loss": 1.8745,
|
26039 |
+
"step": 3714
|
26040 |
+
},
|
26041 |
+
{
|
26042 |
+
"epoch": 0.8255555555555556,
|
26043 |
+
"grad_norm": 0.9151667356491089,
|
26044 |
+
"learning_rate": 3.496659242761693e-05,
|
26045 |
+
"loss": 1.9903,
|
26046 |
+
"step": 3715
|
26047 |
+
},
|
26048 |
+
{
|
26049 |
+
"epoch": 0.8257777777777778,
|
26050 |
+
"grad_norm": 0.6366732120513916,
|
26051 |
+
"learning_rate": 3.4922048997772835e-05,
|
26052 |
+
"loss": 0.9141,
|
26053 |
+
"step": 3716
|
26054 |
+
},
|
26055 |
+
{
|
26056 |
+
"epoch": 0.826,
|
26057 |
+
"grad_norm": 0.7700564861297607,
|
26058 |
+
"learning_rate": 3.487750556792873e-05,
|
26059 |
+
"loss": 0.9737,
|
26060 |
+
"step": 3717
|
26061 |
+
},
|
26062 |
+
{
|
26063 |
+
"epoch": 0.8262222222222222,
|
26064 |
+
"grad_norm": 0.9940738081932068,
|
26065 |
+
"learning_rate": 3.483296213808463e-05,
|
26066 |
+
"loss": 2.0155,
|
26067 |
+
"step": 3718
|
26068 |
+
},
|
26069 |
+
{
|
26070 |
+
"epoch": 0.8264444444444444,
|
26071 |
+
"grad_norm": 0.8816027641296387,
|
26072 |
+
"learning_rate": 3.478841870824054e-05,
|
26073 |
+
"loss": 1.7297,
|
26074 |
+
"step": 3719
|
26075 |
+
},
|
26076 |
+
{
|
26077 |
+
"epoch": 0.8266666666666667,
|
26078 |
+
"grad_norm": 1.3799381256103516,
|
26079 |
+
"learning_rate": 3.4743875278396436e-05,
|
26080 |
+
"loss": 2.159,
|
26081 |
+
"step": 3720
|
26082 |
+
},
|
26083 |
+
{
|
26084 |
+
"epoch": 0.8268888888888889,
|
26085 |
+
"grad_norm": 1.0440785884857178,
|
26086 |
+
"learning_rate": 3.469933184855234e-05,
|
26087 |
+
"loss": 1.7563,
|
26088 |
+
"step": 3721
|
26089 |
+
},
|
26090 |
+
{
|
26091 |
+
"epoch": 0.8271111111111111,
|
26092 |
+
"grad_norm": 0.6796101331710815,
|
26093 |
+
"learning_rate": 3.465478841870824e-05,
|
26094 |
+
"loss": 0.9447,
|
26095 |
+
"step": 3722
|
26096 |
+
},
|
26097 |
+
{
|
26098 |
+
"epoch": 0.8273333333333334,
|
26099 |
+
"grad_norm": 0.06959293782711029,
|
26100 |
+
"learning_rate": 3.4610244988864145e-05,
|
26101 |
+
"loss": 0.0163,
|
26102 |
+
"step": 3723
|
26103 |
+
},
|
26104 |
+
{
|
26105 |
+
"epoch": 0.8275555555555556,
|
26106 |
+
"grad_norm": 0.06835558265447617,
|
26107 |
+
"learning_rate": 3.456570155902005e-05,
|
26108 |
+
"loss": 0.0164,
|
26109 |
+
"step": 3724
|
26110 |
+
},
|
26111 |
+
{
|
26112 |
+
"epoch": 0.8277777777777777,
|
26113 |
+
"grad_norm": 1.080851435661316,
|
26114 |
+
"learning_rate": 3.452115812917594e-05,
|
26115 |
+
"loss": 1.9956,
|
26116 |
+
"step": 3725
|
26117 |
+
},
|
26118 |
+
{
|
26119 |
+
"epoch": 0.828,
|
26120 |
+
"grad_norm": 0.6416263580322266,
|
26121 |
+
"learning_rate": 3.447661469933185e-05,
|
26122 |
+
"loss": 0.6763,
|
26123 |
+
"step": 3726
|
26124 |
+
},
|
26125 |
+
{
|
26126 |
+
"epoch": 0.8282222222222222,
|
26127 |
+
"grad_norm": 1.0977882146835327,
|
26128 |
+
"learning_rate": 3.443207126948775e-05,
|
26129 |
+
"loss": 1.7526,
|
26130 |
+
"step": 3727
|
26131 |
+
},
|
26132 |
+
{
|
26133 |
+
"epoch": 0.8284444444444444,
|
26134 |
+
"grad_norm": 0.8835853338241577,
|
26135 |
+
"learning_rate": 3.438752783964365e-05,
|
26136 |
+
"loss": 1.7257,
|
26137 |
+
"step": 3728
|
26138 |
+
},
|
26139 |
+
{
|
26140 |
+
"epoch": 0.8286666666666667,
|
26141 |
+
"grad_norm": 1.0617055892944336,
|
26142 |
+
"learning_rate": 3.434298440979956e-05,
|
26143 |
+
"loss": 1.9876,
|
26144 |
+
"step": 3729
|
26145 |
+
},
|
26146 |
+
{
|
26147 |
+
"epoch": 0.8288888888888889,
|
26148 |
+
"grad_norm": 1.1054614782333374,
|
26149 |
+
"learning_rate": 3.429844097995546e-05,
|
26150 |
+
"loss": 1.4692,
|
26151 |
+
"step": 3730
|
26152 |
+
},
|
26153 |
+
{
|
26154 |
+
"epoch": 0.8291111111111111,
|
26155 |
+
"grad_norm": 0.06595687568187714,
|
26156 |
+
"learning_rate": 3.425389755011136e-05,
|
26157 |
+
"loss": 0.0171,
|
26158 |
+
"step": 3731
|
26159 |
+
},
|
26160 |
+
{
|
26161 |
+
"epoch": 0.8293333333333334,
|
26162 |
+
"grad_norm": 0.06783387809991837,
|
26163 |
+
"learning_rate": 3.420935412026726e-05,
|
26164 |
+
"loss": 0.0172,
|
26165 |
+
"step": 3732
|
26166 |
+
},
|
26167 |
+
{
|
26168 |
+
"epoch": 0.8295555555555556,
|
26169 |
+
"grad_norm": 0.7828741073608398,
|
26170 |
+
"learning_rate": 3.4164810690423165e-05,
|
26171 |
+
"loss": 0.8107,
|
26172 |
+
"step": 3733
|
26173 |
+
},
|
26174 |
+
{
|
26175 |
+
"epoch": 0.8297777777777777,
|
26176 |
+
"grad_norm": 0.07702479511499405,
|
26177 |
+
"learning_rate": 3.4120267260579063e-05,
|
26178 |
+
"loss": 0.0177,
|
26179 |
+
"step": 3734
|
26180 |
+
},
|
26181 |
+
{
|
26182 |
+
"epoch": 0.83,
|
26183 |
+
"grad_norm": 0.6449767351150513,
|
26184 |
+
"learning_rate": 3.407572383073497e-05,
|
26185 |
+
"loss": 0.7656,
|
26186 |
+
"step": 3735
|
26187 |
+
},
|
26188 |
+
{
|
26189 |
+
"epoch": 0.8302222222222222,
|
26190 |
+
"grad_norm": 1.3564252853393555,
|
26191 |
+
"learning_rate": 3.4031180400890874e-05,
|
26192 |
+
"loss": 1.9644,
|
26193 |
+
"step": 3736
|
26194 |
+
},
|
26195 |
+
{
|
26196 |
+
"epoch": 0.8304444444444444,
|
26197 |
+
"grad_norm": 0.7747462391853333,
|
26198 |
+
"learning_rate": 3.398663697104677e-05,
|
26199 |
+
"loss": 1.0032,
|
26200 |
+
"step": 3737
|
26201 |
+
},
|
26202 |
+
{
|
26203 |
+
"epoch": 0.8306666666666667,
|
26204 |
+
"grad_norm": 1.13922119140625,
|
26205 |
+
"learning_rate": 3.394209354120267e-05,
|
26206 |
+
"loss": 1.545,
|
26207 |
+
"step": 3738
|
26208 |
+
},
|
26209 |
+
{
|
26210 |
+
"epoch": 0.8308888888888889,
|
26211 |
+
"grad_norm": 0.9685484766960144,
|
26212 |
+
"learning_rate": 3.389755011135858e-05,
|
26213 |
+
"loss": 1.3771,
|
26214 |
+
"step": 3739
|
26215 |
+
},
|
26216 |
+
{
|
26217 |
+
"epoch": 0.8311111111111111,
|
26218 |
+
"grad_norm": 0.7709338665008545,
|
26219 |
+
"learning_rate": 3.3853006681514475e-05,
|
26220 |
+
"loss": 0.696,
|
26221 |
+
"step": 3740
|
26222 |
+
},
|
26223 |
+
{
|
26224 |
+
"epoch": 0.8313333333333334,
|
26225 |
+
"grad_norm": 1.0451520681381226,
|
26226 |
+
"learning_rate": 3.380846325167038e-05,
|
26227 |
+
"loss": 1.564,
|
26228 |
+
"step": 3741
|
26229 |
+
},
|
26230 |
+
{
|
26231 |
+
"epoch": 0.8315555555555556,
|
26232 |
+
"grad_norm": 1.1514785289764404,
|
26233 |
+
"learning_rate": 3.3763919821826286e-05,
|
26234 |
+
"loss": 1.4297,
|
26235 |
+
"step": 3742
|
26236 |
+
},
|
26237 |
+
{
|
26238 |
+
"epoch": 0.8317777777777777,
|
26239 |
+
"grad_norm": 1.3433165550231934,
|
26240 |
+
"learning_rate": 3.3719376391982185e-05,
|
26241 |
+
"loss": 1.7462,
|
26242 |
+
"step": 3743
|
26243 |
+
},
|
26244 |
+
{
|
26245 |
+
"epoch": 0.832,
|
26246 |
+
"grad_norm": 0.9320109486579895,
|
26247 |
+
"learning_rate": 3.367483296213808e-05,
|
26248 |
+
"loss": 0.95,
|
26249 |
+
"step": 3744
|
26250 |
+
},
|
26251 |
+
{
|
26252 |
+
"epoch": 0.8322222222222222,
|
26253 |
+
"grad_norm": 0.9835542440414429,
|
26254 |
+
"learning_rate": 3.363028953229399e-05,
|
26255 |
+
"loss": 1.1297,
|
26256 |
+
"step": 3745
|
26257 |
+
},
|
26258 |
+
{
|
26259 |
+
"epoch": 0.8324444444444444,
|
26260 |
+
"grad_norm": 0.18634167313575745,
|
26261 |
+
"learning_rate": 3.358574610244989e-05,
|
26262 |
+
"loss": 0.0346,
|
26263 |
+
"step": 3746
|
26264 |
+
},
|
26265 |
+
{
|
26266 |
+
"epoch": 0.8326666666666667,
|
26267 |
+
"grad_norm": 0.1696268618106842,
|
26268 |
+
"learning_rate": 3.354120267260579e-05,
|
26269 |
+
"loss": 0.034,
|
26270 |
+
"step": 3747
|
26271 |
+
},
|
26272 |
+
{
|
26273 |
+
"epoch": 0.8328888888888889,
|
26274 |
+
"grad_norm": 1.043257236480713,
|
26275 |
+
"learning_rate": 3.34966592427617e-05,
|
26276 |
+
"loss": 0.9229,
|
26277 |
+
"step": 3748
|
26278 |
+
},
|
26279 |
+
{
|
26280 |
+
"epoch": 0.8331111111111111,
|
26281 |
+
"grad_norm": 1.21977698802948,
|
26282 |
+
"learning_rate": 3.3452115812917596e-05,
|
26283 |
+
"loss": 1.0685,
|
26284 |
+
"step": 3749
|
26285 |
+
},
|
26286 |
+
{
|
26287 |
+
"epoch": 0.8333333333333334,
|
26288 |
+
"grad_norm": 1.1700232028961182,
|
26289 |
+
"learning_rate": 3.34075723830735e-05,
|
26290 |
+
"loss": 0.7485,
|
26291 |
+
"step": 3750
|
26292 |
+
},
|
26293 |
+
{
|
26294 |
+
"epoch": 0.8335555555555556,
|
26295 |
+
"grad_norm": 0.4857214093208313,
|
26296 |
+
"learning_rate": 3.33630289532294e-05,
|
26297 |
+
"loss": 0.9854,
|
26298 |
+
"step": 3751
|
26299 |
+
},
|
26300 |
+
{
|
26301 |
+
"epoch": 0.8337777777777777,
|
26302 |
+
"grad_norm": 0.8265995383262634,
|
26303 |
+
"learning_rate": 3.33184855233853e-05,
|
26304 |
+
"loss": 1.9879,
|
26305 |
+
"step": 3752
|
26306 |
+
},
|
26307 |
+
{
|
26308 |
+
"epoch": 0.834,
|
26309 |
+
"grad_norm": 0.48844748735427856,
|
26310 |
+
"learning_rate": 3.3273942093541204e-05,
|
26311 |
+
"loss": 1.0029,
|
26312 |
+
"step": 3753
|
26313 |
+
},
|
26314 |
+
{
|
26315 |
+
"epoch": 0.8342222222222222,
|
26316 |
+
"grad_norm": 0.6757328510284424,
|
26317 |
+
"learning_rate": 3.322939866369711e-05,
|
26318 |
+
"loss": 1.0597,
|
26319 |
+
"step": 3754
|
26320 |
+
},
|
26321 |
+
{
|
26322 |
+
"epoch": 0.8344444444444444,
|
26323 |
+
"grad_norm": 1.0443055629730225,
|
26324 |
+
"learning_rate": 3.318485523385301e-05,
|
26325 |
+
"loss": 2.3737,
|
26326 |
+
"step": 3755
|
26327 |
+
},
|
26328 |
+
{
|
26329 |
+
"epoch": 0.8346666666666667,
|
26330 |
+
"grad_norm": 0.8254187703132629,
|
26331 |
+
"learning_rate": 3.3140311804008914e-05,
|
26332 |
+
"loss": 1.9924,
|
26333 |
+
"step": 3756
|
26334 |
+
},
|
26335 |
+
{
|
26336 |
+
"epoch": 0.8348888888888889,
|
26337 |
+
"grad_norm": 0.09497911483049393,
|
26338 |
+
"learning_rate": 3.309576837416481e-05,
|
26339 |
+
"loss": 0.0113,
|
26340 |
+
"step": 3757
|
26341 |
+
},
|
26342 |
+
{
|
26343 |
+
"epoch": 0.8351111111111111,
|
26344 |
+
"grad_norm": 0.09401866048574448,
|
26345 |
+
"learning_rate": 3.305122494432071e-05,
|
26346 |
+
"loss": 0.0116,
|
26347 |
+
"step": 3758
|
26348 |
+
},
|
26349 |
+
{
|
26350 |
+
"epoch": 0.8353333333333334,
|
26351 |
+
"grad_norm": 0.09458266943693161,
|
26352 |
+
"learning_rate": 3.3006681514476616e-05,
|
26353 |
+
"loss": 0.0116,
|
26354 |
+
"step": 3759
|
26355 |
+
},
|
26356 |
+
{
|
26357 |
+
"epoch": 0.8355555555555556,
|
26358 |
+
"grad_norm": 0.09231861680746078,
|
26359 |
+
"learning_rate": 3.2962138084632515e-05,
|
26360 |
+
"loss": 0.0114,
|
26361 |
+
"step": 3760
|
26362 |
+
},
|
26363 |
+
{
|
26364 |
+
"epoch": 0.8357777777777777,
|
26365 |
+
"grad_norm": 1.012135624885559,
|
26366 |
+
"learning_rate": 3.291759465478842e-05,
|
26367 |
+
"loss": 2.2142,
|
26368 |
+
"step": 3761
|
26369 |
+
},
|
26370 |
+
{
|
26371 |
+
"epoch": 0.836,
|
26372 |
+
"grad_norm": 0.8345160484313965,
|
26373 |
+
"learning_rate": 3.2873051224944325e-05,
|
26374 |
+
"loss": 2.0887,
|
26375 |
+
"step": 3762
|
26376 |
+
},
|
26377 |
+
{
|
26378 |
+
"epoch": 0.8362222222222222,
|
26379 |
+
"grad_norm": 0.777621865272522,
|
26380 |
+
"learning_rate": 3.2828507795100224e-05,
|
26381 |
+
"loss": 1.7456,
|
26382 |
+
"step": 3763
|
26383 |
+
},
|
26384 |
+
{
|
26385 |
+
"epoch": 0.8364444444444444,
|
26386 |
+
"grad_norm": 0.9471651911735535,
|
26387 |
+
"learning_rate": 3.278396436525612e-05,
|
26388 |
+
"loss": 1.9976,
|
26389 |
+
"step": 3764
|
26390 |
+
},
|
26391 |
+
{
|
26392 |
+
"epoch": 0.8366666666666667,
|
26393 |
+
"grad_norm": 0.9481960535049438,
|
26394 |
+
"learning_rate": 3.273942093541203e-05,
|
26395 |
+
"loss": 2.1314,
|
26396 |
+
"step": 3765
|
26397 |
+
},
|
26398 |
+
{
|
26399 |
+
"epoch": 0.8368888888888889,
|
26400 |
+
"grad_norm": 1.0837010145187378,
|
26401 |
+
"learning_rate": 3.2694877505567926e-05,
|
26402 |
+
"loss": 2.0858,
|
26403 |
+
"step": 3766
|
26404 |
+
},
|
26405 |
+
{
|
26406 |
+
"epoch": 0.8371111111111111,
|
26407 |
+
"grad_norm": 1.0105607509613037,
|
26408 |
+
"learning_rate": 3.265033407572383e-05,
|
26409 |
+
"loss": 2.1732,
|
26410 |
+
"step": 3767
|
26411 |
+
},
|
26412 |
+
{
|
26413 |
+
"epoch": 0.8373333333333334,
|
26414 |
+
"grad_norm": 0.5977281928062439,
|
26415 |
+
"learning_rate": 3.260579064587974e-05,
|
26416 |
+
"loss": 0.8871,
|
26417 |
+
"step": 3768
|
26418 |
+
},
|
26419 |
+
{
|
26420 |
+
"epoch": 0.8375555555555556,
|
26421 |
+
"grad_norm": 1.2047114372253418,
|
26422 |
+
"learning_rate": 3.2561247216035636e-05,
|
26423 |
+
"loss": 2.0168,
|
26424 |
+
"step": 3769
|
26425 |
+
},
|
26426 |
+
{
|
26427 |
+
"epoch": 0.8377777777777777,
|
26428 |
+
"grad_norm": 0.8766410946846008,
|
26429 |
+
"learning_rate": 3.251670378619154e-05,
|
26430 |
+
"loss": 1.6475,
|
26431 |
+
"step": 3770
|
26432 |
+
},
|
26433 |
+
{
|
26434 |
+
"epoch": 0.838,
|
26435 |
+
"grad_norm": 0.9371228218078613,
|
26436 |
+
"learning_rate": 3.247216035634744e-05,
|
26437 |
+
"loss": 1.8688,
|
26438 |
+
"step": 3771
|
26439 |
+
},
|
26440 |
+
{
|
26441 |
+
"epoch": 0.8382222222222222,
|
26442 |
+
"grad_norm": 1.0611170530319214,
|
26443 |
+
"learning_rate": 3.242761692650334e-05,
|
26444 |
+
"loss": 1.8825,
|
26445 |
+
"step": 3772
|
26446 |
+
},
|
26447 |
+
{
|
26448 |
+
"epoch": 0.8384444444444444,
|
26449 |
+
"grad_norm": 1.0075304508209229,
|
26450 |
+
"learning_rate": 3.2383073496659244e-05,
|
26451 |
+
"loss": 2.1026,
|
26452 |
+
"step": 3773
|
26453 |
+
},
|
26454 |
+
{
|
26455 |
+
"epoch": 0.8386666666666667,
|
26456 |
+
"grad_norm": 0.06949839740991592,
|
26457 |
+
"learning_rate": 3.233853006681515e-05,
|
26458 |
+
"loss": 0.0162,
|
26459 |
+
"step": 3774
|
26460 |
+
},
|
26461 |
+
{
|
26462 |
+
"epoch": 0.8388888888888889,
|
26463 |
+
"grad_norm": 0.06908978521823883,
|
26464 |
+
"learning_rate": 3.229398663697105e-05,
|
26465 |
+
"loss": 0.0159,
|
26466 |
+
"step": 3775
|
26467 |
+
},
|
26468 |
+
{
|
26469 |
+
"epoch": 0.8391111111111111,
|
26470 |
+
"grad_norm": 0.905208945274353,
|
26471 |
+
"learning_rate": 3.224944320712695e-05,
|
26472 |
+
"loss": 1.7153,
|
26473 |
+
"step": 3776
|
26474 |
+
},
|
26475 |
+
{
|
26476 |
+
"epoch": 0.8393333333333334,
|
26477 |
+
"grad_norm": 0.8136224150657654,
|
26478 |
+
"learning_rate": 3.220489977728285e-05,
|
26479 |
+
"loss": 0.903,
|
26480 |
+
"step": 3777
|
26481 |
+
},
|
26482 |
+
{
|
26483 |
+
"epoch": 0.8395555555555556,
|
26484 |
+
"grad_norm": 0.9069592356681824,
|
26485 |
+
"learning_rate": 3.216035634743875e-05,
|
26486 |
+
"loss": 1.6758,
|
26487 |
+
"step": 3778
|
26488 |
+
},
|
26489 |
+
{
|
26490 |
+
"epoch": 0.8397777777777777,
|
26491 |
+
"grad_norm": 1.0851026773452759,
|
26492 |
+
"learning_rate": 3.2115812917594655e-05,
|
26493 |
+
"loss": 1.8553,
|
26494 |
+
"step": 3779
|
26495 |
+
},
|
26496 |
+
{
|
26497 |
+
"epoch": 0.84,
|
26498 |
+
"grad_norm": 0.998877763748169,
|
26499 |
+
"learning_rate": 3.207126948775056e-05,
|
26500 |
+
"loss": 1.8056,
|
26501 |
+
"step": 3780
|
26502 |
+
},
|
26503 |
+
{
|
26504 |
+
"epoch": 0.8402222222222222,
|
26505 |
+
"grad_norm": 0.0660950317978859,
|
26506 |
+
"learning_rate": 3.202672605790646e-05,
|
26507 |
+
"loss": 0.0175,
|
26508 |
+
"step": 3781
|
26509 |
+
},
|
26510 |
+
{
|
26511 |
+
"epoch": 0.8404444444444444,
|
26512 |
+
"grad_norm": 0.0653744786977768,
|
26513 |
+
"learning_rate": 3.1982182628062365e-05,
|
26514 |
+
"loss": 0.0173,
|
26515 |
+
"step": 3782
|
26516 |
+
},
|
26517 |
+
{
|
26518 |
+
"epoch": 0.8406666666666667,
|
26519 |
+
"grad_norm": 0.06411214917898178,
|
26520 |
+
"learning_rate": 3.193763919821826e-05,
|
26521 |
+
"loss": 0.0175,
|
26522 |
+
"step": 3783
|
26523 |
+
},
|
26524 |
+
{
|
26525 |
+
"epoch": 0.8408888888888889,
|
26526 |
+
"grad_norm": 0.06954985857009888,
|
26527 |
+
"learning_rate": 3.189309576837416e-05,
|
26528 |
+
"loss": 0.0174,
|
26529 |
+
"step": 3784
|
26530 |
+
},
|
26531 |
+
{
|
26532 |
+
"epoch": 0.8411111111111111,
|
26533 |
+
"grad_norm": 0.7279578447341919,
|
26534 |
+
"learning_rate": 3.184855233853007e-05,
|
26535 |
+
"loss": 1.0075,
|
26536 |
+
"step": 3785
|
26537 |
+
},
|
26538 |
+
{
|
26539 |
+
"epoch": 0.8413333333333334,
|
26540 |
+
"grad_norm": 1.0854923725128174,
|
26541 |
+
"learning_rate": 3.180400890868597e-05,
|
26542 |
+
"loss": 1.8502,
|
26543 |
+
"step": 3786
|
26544 |
+
},
|
26545 |
+
{
|
26546 |
+
"epoch": 0.8415555555555555,
|
26547 |
+
"grad_norm": 1.1248599290847778,
|
26548 |
+
"learning_rate": 3.175946547884187e-05,
|
26549 |
+
"loss": 1.769,
|
26550 |
+
"step": 3787
|
26551 |
+
},
|
26552 |
+
{
|
26553 |
+
"epoch": 0.8417777777777777,
|
26554 |
+
"grad_norm": 0.6535754203796387,
|
26555 |
+
"learning_rate": 3.1714922048997777e-05,
|
26556 |
+
"loss": 0.922,
|
26557 |
+
"step": 3788
|
26558 |
+
},
|
26559 |
+
{
|
26560 |
+
"epoch": 0.842,
|
26561 |
+
"grad_norm": 1.0051473379135132,
|
26562 |
+
"learning_rate": 3.167037861915368e-05,
|
26563 |
+
"loss": 1.6551,
|
26564 |
+
"step": 3789
|
26565 |
+
},
|
26566 |
+
{
|
26567 |
+
"epoch": 0.8422222222222222,
|
26568 |
+
"grad_norm": 1.0957441329956055,
|
26569 |
+
"learning_rate": 3.162583518930958e-05,
|
26570 |
+
"loss": 1.7058,
|
26571 |
+
"step": 3790
|
26572 |
+
},
|
26573 |
+
{
|
26574 |
+
"epoch": 0.8424444444444444,
|
26575 |
+
"grad_norm": 1.0055428743362427,
|
26576 |
+
"learning_rate": 3.158129175946548e-05,
|
26577 |
+
"loss": 1.5342,
|
26578 |
+
"step": 3791
|
26579 |
+
},
|
26580 |
+
{
|
26581 |
+
"epoch": 0.8426666666666667,
|
26582 |
+
"grad_norm": 0.8064576387405396,
|
26583 |
+
"learning_rate": 3.1536748329621384e-05,
|
26584 |
+
"loss": 0.8161,
|
26585 |
+
"step": 3792
|
26586 |
+
},
|
26587 |
+
{
|
26588 |
+
"epoch": 0.8428888888888889,
|
26589 |
+
"grad_norm": 1.1807235479354858,
|
26590 |
+
"learning_rate": 3.149220489977728e-05,
|
26591 |
+
"loss": 1.8853,
|
26592 |
+
"step": 3793
|
26593 |
+
},
|
26594 |
+
{
|
26595 |
+
"epoch": 0.8431111111111111,
|
26596 |
+
"grad_norm": 1.0103986263275146,
|
26597 |
+
"learning_rate": 3.144766146993319e-05,
|
26598 |
+
"loss": 1.3899,
|
26599 |
+
"step": 3794
|
26600 |
+
},
|
26601 |
+
{
|
26602 |
+
"epoch": 0.8433333333333334,
|
26603 |
+
"grad_norm": 0.2762221693992615,
|
26604 |
+
"learning_rate": 3.140311804008909e-05,
|
26605 |
+
"loss": 0.0367,
|
26606 |
+
"step": 3795
|
26607 |
+
},
|
26608 |
+
{
|
26609 |
+
"epoch": 0.8435555555555555,
|
26610 |
+
"grad_norm": 0.7262986302375793,
|
26611 |
+
"learning_rate": 3.135857461024499e-05,
|
26612 |
+
"loss": 0.7566,
|
26613 |
+
"step": 3796
|
26614 |
+
},
|
26615 |
+
{
|
26616 |
+
"epoch": 0.8437777777777777,
|
26617 |
+
"grad_norm": 1.0480473041534424,
|
26618 |
+
"learning_rate": 3.131403118040089e-05,
|
26619 |
+
"loss": 1.6141,
|
26620 |
+
"step": 3797
|
26621 |
+
},
|
26622 |
+
{
|
26623 |
+
"epoch": 0.844,
|
26624 |
+
"grad_norm": 1.0954132080078125,
|
26625 |
+
"learning_rate": 3.126948775055679e-05,
|
26626 |
+
"loss": 1.269,
|
26627 |
+
"step": 3798
|
26628 |
+
},
|
26629 |
+
{
|
26630 |
+
"epoch": 0.8442222222222222,
|
26631 |
+
"grad_norm": 0.7563920021057129,
|
26632 |
+
"learning_rate": 3.1224944320712695e-05,
|
26633 |
+
"loss": 0.6253,
|
26634 |
+
"step": 3799
|
26635 |
+
},
|
26636 |
+
{
|
26637 |
+
"epoch": 0.8444444444444444,
|
26638 |
+
"grad_norm": 0.7246300578117371,
|
26639 |
+
"learning_rate": 3.11804008908686e-05,
|
26640 |
+
"loss": 0.3778,
|
26641 |
+
"step": 3800
|
26642 |
+
},
|
26643 |
+
{
|
26644 |
+
"epoch": 0.8446666666666667,
|
26645 |
+
"grad_norm": 0.9608231782913208,
|
26646 |
+
"learning_rate": 3.11358574610245e-05,
|
26647 |
+
"loss": 2.1959,
|
26648 |
+
"step": 3801
|
26649 |
+
},
|
26650 |
+
{
|
26651 |
+
"epoch": 0.8448888888888889,
|
26652 |
+
"grad_norm": 0.04354199394583702,
|
26653 |
+
"learning_rate": 3.1091314031180404e-05,
|
26654 |
+
"loss": 0.0106,
|
26655 |
+
"step": 3802
|
26656 |
+
},
|
26657 |
+
{
|
26658 |
+
"epoch": 0.8451111111111111,
|
26659 |
+
"grad_norm": 0.8725544810295105,
|
26660 |
+
"learning_rate": 3.10467706013363e-05,
|
26661 |
+
"loss": 2.4019,
|
26662 |
+
"step": 3803
|
26663 |
+
},
|
26664 |
+
{
|
26665 |
+
"epoch": 0.8453333333333334,
|
26666 |
+
"grad_norm": 0.04339034482836723,
|
26667 |
+
"learning_rate": 3.10022271714922e-05,
|
26668 |
+
"loss": 0.0107,
|
26669 |
+
"step": 3804
|
26670 |
+
},
|
26671 |
+
{
|
26672 |
+
"epoch": 0.8455555555555555,
|
26673 |
+
"grad_norm": 0.8996299505233765,
|
26674 |
+
"learning_rate": 3.095768374164811e-05,
|
26675 |
+
"loss": 1.7978,
|
26676 |
+
"step": 3805
|
26677 |
+
},
|
26678 |
+
{
|
26679 |
+
"epoch": 0.8457777777777777,
|
26680 |
+
"grad_norm": 0.08517049998044968,
|
26681 |
+
"learning_rate": 3.091314031180401e-05,
|
26682 |
+
"loss": 0.0108,
|
26683 |
+
"step": 3806
|
26684 |
+
},
|
26685 |
+
{
|
26686 |
+
"epoch": 0.846,
|
26687 |
+
"grad_norm": 0.08175533264875412,
|
26688 |
+
"learning_rate": 3.086859688195991e-05,
|
26689 |
+
"loss": 0.0109,
|
26690 |
+
"step": 3807
|
26691 |
+
},
|
26692 |
+
{
|
26693 |
+
"epoch": 0.8462222222222222,
|
26694 |
+
"grad_norm": 0.06705193221569061,
|
26695 |
+
"learning_rate": 3.0824053452115816e-05,
|
26696 |
+
"loss": 0.0103,
|
26697 |
+
"step": 3808
|
26698 |
+
},
|
26699 |
+
{
|
26700 |
+
"epoch": 0.8464444444444444,
|
26701 |
+
"grad_norm": 0.08038879185914993,
|
26702 |
+
"learning_rate": 3.077951002227172e-05,
|
26703 |
+
"loss": 0.0106,
|
26704 |
+
"step": 3809
|
26705 |
+
},
|
26706 |
+
{
|
26707 |
+
"epoch": 0.8466666666666667,
|
26708 |
+
"grad_norm": 0.617675244808197,
|
26709 |
+
"learning_rate": 3.073496659242761e-05,
|
26710 |
+
"loss": 1.0167,
|
26711 |
+
"step": 3810
|
26712 |
+
},
|
26713 |
+
{
|
26714 |
+
"epoch": 0.8468888888888889,
|
26715 |
+
"grad_norm": 0.8487913012504578,
|
26716 |
+
"learning_rate": 3.069042316258352e-05,
|
26717 |
+
"loss": 1.8088,
|
26718 |
+
"step": 3811
|
26719 |
+
},
|
26720 |
+
{
|
26721 |
+
"epoch": 0.8471111111111111,
|
26722 |
+
"grad_norm": 0.8923436403274536,
|
26723 |
+
"learning_rate": 3.0645879732739424e-05,
|
26724 |
+
"loss": 1.8555,
|
26725 |
+
"step": 3812
|
26726 |
+
},
|
26727 |
+
{
|
26728 |
+
"epoch": 0.8473333333333334,
|
26729 |
+
"grad_norm": 0.9946725368499756,
|
26730 |
+
"learning_rate": 3.060133630289532e-05,
|
26731 |
+
"loss": 2.1355,
|
26732 |
+
"step": 3813
|
26733 |
+
},
|
26734 |
+
{
|
26735 |
+
"epoch": 0.8475555555555555,
|
26736 |
+
"grad_norm": 0.8608193397521973,
|
26737 |
+
"learning_rate": 3.055679287305123e-05,
|
26738 |
+
"loss": 1.8501,
|
26739 |
+
"step": 3814
|
26740 |
+
},
|
26741 |
+
{
|
26742 |
+
"epoch": 0.8477777777777777,
|
26743 |
+
"grad_norm": 1.0533936023712158,
|
26744 |
+
"learning_rate": 3.051224944320713e-05,
|
26745 |
+
"loss": 2.3759,
|
26746 |
+
"step": 3815
|
26747 |
+
},
|
26748 |
+
{
|
26749 |
+
"epoch": 0.848,
|
26750 |
+
"grad_norm": 0.9395473003387451,
|
26751 |
+
"learning_rate": 3.046770601336303e-05,
|
26752 |
+
"loss": 1.8055,
|
26753 |
+
"step": 3816
|
26754 |
+
},
|
26755 |
+
{
|
26756 |
+
"epoch": 0.8482222222222222,
|
26757 |
+
"grad_norm": 0.868739902973175,
|
26758 |
+
"learning_rate": 3.0423162583518934e-05,
|
26759 |
+
"loss": 1.9014,
|
26760 |
+
"step": 3817
|
26761 |
+
},
|
26762 |
+
{
|
26763 |
+
"epoch": 0.8484444444444444,
|
26764 |
+
"grad_norm": 0.9286447167396545,
|
26765 |
+
"learning_rate": 3.0378619153674836e-05,
|
26766 |
+
"loss": 2.1256,
|
26767 |
+
"step": 3818
|
26768 |
+
},
|
26769 |
+
{
|
26770 |
+
"epoch": 0.8486666666666667,
|
26771 |
+
"grad_norm": 0.9453836679458618,
|
26772 |
+
"learning_rate": 3.0334075723830734e-05,
|
26773 |
+
"loss": 1.9572,
|
26774 |
+
"step": 3819
|
26775 |
+
},
|
26776 |
+
{
|
26777 |
+
"epoch": 0.8488888888888889,
|
26778 |
+
"grad_norm": 1.1995909214019775,
|
26779 |
+
"learning_rate": 3.028953229398664e-05,
|
26780 |
+
"loss": 1.7376,
|
26781 |
+
"step": 3820
|
26782 |
+
},
|
26783 |
+
{
|
26784 |
+
"epoch": 0.8491111111111111,
|
26785 |
+
"grad_norm": 1.2044036388397217,
|
26786 |
+
"learning_rate": 3.024498886414254e-05,
|
26787 |
+
"loss": 2.1343,
|
26788 |
+
"step": 3821
|
26789 |
+
},
|
26790 |
+
{
|
26791 |
+
"epoch": 0.8493333333333334,
|
26792 |
+
"grad_norm": 0.8160643577575684,
|
26793 |
+
"learning_rate": 3.020044543429844e-05,
|
26794 |
+
"loss": 0.9794,
|
26795 |
+
"step": 3822
|
26796 |
+
},
|
26797 |
+
{
|
26798 |
+
"epoch": 0.8495555555555555,
|
26799 |
+
"grad_norm": 0.06719803065061569,
|
26800 |
+
"learning_rate": 3.0155902004454346e-05,
|
26801 |
+
"loss": 0.0154,
|
26802 |
+
"step": 3823
|
26803 |
+
},
|
26804 |
+
{
|
26805 |
+
"epoch": 0.8497777777777777,
|
26806 |
+
"grad_norm": 0.06740820407867432,
|
26807 |
+
"learning_rate": 3.0111358574610248e-05,
|
26808 |
+
"loss": 0.0154,
|
26809 |
+
"step": 3824
|
26810 |
+
},
|
26811 |
+
{
|
26812 |
+
"epoch": 0.85,
|
26813 |
+
"grad_norm": 1.0504337549209595,
|
26814 |
+
"learning_rate": 3.0066815144766146e-05,
|
26815 |
+
"loss": 1.8658,
|
26816 |
+
"step": 3825
|
26817 |
+
},
|
26818 |
+
{
|
26819 |
+
"epoch": 0.8502222222222222,
|
26820 |
+
"grad_norm": 0.7439045906066895,
|
26821 |
+
"learning_rate": 3.002227171492205e-05,
|
26822 |
+
"loss": 1.1039,
|
26823 |
+
"step": 3826
|
26824 |
+
},
|
26825 |
+
{
|
26826 |
+
"epoch": 0.8504444444444444,
|
26827 |
+
"grad_norm": 0.1253952980041504,
|
26828 |
+
"learning_rate": 2.9977728285077953e-05,
|
26829 |
+
"loss": 0.0206,
|
26830 |
+
"step": 3827
|
26831 |
+
},
|
26832 |
+
{
|
26833 |
+
"epoch": 0.8506666666666667,
|
26834 |
+
"grad_norm": 1.0089833736419678,
|
26835 |
+
"learning_rate": 2.9933184855233852e-05,
|
26836 |
+
"loss": 2.0178,
|
26837 |
+
"step": 3828
|
26838 |
+
},
|
26839 |
+
{
|
26840 |
+
"epoch": 0.8508888888888889,
|
26841 |
+
"grad_norm": 1.0395070314407349,
|
26842 |
+
"learning_rate": 2.9888641425389757e-05,
|
26843 |
+
"loss": 1.9692,
|
26844 |
+
"step": 3829
|
26845 |
+
},
|
26846 |
+
{
|
26847 |
+
"epoch": 0.8511111111111112,
|
26848 |
+
"grad_norm": 1.0526185035705566,
|
26849 |
+
"learning_rate": 2.9844097995545663e-05,
|
26850 |
+
"loss": 1.8065,
|
26851 |
+
"step": 3830
|
26852 |
+
},
|
26853 |
+
{
|
26854 |
+
"epoch": 0.8513333333333334,
|
26855 |
+
"grad_norm": 1.0034129619598389,
|
26856 |
+
"learning_rate": 2.9799554565701558e-05,
|
26857 |
+
"loss": 1.679,
|
26858 |
+
"step": 3831
|
26859 |
+
},
|
26860 |
+
{
|
26861 |
+
"epoch": 0.8515555555555555,
|
26862 |
+
"grad_norm": 0.06555074453353882,
|
26863 |
+
"learning_rate": 2.9755011135857463e-05,
|
26864 |
+
"loss": 0.017,
|
26865 |
+
"step": 3832
|
26866 |
+
},
|
26867 |
+
{
|
26868 |
+
"epoch": 0.8517777777777777,
|
26869 |
+
"grad_norm": 0.8089559078216553,
|
26870 |
+
"learning_rate": 2.9710467706013362e-05,
|
26871 |
+
"loss": 0.9377,
|
26872 |
+
"step": 3833
|
26873 |
+
},
|
26874 |
+
{
|
26875 |
+
"epoch": 0.852,
|
26876 |
+
"grad_norm": 0.7607543468475342,
|
26877 |
+
"learning_rate": 2.9665924276169267e-05,
|
26878 |
+
"loss": 0.873,
|
26879 |
+
"step": 3834
|
26880 |
+
},
|
26881 |
+
{
|
26882 |
+
"epoch": 0.8522222222222222,
|
26883 |
+
"grad_norm": 0.08201993256807327,
|
26884 |
+
"learning_rate": 2.962138084632517e-05,
|
26885 |
+
"loss": 0.0183,
|
26886 |
+
"step": 3835
|
26887 |
+
},
|
26888 |
+
{
|
26889 |
+
"epoch": 0.8524444444444444,
|
26890 |
+
"grad_norm": 0.6691009402275085,
|
26891 |
+
"learning_rate": 2.9576837416481068e-05,
|
26892 |
+
"loss": 0.6488,
|
26893 |
+
"step": 3836
|
26894 |
+
},
|
26895 |
+
{
|
26896 |
+
"epoch": 0.8526666666666667,
|
26897 |
+
"grad_norm": 1.0818275213241577,
|
26898 |
+
"learning_rate": 2.9532293986636973e-05,
|
26899 |
+
"loss": 1.6231,
|
26900 |
+
"step": 3837
|
26901 |
+
},
|
26902 |
+
{
|
26903 |
+
"epoch": 0.8528888888888889,
|
26904 |
+
"grad_norm": 0.10234081745147705,
|
26905 |
+
"learning_rate": 2.9487750556792875e-05,
|
26906 |
+
"loss": 0.0249,
|
26907 |
+
"step": 3838
|
26908 |
+
},
|
26909 |
+
{
|
26910 |
+
"epoch": 0.8531111111111112,
|
26911 |
+
"grad_norm": 0.7301368117332458,
|
26912 |
+
"learning_rate": 2.9443207126948774e-05,
|
26913 |
+
"loss": 0.7285,
|
26914 |
+
"step": 3839
|
26915 |
+
},
|
26916 |
+
{
|
26917 |
+
"epoch": 0.8533333333333334,
|
26918 |
+
"grad_norm": 1.020973563194275,
|
26919 |
+
"learning_rate": 2.939866369710468e-05,
|
26920 |
+
"loss": 1.5453,
|
26921 |
+
"step": 3840
|
26922 |
+
},
|
26923 |
+
{
|
26924 |
+
"epoch": 0.8535555555555555,
|
26925 |
+
"grad_norm": 1.161118745803833,
|
26926 |
+
"learning_rate": 2.935412026726058e-05,
|
26927 |
+
"loss": 1.5789,
|
26928 |
+
"step": 3841
|
26929 |
+
},
|
26930 |
+
{
|
26931 |
+
"epoch": 0.8537777777777777,
|
26932 |
+
"grad_norm": 1.1855006217956543,
|
26933 |
+
"learning_rate": 2.930957683741648e-05,
|
26934 |
+
"loss": 1.443,
|
26935 |
+
"step": 3842
|
26936 |
+
},
|
26937 |
+
{
|
26938 |
+
"epoch": 0.854,
|
26939 |
+
"grad_norm": 1.0485907793045044,
|
26940 |
+
"learning_rate": 2.9265033407572385e-05,
|
26941 |
+
"loss": 1.2626,
|
26942 |
+
"step": 3843
|
26943 |
+
},
|
26944 |
+
{
|
26945 |
+
"epoch": 0.8542222222222222,
|
26946 |
+
"grad_norm": 1.0825096368789673,
|
26947 |
+
"learning_rate": 2.9220489977728287e-05,
|
26948 |
+
"loss": 1.4062,
|
26949 |
+
"step": 3844
|
26950 |
+
},
|
26951 |
+
{
|
26952 |
+
"epoch": 0.8544444444444445,
|
26953 |
+
"grad_norm": 0.16687047481536865,
|
26954 |
+
"learning_rate": 2.9175946547884186e-05,
|
26955 |
+
"loss": 0.0262,
|
26956 |
+
"step": 3845
|
26957 |
+
},
|
26958 |
+
{
|
26959 |
+
"epoch": 0.8546666666666667,
|
26960 |
+
"grad_norm": 1.0403611660003662,
|
26961 |
+
"learning_rate": 2.913140311804009e-05,
|
26962 |
+
"loss": 1.4555,
|
26963 |
+
"step": 3846
|
26964 |
+
},
|
26965 |
+
{
|
26966 |
+
"epoch": 0.8548888888888889,
|
26967 |
+
"grad_norm": 1.069176197052002,
|
26968 |
+
"learning_rate": 2.9086859688195993e-05,
|
26969 |
+
"loss": 1.2518,
|
26970 |
+
"step": 3847
|
26971 |
+
},
|
26972 |
+
{
|
26973 |
+
"epoch": 0.8551111111111112,
|
26974 |
+
"grad_norm": 1.2168667316436768,
|
26975 |
+
"learning_rate": 2.904231625835189e-05,
|
26976 |
+
"loss": 1.289,
|
26977 |
+
"step": 3848
|
26978 |
+
},
|
26979 |
+
{
|
26980 |
+
"epoch": 0.8553333333333333,
|
26981 |
+
"grad_norm": 0.5977094769477844,
|
26982 |
+
"learning_rate": 2.8997772828507797e-05,
|
26983 |
+
"loss": 0.4731,
|
26984 |
+
"step": 3849
|
26985 |
+
},
|
26986 |
+
{
|
26987 |
+
"epoch": 0.8555555555555555,
|
26988 |
+
"grad_norm": 0.543451189994812,
|
26989 |
+
"learning_rate": 2.89532293986637e-05,
|
26990 |
+
"loss": 0.3367,
|
26991 |
+
"step": 3850
|
26992 |
+
},
|
26993 |
+
{
|
26994 |
+
"epoch": 0.8557777777777777,
|
26995 |
+
"grad_norm": 0.044504791498184204,
|
26996 |
+
"learning_rate": 2.8908685968819597e-05,
|
26997 |
+
"loss": 0.0103,
|
26998 |
+
"step": 3851
|
26999 |
+
},
|
27000 |
+
{
|
27001 |
+
"epoch": 0.856,
|
27002 |
+
"grad_norm": 0.8173375725746155,
|
27003 |
+
"learning_rate": 2.8864142538975503e-05,
|
27004 |
+
"loss": 2.0658,
|
27005 |
+
"step": 3852
|
27006 |
+
},
|
27007 |
+
{
|
27008 |
+
"epoch": 0.8562222222222222,
|
27009 |
+
"grad_norm": 0.6008175015449524,
|
27010 |
+
"learning_rate": 2.8819599109131408e-05,
|
27011 |
+
"loss": 1.0048,
|
27012 |
+
"step": 3853
|
27013 |
+
},
|
27014 |
+
{
|
27015 |
+
"epoch": 0.8564444444444445,
|
27016 |
+
"grad_norm": 0.6246810555458069,
|
27017 |
+
"learning_rate": 2.8775055679287303e-05,
|
27018 |
+
"loss": 1.0661,
|
27019 |
+
"step": 3854
|
27020 |
+
},
|
27021 |
+
{
|
27022 |
+
"epoch": 0.8566666666666667,
|
27023 |
+
"grad_norm": 0.9632955193519592,
|
27024 |
+
"learning_rate": 2.873051224944321e-05,
|
27025 |
+
"loss": 2.273,
|
27026 |
+
"step": 3855
|
27027 |
+
},
|
27028 |
+
{
|
27029 |
+
"epoch": 0.8568888888888889,
|
27030 |
+
"grad_norm": 0.8222072720527649,
|
27031 |
+
"learning_rate": 2.8685968819599114e-05,
|
27032 |
+
"loss": 2.0065,
|
27033 |
+
"step": 3856
|
27034 |
+
},
|
27035 |
+
{
|
27036 |
+
"epoch": 0.8571111111111112,
|
27037 |
+
"grad_norm": 0.6057097911834717,
|
27038 |
+
"learning_rate": 2.8641425389755013e-05,
|
27039 |
+
"loss": 1.1629,
|
27040 |
+
"step": 3857
|
27041 |
+
},
|
27042 |
+
{
|
27043 |
+
"epoch": 0.8573333333333333,
|
27044 |
+
"grad_norm": 0.06852009892463684,
|
27045 |
+
"learning_rate": 2.8596881959910915e-05,
|
27046 |
+
"loss": 0.0104,
|
27047 |
+
"step": 3858
|
27048 |
+
},
|
27049 |
+
{
|
27050 |
+
"epoch": 0.8575555555555555,
|
27051 |
+
"grad_norm": 0.06707873195409775,
|
27052 |
+
"learning_rate": 2.855233853006682e-05,
|
27053 |
+
"loss": 0.0103,
|
27054 |
+
"step": 3859
|
27055 |
+
},
|
27056 |
+
{
|
27057 |
+
"epoch": 0.8577777777777778,
|
27058 |
+
"grad_norm": 0.0674692690372467,
|
27059 |
+
"learning_rate": 2.850779510022272e-05,
|
27060 |
+
"loss": 0.0099,
|
27061 |
+
"step": 3860
|
27062 |
+
},
|
27063 |
+
{
|
27064 |
+
"epoch": 0.858,
|
27065 |
+
"grad_norm": 0.5297547578811646,
|
27066 |
+
"learning_rate": 2.846325167037862e-05,
|
27067 |
+
"loss": 1.1045,
|
27068 |
+
"step": 3861
|
27069 |
+
},
|
27070 |
+
{
|
27071 |
+
"epoch": 0.8582222222222222,
|
27072 |
+
"grad_norm": 0.9173485040664673,
|
27073 |
+
"learning_rate": 2.8418708240534526e-05,
|
27074 |
+
"loss": 1.9225,
|
27075 |
+
"step": 3862
|
27076 |
+
},
|
27077 |
+
{
|
27078 |
+
"epoch": 0.8584444444444445,
|
27079 |
+
"grad_norm": 0.9960424900054932,
|
27080 |
+
"learning_rate": 2.8374164810690424e-05,
|
27081 |
+
"loss": 2.0529,
|
27082 |
+
"step": 3863
|
27083 |
+
},
|
27084 |
+
{
|
27085 |
+
"epoch": 0.8586666666666667,
|
27086 |
+
"grad_norm": 0.6144242286682129,
|
27087 |
+
"learning_rate": 2.8329621380846326e-05,
|
27088 |
+
"loss": 1.0756,
|
27089 |
+
"step": 3864
|
27090 |
+
},
|
27091 |
+
{
|
27092 |
+
"epoch": 0.8588888888888889,
|
27093 |
+
"grad_norm": 0.9492395520210266,
|
27094 |
+
"learning_rate": 2.8285077951002232e-05,
|
27095 |
+
"loss": 1.5811,
|
27096 |
+
"step": 3865
|
27097 |
+
},
|
27098 |
+
{
|
27099 |
+
"epoch": 0.8591111111111112,
|
27100 |
+
"grad_norm": 1.0924068689346313,
|
27101 |
+
"learning_rate": 2.824053452115813e-05,
|
27102 |
+
"loss": 2.1395,
|
27103 |
+
"step": 3866
|
27104 |
+
},
|
27105 |
+
{
|
27106 |
+
"epoch": 0.8593333333333333,
|
27107 |
+
"grad_norm": 1.214991569519043,
|
27108 |
+
"learning_rate": 2.8195991091314032e-05,
|
27109 |
+
"loss": 2.3004,
|
27110 |
+
"step": 3867
|
27111 |
+
},
|
27112 |
+
{
|
27113 |
+
"epoch": 0.8595555555555555,
|
27114 |
+
"grad_norm": 0.9233739972114563,
|
27115 |
+
"learning_rate": 2.815144766146993e-05,
|
27116 |
+
"loss": 1.9741,
|
27117 |
+
"step": 3868
|
27118 |
+
},
|
27119 |
+
{
|
27120 |
+
"epoch": 0.8597777777777778,
|
27121 |
+
"grad_norm": 0.9544225931167603,
|
27122 |
+
"learning_rate": 2.8106904231625836e-05,
|
27123 |
+
"loss": 2.0073,
|
27124 |
+
"step": 3869
|
27125 |
+
},
|
27126 |
+
{
|
27127 |
+
"epoch": 0.86,
|
27128 |
+
"grad_norm": 1.0409972667694092,
|
27129 |
+
"learning_rate": 2.8062360801781738e-05,
|
27130 |
+
"loss": 2.0343,
|
27131 |
+
"step": 3870
|
27132 |
+
},
|
27133 |
+
{
|
27134 |
+
"epoch": 0.8602222222222222,
|
27135 |
+
"grad_norm": 1.0500308275222778,
|
27136 |
+
"learning_rate": 2.8017817371937637e-05,
|
27137 |
+
"loss": 1.8736,
|
27138 |
+
"step": 3871
|
27139 |
+
},
|
27140 |
+
{
|
27141 |
+
"epoch": 0.8604444444444445,
|
27142 |
+
"grad_norm": 1.0792829990386963,
|
27143 |
+
"learning_rate": 2.7973273942093542e-05,
|
27144 |
+
"loss": 1.9845,
|
27145 |
+
"step": 3872
|
27146 |
+
},
|
27147 |
+
{
|
27148 |
+
"epoch": 0.8606666666666667,
|
27149 |
+
"grad_norm": 1.1363780498504639,
|
27150 |
+
"learning_rate": 2.7928730512249447e-05,
|
27151 |
+
"loss": 1.8806,
|
27152 |
+
"step": 3873
|
27153 |
+
},
|
27154 |
+
{
|
27155 |
+
"epoch": 0.8608888888888889,
|
27156 |
+
"grad_norm": 0.6764510869979858,
|
27157 |
+
"learning_rate": 2.7884187082405343e-05,
|
27158 |
+
"loss": 0.9852,
|
27159 |
+
"step": 3874
|
27160 |
+
},
|
27161 |
+
{
|
27162 |
+
"epoch": 0.8611111111111112,
|
27163 |
+
"grad_norm": 0.06674336642026901,
|
27164 |
+
"learning_rate": 2.7839643652561248e-05,
|
27165 |
+
"loss": 0.0153,
|
27166 |
+
"step": 3875
|
27167 |
+
},
|
27168 |
+
{
|
27169 |
+
"epoch": 0.8613333333333333,
|
27170 |
+
"grad_norm": 0.06754778325557709,
|
27171 |
+
"learning_rate": 2.7795100222717153e-05,
|
27172 |
+
"loss": 0.0156,
|
27173 |
+
"step": 3876
|
27174 |
+
},
|
27175 |
+
{
|
27176 |
+
"epoch": 0.8615555555555555,
|
27177 |
+
"grad_norm": 0.6871387362480164,
|
27178 |
+
"learning_rate": 2.7750556792873052e-05,
|
27179 |
+
"loss": 0.8689,
|
27180 |
+
"step": 3877
|
27181 |
+
},
|
27182 |
+
{
|
27183 |
+
"epoch": 0.8617777777777778,
|
27184 |
+
"grad_norm": 1.1654753684997559,
|
27185 |
+
"learning_rate": 2.7706013363028954e-05,
|
27186 |
+
"loss": 1.9632,
|
27187 |
+
"step": 3878
|
27188 |
+
},
|
27189 |
+
{
|
27190 |
+
"epoch": 0.862,
|
27191 |
+
"grad_norm": 0.13107286393642426,
|
27192 |
+
"learning_rate": 2.766146993318486e-05,
|
27193 |
+
"loss": 0.0207,
|
27194 |
+
"step": 3879
|
27195 |
+
},
|
27196 |
+
{
|
27197 |
+
"epoch": 0.8622222222222222,
|
27198 |
+
"grad_norm": 1.0098730325698853,
|
27199 |
+
"learning_rate": 2.7616926503340758e-05,
|
27200 |
+
"loss": 1.7388,
|
27201 |
+
"step": 3880
|
27202 |
+
},
|
27203 |
+
{
|
27204 |
+
"epoch": 0.8624444444444445,
|
27205 |
+
"grad_norm": 0.9629087448120117,
|
27206 |
+
"learning_rate": 2.757238307349666e-05,
|
27207 |
+
"loss": 1.7969,
|
27208 |
+
"step": 3881
|
27209 |
+
},
|
27210 |
+
{
|
27211 |
+
"epoch": 0.8626666666666667,
|
27212 |
+
"grad_norm": 0.9217532873153687,
|
27213 |
+
"learning_rate": 2.7527839643652565e-05,
|
27214 |
+
"loss": 1.9322,
|
27215 |
+
"step": 3882
|
27216 |
+
},
|
27217 |
+
{
|
27218 |
+
"epoch": 0.8628888888888889,
|
27219 |
+
"grad_norm": 1.0283830165863037,
|
27220 |
+
"learning_rate": 2.7483296213808464e-05,
|
27221 |
+
"loss": 1.7681,
|
27222 |
+
"step": 3883
|
27223 |
+
},
|
27224 |
+
{
|
27225 |
+
"epoch": 0.8631111111111112,
|
27226 |
+
"grad_norm": 0.701818585395813,
|
27227 |
+
"learning_rate": 2.7438752783964366e-05,
|
27228 |
+
"loss": 0.8642,
|
27229 |
+
"step": 3884
|
27230 |
+
},
|
27231 |
+
{
|
27232 |
+
"epoch": 0.8633333333333333,
|
27233 |
+
"grad_norm": 0.7634962201118469,
|
27234 |
+
"learning_rate": 2.739420935412027e-05,
|
27235 |
+
"loss": 0.7195,
|
27236 |
+
"step": 3885
|
27237 |
+
},
|
27238 |
+
{
|
27239 |
+
"epoch": 0.8635555555555555,
|
27240 |
+
"grad_norm": 0.9613010287284851,
|
27241 |
+
"learning_rate": 2.734966592427617e-05,
|
27242 |
+
"loss": 1.7299,
|
27243 |
+
"step": 3886
|
27244 |
+
},
|
27245 |
+
{
|
27246 |
+
"epoch": 0.8637777777777778,
|
27247 |
+
"grad_norm": 0.8127443790435791,
|
27248 |
+
"learning_rate": 2.730512249443207e-05,
|
27249 |
+
"loss": 0.9909,
|
27250 |
+
"step": 3887
|
27251 |
+
},
|
27252 |
+
{
|
27253 |
+
"epoch": 0.864,
|
27254 |
+
"grad_norm": 0.7633342146873474,
|
27255 |
+
"learning_rate": 2.7260579064587977e-05,
|
27256 |
+
"loss": 0.7784,
|
27257 |
+
"step": 3888
|
27258 |
+
},
|
27259 |
+
{
|
27260 |
+
"epoch": 0.8642222222222222,
|
27261 |
+
"grad_norm": 0.8209825754165649,
|
27262 |
+
"learning_rate": 2.7216035634743876e-05,
|
27263 |
+
"loss": 0.8605,
|
27264 |
+
"step": 3889
|
27265 |
+
},
|
27266 |
+
{
|
27267 |
+
"epoch": 0.8644444444444445,
|
27268 |
+
"grad_norm": 1.1006879806518555,
|
27269 |
+
"learning_rate": 2.7171492204899778e-05,
|
27270 |
+
"loss": 1.4274,
|
27271 |
+
"step": 3890
|
27272 |
+
},
|
27273 |
+
{
|
27274 |
+
"epoch": 0.8646666666666667,
|
27275 |
+
"grad_norm": 0.9458972811698914,
|
27276 |
+
"learning_rate": 2.7126948775055683e-05,
|
27277 |
+
"loss": 1.4813,
|
27278 |
+
"step": 3891
|
27279 |
+
},
|
27280 |
+
{
|
27281 |
+
"epoch": 0.8648888888888889,
|
27282 |
+
"grad_norm": 1.3641767501831055,
|
27283 |
+
"learning_rate": 2.708240534521158e-05,
|
27284 |
+
"loss": 1.5427,
|
27285 |
+
"step": 3892
|
27286 |
+
},
|
27287 |
+
{
|
27288 |
+
"epoch": 0.8651111111111112,
|
27289 |
+
"grad_norm": 1.0138379335403442,
|
27290 |
+
"learning_rate": 2.7037861915367484e-05,
|
27291 |
+
"loss": 1.4602,
|
27292 |
+
"step": 3893
|
27293 |
+
},
|
27294 |
+
{
|
27295 |
+
"epoch": 0.8653333333333333,
|
27296 |
+
"grad_norm": 1.0641552209854126,
|
27297 |
+
"learning_rate": 2.699331848552339e-05,
|
27298 |
+
"loss": 1.2653,
|
27299 |
+
"step": 3894
|
27300 |
+
},
|
27301 |
+
{
|
27302 |
+
"epoch": 0.8655555555555555,
|
27303 |
+
"grad_norm": 1.2748581171035767,
|
27304 |
+
"learning_rate": 2.6948775055679287e-05,
|
27305 |
+
"loss": 1.3019,
|
27306 |
+
"step": 3895
|
27307 |
+
},
|
27308 |
+
{
|
27309 |
+
"epoch": 0.8657777777777778,
|
27310 |
+
"grad_norm": 1.1393803358078003,
|
27311 |
+
"learning_rate": 2.6904231625835193e-05,
|
27312 |
+
"loss": 1.331,
|
27313 |
+
"step": 3896
|
27314 |
+
},
|
27315 |
+
{
|
27316 |
+
"epoch": 0.866,
|
27317 |
+
"grad_norm": 1.0819334983825684,
|
27318 |
+
"learning_rate": 2.6859688195991095e-05,
|
27319 |
+
"loss": 1.1292,
|
27320 |
+
"step": 3897
|
27321 |
+
},
|
27322 |
+
{
|
27323 |
+
"epoch": 0.8662222222222222,
|
27324 |
+
"grad_norm": 0.14373019337654114,
|
27325 |
+
"learning_rate": 2.6815144766146993e-05,
|
27326 |
+
"loss": 0.0321,
|
27327 |
+
"step": 3898
|
27328 |
+
},
|
27329 |
+
{
|
27330 |
+
"epoch": 0.8664444444444445,
|
27331 |
+
"grad_norm": 0.14658400416374207,
|
27332 |
+
"learning_rate": 2.67706013363029e-05,
|
27333 |
+
"loss": 0.032,
|
27334 |
+
"step": 3899
|
27335 |
+
},
|
27336 |
+
{
|
27337 |
+
"epoch": 0.8666666666666667,
|
27338 |
+
"grad_norm": 1.1421598196029663,
|
27339 |
+
"learning_rate": 2.67260579064588e-05,
|
27340 |
+
"loss": 1.0447,
|
27341 |
+
"step": 3900
|
27342 |
+
},
|
27343 |
+
{
|
27344 |
+
"epoch": 0.8668888888888889,
|
27345 |
+
"grad_norm": 0.6876357793807983,
|
27346 |
+
"learning_rate": 2.66815144766147e-05,
|
27347 |
+
"loss": 1.2227,
|
27348 |
+
"step": 3901
|
27349 |
+
},
|
27350 |
+
{
|
27351 |
+
"epoch": 0.8671111111111112,
|
27352 |
+
"grad_norm": 0.0448576956987381,
|
27353 |
+
"learning_rate": 2.6636971046770605e-05,
|
27354 |
+
"loss": 0.0104,
|
27355 |
+
"step": 3902
|
27356 |
+
},
|
27357 |
+
{
|
27358 |
+
"epoch": 0.8673333333333333,
|
27359 |
+
"grad_norm": 0.6660778522491455,
|
27360 |
+
"learning_rate": 2.6592427616926503e-05,
|
27361 |
+
"loss": 1.121,
|
27362 |
+
"step": 3903
|
27363 |
+
},
|
27364 |
+
{
|
27365 |
+
"epoch": 0.8675555555555555,
|
27366 |
+
"grad_norm": 0.043087027966976166,
|
27367 |
+
"learning_rate": 2.6547884187082405e-05,
|
27368 |
+
"loss": 0.0103,
|
27369 |
+
"step": 3904
|
27370 |
+
},
|
27371 |
+
{
|
27372 |
+
"epoch": 0.8677777777777778,
|
27373 |
+
"grad_norm": 0.5372818112373352,
|
27374 |
+
"learning_rate": 2.650334075723831e-05,
|
27375 |
+
"loss": 1.0993,
|
27376 |
+
"step": 3905
|
27377 |
+
},
|
27378 |
+
{
|
27379 |
+
"epoch": 0.868,
|
27380 |
+
"grad_norm": 0.9083240628242493,
|
27381 |
+
"learning_rate": 2.645879732739421e-05,
|
27382 |
+
"loss": 2.459,
|
27383 |
+
"step": 3906
|
27384 |
+
},
|
27385 |
+
{
|
27386 |
+
"epoch": 0.8682222222222222,
|
27387 |
+
"grad_norm": 0.8523256182670593,
|
27388 |
+
"learning_rate": 2.641425389755011e-05,
|
27389 |
+
"loss": 2.1683,
|
27390 |
+
"step": 3907
|
27391 |
+
},
|
27392 |
+
{
|
27393 |
+
"epoch": 0.8684444444444445,
|
27394 |
+
"grad_norm": 0.6197808384895325,
|
27395 |
+
"learning_rate": 2.6369710467706016e-05,
|
27396 |
+
"loss": 1.0535,
|
27397 |
+
"step": 3908
|
27398 |
+
},
|
27399 |
+
{
|
27400 |
+
"epoch": 0.8686666666666667,
|
27401 |
+
"grad_norm": 0.8953803181648254,
|
27402 |
+
"learning_rate": 2.6325167037861915e-05,
|
27403 |
+
"loss": 1.9434,
|
27404 |
+
"step": 3909
|
27405 |
+
},
|
27406 |
+
{
|
27407 |
+
"epoch": 0.8688888888888889,
|
27408 |
+
"grad_norm": 0.9139788150787354,
|
27409 |
+
"learning_rate": 2.6280623608017817e-05,
|
27410 |
+
"loss": 1.8545,
|
27411 |
+
"step": 3910
|
27412 |
+
},
|
27413 |
+
{
|
27414 |
+
"epoch": 0.8691111111111111,
|
27415 |
+
"grad_norm": 0.8638214468955994,
|
27416 |
+
"learning_rate": 2.6236080178173722e-05,
|
27417 |
+
"loss": 1.8329,
|
27418 |
+
"step": 3911
|
27419 |
+
},
|
27420 |
+
{
|
27421 |
+
"epoch": 0.8693333333333333,
|
27422 |
+
"grad_norm": 0.8344167470932007,
|
27423 |
+
"learning_rate": 2.619153674832962e-05,
|
27424 |
+
"loss": 1.7991,
|
27425 |
+
"step": 3912
|
27426 |
+
},
|
27427 |
+
{
|
27428 |
+
"epoch": 0.8695555555555555,
|
27429 |
+
"grad_norm": 0.96803879737854,
|
27430 |
+
"learning_rate": 2.6146993318485523e-05,
|
27431 |
+
"loss": 1.9849,
|
27432 |
+
"step": 3913
|
27433 |
+
},
|
27434 |
+
{
|
27435 |
+
"epoch": 0.8697777777777778,
|
27436 |
+
"grad_norm": 1.0239784717559814,
|
27437 |
+
"learning_rate": 2.6102449888641428e-05,
|
27438 |
+
"loss": 1.9256,
|
27439 |
+
"step": 3914
|
27440 |
+
},
|
27441 |
+
{
|
27442 |
+
"epoch": 0.87,
|
27443 |
+
"grad_norm": 0.8905801177024841,
|
27444 |
+
"learning_rate": 2.6057906458797327e-05,
|
27445 |
+
"loss": 1.6746,
|
27446 |
+
"step": 3915
|
27447 |
+
},
|
27448 |
+
{
|
27449 |
+
"epoch": 0.8702222222222222,
|
27450 |
+
"grad_norm": 1.0133596658706665,
|
27451 |
+
"learning_rate": 2.601336302895323e-05,
|
27452 |
+
"loss": 2.1594,
|
27453 |
+
"step": 3916
|
27454 |
+
},
|
27455 |
+
{
|
27456 |
+
"epoch": 0.8704444444444445,
|
27457 |
+
"grad_norm": 0.06942284107208252,
|
27458 |
+
"learning_rate": 2.5968819599109134e-05,
|
27459 |
+
"loss": 0.015,
|
27460 |
+
"step": 3917
|
27461 |
+
},
|
27462 |
+
{
|
27463 |
+
"epoch": 0.8706666666666667,
|
27464 |
+
"grad_norm": 0.07573316246271133,
|
27465 |
+
"learning_rate": 2.5924276169265033e-05,
|
27466 |
+
"loss": 0.0148,
|
27467 |
+
"step": 3918
|
27468 |
+
},
|
27469 |
+
{
|
27470 |
+
"epoch": 0.8708888888888889,
|
27471 |
+
"grad_norm": 0.07226064801216125,
|
27472 |
+
"learning_rate": 2.5879732739420938e-05,
|
27473 |
+
"loss": 0.015,
|
27474 |
+
"step": 3919
|
27475 |
+
},
|
27476 |
+
{
|
27477 |
+
"epoch": 0.8711111111111111,
|
27478 |
+
"grad_norm": 1.0551682710647583,
|
27479 |
+
"learning_rate": 2.583518930957684e-05,
|
27480 |
+
"loss": 1.9451,
|
27481 |
+
"step": 3920
|
27482 |
+
},
|
27483 |
+
{
|
27484 |
+
"epoch": 0.8713333333333333,
|
27485 |
+
"grad_norm": 1.0881084203720093,
|
27486 |
+
"learning_rate": 2.579064587973274e-05,
|
27487 |
+
"loss": 1.9361,
|
27488 |
+
"step": 3921
|
27489 |
+
},
|
27490 |
+
{
|
27491 |
+
"epoch": 0.8715555555555555,
|
27492 |
+
"grad_norm": 1.029228925704956,
|
27493 |
+
"learning_rate": 2.5746102449888644e-05,
|
27494 |
+
"loss": 1.97,
|
27495 |
+
"step": 3922
|
27496 |
+
},
|
27497 |
+
{
|
27498 |
+
"epoch": 0.8717777777777778,
|
27499 |
+
"grad_norm": 0.9416628479957581,
|
27500 |
+
"learning_rate": 2.5701559020044546e-05,
|
27501 |
+
"loss": 1.4815,
|
27502 |
+
"step": 3923
|
27503 |
+
},
|
27504 |
+
{
|
27505 |
+
"epoch": 0.872,
|
27506 |
+
"grad_norm": 1.8949933052062988,
|
27507 |
+
"learning_rate": 2.5657015590200445e-05,
|
27508 |
+
"loss": 2.0094,
|
27509 |
+
"step": 3924
|
27510 |
+
},
|
27511 |
+
{
|
27512 |
+
"epoch": 0.8722222222222222,
|
27513 |
+
"grad_norm": 0.9487776756286621,
|
27514 |
+
"learning_rate": 2.561247216035635e-05,
|
27515 |
+
"loss": 1.8348,
|
27516 |
+
"step": 3925
|
27517 |
+
},
|
27518 |
+
{
|
27519 |
+
"epoch": 0.8724444444444445,
|
27520 |
+
"grad_norm": 0.865877091884613,
|
27521 |
+
"learning_rate": 2.5567928730512252e-05,
|
27522 |
+
"loss": 1.5909,
|
27523 |
+
"step": 3926
|
27524 |
+
},
|
27525 |
+
{
|
27526 |
+
"epoch": 0.8726666666666667,
|
27527 |
+
"grad_norm": 0.9927725195884705,
|
27528 |
+
"learning_rate": 2.552338530066815e-05,
|
27529 |
+
"loss": 1.9465,
|
27530 |
+
"step": 3927
|
27531 |
+
},
|
27532 |
+
{
|
27533 |
+
"epoch": 0.8728888888888889,
|
27534 |
+
"grad_norm": 0.9912342429161072,
|
27535 |
+
"learning_rate": 2.5478841870824056e-05,
|
27536 |
+
"loss": 1.3772,
|
27537 |
+
"step": 3928
|
27538 |
+
},
|
27539 |
+
{
|
27540 |
+
"epoch": 0.8731111111111111,
|
27541 |
+
"grad_norm": 0.9611807465553284,
|
27542 |
+
"learning_rate": 2.5434298440979958e-05,
|
27543 |
+
"loss": 1.5165,
|
27544 |
+
"step": 3929
|
27545 |
+
},
|
27546 |
+
{
|
27547 |
+
"epoch": 0.8733333333333333,
|
27548 |
+
"grad_norm": 0.9328694343566895,
|
27549 |
+
"learning_rate": 2.5389755011135856e-05,
|
27550 |
+
"loss": 1.3826,
|
27551 |
+
"step": 3930
|
27552 |
+
},
|
27553 |
+
{
|
27554 |
+
"epoch": 0.8735555555555555,
|
27555 |
+
"grad_norm": 0.9587991237640381,
|
27556 |
+
"learning_rate": 2.5345211581291762e-05,
|
27557 |
+
"loss": 1.6781,
|
27558 |
+
"step": 3931
|
27559 |
+
},
|
27560 |
+
{
|
27561 |
+
"epoch": 0.8737777777777778,
|
27562 |
+
"grad_norm": 0.06626418977975845,
|
27563 |
+
"learning_rate": 2.5300668151447664e-05,
|
27564 |
+
"loss": 0.0176,
|
27565 |
+
"step": 3932
|
27566 |
+
},
|
27567 |
+
{
|
27568 |
+
"epoch": 0.874,
|
27569 |
+
"grad_norm": 0.06570940464735031,
|
27570 |
+
"learning_rate": 2.5256124721603562e-05,
|
27571 |
+
"loss": 0.0179,
|
27572 |
+
"step": 3933
|
27573 |
+
},
|
27574 |
+
{
|
27575 |
+
"epoch": 0.8742222222222222,
|
27576 |
+
"grad_norm": 0.06534791737794876,
|
27577 |
+
"learning_rate": 2.5211581291759468e-05,
|
27578 |
+
"loss": 0.0175,
|
27579 |
+
"step": 3934
|
27580 |
+
},
|
27581 |
+
{
|
27582 |
+
"epoch": 0.8744444444444445,
|
27583 |
+
"grad_norm": 0.07688681036233902,
|
27584 |
+
"learning_rate": 2.516703786191537e-05,
|
27585 |
+
"loss": 0.0177,
|
27586 |
+
"step": 3935
|
27587 |
+
},
|
27588 |
+
{
|
27589 |
+
"epoch": 0.8746666666666667,
|
27590 |
+
"grad_norm": 0.060970455408096313,
|
27591 |
+
"learning_rate": 2.5122494432071268e-05,
|
27592 |
+
"loss": 0.0177,
|
27593 |
+
"step": 3936
|
27594 |
+
},
|
27595 |
+
{
|
27596 |
+
"epoch": 0.8748888888888889,
|
27597 |
+
"grad_norm": 1.0032833814620972,
|
27598 |
+
"learning_rate": 2.5077951002227174e-05,
|
27599 |
+
"loss": 1.7855,
|
27600 |
+
"step": 3937
|
27601 |
+
},
|
27602 |
+
{
|
27603 |
+
"epoch": 0.8751111111111111,
|
27604 |
+
"grad_norm": 0.9916431903839111,
|
27605 |
+
"learning_rate": 2.503340757238308e-05,
|
27606 |
+
"loss": 1.7102,
|
27607 |
+
"step": 3938
|
27608 |
+
},
|
27609 |
+
{
|
27610 |
+
"epoch": 0.8753333333333333,
|
27611 |
+
"grad_norm": 0.6338675618171692,
|
27612 |
+
"learning_rate": 2.4988864142538974e-05,
|
27613 |
+
"loss": 0.744,
|
27614 |
+
"step": 3939
|
27615 |
+
},
|
27616 |
+
{
|
27617 |
+
"epoch": 0.8755555555555555,
|
27618 |
+
"grad_norm": 0.732306957244873,
|
27619 |
+
"learning_rate": 2.494432071269488e-05,
|
27620 |
+
"loss": 0.7834,
|
27621 |
+
"step": 3940
|
27622 |
+
},
|
27623 |
+
{
|
27624 |
+
"epoch": 0.8757777777777778,
|
27625 |
+
"grad_norm": 0.9343276619911194,
|
27626 |
+
"learning_rate": 2.489977728285078e-05,
|
27627 |
+
"loss": 1.6322,
|
27628 |
+
"step": 3941
|
27629 |
+
},
|
27630 |
+
{
|
27631 |
+
"epoch": 0.876,
|
27632 |
+
"grad_norm": 1.0164755582809448,
|
27633 |
+
"learning_rate": 2.4855233853006683e-05,
|
27634 |
+
"loss": 1.561,
|
27635 |
+
"step": 3942
|
27636 |
+
},
|
27637 |
+
{
|
27638 |
+
"epoch": 0.8762222222222222,
|
27639 |
+
"grad_norm": 0.968427300453186,
|
27640 |
+
"learning_rate": 2.4810690423162585e-05,
|
27641 |
+
"loss": 1.2585,
|
27642 |
+
"step": 3943
|
27643 |
+
},
|
27644 |
+
{
|
27645 |
+
"epoch": 0.8764444444444445,
|
27646 |
+
"grad_norm": 0.10157324373722076,
|
27647 |
+
"learning_rate": 2.4766146993318487e-05,
|
27648 |
+
"loss": 0.025,
|
27649 |
+
"step": 3944
|
27650 |
+
},
|
27651 |
+
{
|
27652 |
+
"epoch": 0.8766666666666667,
|
27653 |
+
"grad_norm": 1.4210426807403564,
|
27654 |
+
"learning_rate": 2.472160356347439e-05,
|
27655 |
+
"loss": 1.3668,
|
27656 |
+
"step": 3945
|
27657 |
+
},
|
27658 |
+
{
|
27659 |
+
"epoch": 0.8768888888888889,
|
27660 |
+
"grad_norm": 1.2264833450317383,
|
27661 |
+
"learning_rate": 2.467706013363029e-05,
|
27662 |
+
"loss": 1.2792,
|
27663 |
+
"step": 3946
|
27664 |
+
},
|
27665 |
+
{
|
27666 |
+
"epoch": 0.8771111111111111,
|
27667 |
+
"grad_norm": 1.10524582862854,
|
27668 |
+
"learning_rate": 2.4632516703786193e-05,
|
27669 |
+
"loss": 1.3331,
|
27670 |
+
"step": 3947
|
27671 |
+
},
|
27672 |
+
{
|
27673 |
+
"epoch": 0.8773333333333333,
|
27674 |
+
"grad_norm": 0.8390571475028992,
|
27675 |
+
"learning_rate": 2.4587973273942095e-05,
|
27676 |
+
"loss": 0.6529,
|
27677 |
+
"step": 3948
|
27678 |
+
},
|
27679 |
+
{
|
27680 |
+
"epoch": 0.8775555555555555,
|
27681 |
+
"grad_norm": 1.1716080904006958,
|
27682 |
+
"learning_rate": 2.4543429844097994e-05,
|
27683 |
+
"loss": 1.2058,
|
27684 |
+
"step": 3949
|
27685 |
+
},
|
27686 |
+
{
|
27687 |
+
"epoch": 0.8777777777777778,
|
27688 |
+
"grad_norm": 1.0907095670700073,
|
27689 |
+
"learning_rate": 2.44988864142539e-05,
|
27690 |
+
"loss": 0.9754,
|
27691 |
+
"step": 3950
|
27692 |
+
},
|
27693 |
+
{
|
27694 |
+
"epoch": 0.878,
|
27695 |
+
"grad_norm": 0.5868102312088013,
|
27696 |
+
"learning_rate": 2.44543429844098e-05,
|
27697 |
+
"loss": 1.1827,
|
27698 |
+
"step": 3951
|
27699 |
+
},
|
27700 |
+
{
|
27701 |
+
"epoch": 0.8782222222222222,
|
27702 |
+
"grad_norm": 0.8409274220466614,
|
27703 |
+
"learning_rate": 2.4409799554565703e-05,
|
27704 |
+
"loss": 2.1622,
|
27705 |
+
"step": 3952
|
27706 |
+
},
|
27707 |
+
{
|
27708 |
+
"epoch": 0.8784444444444445,
|
27709 |
+
"grad_norm": 0.6952332258224487,
|
27710 |
+
"learning_rate": 2.4365256124721605e-05,
|
27711 |
+
"loss": 1.1901,
|
27712 |
+
"step": 3953
|
27713 |
+
},
|
27714 |
+
{
|
27715 |
+
"epoch": 0.8786666666666667,
|
27716 |
+
"grad_norm": 0.519538164138794,
|
27717 |
+
"learning_rate": 2.4320712694877507e-05,
|
27718 |
+
"loss": 1.2036,
|
27719 |
+
"step": 3954
|
27720 |
+
},
|
27721 |
+
{
|
27722 |
+
"epoch": 0.8788888888888889,
|
27723 |
+
"grad_norm": 0.6376737356185913,
|
27724 |
+
"learning_rate": 2.427616926503341e-05,
|
27725 |
+
"loss": 1.1078,
|
27726 |
+
"step": 3955
|
27727 |
+
},
|
27728 |
+
{
|
27729 |
+
"epoch": 0.8791111111111111,
|
27730 |
+
"grad_norm": 0.044897519052028656,
|
27731 |
+
"learning_rate": 2.423162583518931e-05,
|
27732 |
+
"loss": 0.0104,
|
27733 |
+
"step": 3956
|
27734 |
+
},
|
27735 |
+
{
|
27736 |
+
"epoch": 0.8793333333333333,
|
27737 |
+
"grad_norm": 0.9802849292755127,
|
27738 |
+
"learning_rate": 2.4187082405345213e-05,
|
27739 |
+
"loss": 1.8767,
|
27740 |
+
"step": 3957
|
27741 |
+
},
|
27742 |
+
{
|
27743 |
+
"epoch": 0.8795555555555555,
|
27744 |
+
"grad_norm": 0.08882291615009308,
|
27745 |
+
"learning_rate": 2.4142538975501115e-05,
|
27746 |
+
"loss": 0.0114,
|
27747 |
+
"step": 3958
|
27748 |
+
},
|
27749 |
+
{
|
27750 |
+
"epoch": 0.8797777777777778,
|
27751 |
+
"grad_norm": 0.9282602667808533,
|
27752 |
+
"learning_rate": 2.4097995545657017e-05,
|
27753 |
+
"loss": 1.8114,
|
27754 |
+
"step": 3959
|
27755 |
+
},
|
27756 |
+
{
|
27757 |
+
"epoch": 0.88,
|
27758 |
+
"grad_norm": 0.9374412894248962,
|
27759 |
+
"learning_rate": 2.405345211581292e-05,
|
27760 |
+
"loss": 1.9425,
|
27761 |
+
"step": 3960
|
27762 |
+
},
|
27763 |
+
{
|
27764 |
+
"epoch": 0.8802222222222222,
|
27765 |
+
"grad_norm": 1.0642507076263428,
|
27766 |
+
"learning_rate": 2.400890868596882e-05,
|
27767 |
+
"loss": 2.585,
|
27768 |
+
"step": 3961
|
27769 |
+
},
|
27770 |
+
{
|
27771 |
+
"epoch": 0.8804444444444445,
|
27772 |
+
"grad_norm": 0.8070052862167358,
|
27773 |
+
"learning_rate": 2.3964365256124723e-05,
|
27774 |
+
"loss": 1.908,
|
27775 |
+
"step": 3962
|
27776 |
+
},
|
27777 |
+
{
|
27778 |
+
"epoch": 0.8806666666666667,
|
27779 |
+
"grad_norm": 0.8729952573776245,
|
27780 |
+
"learning_rate": 2.3919821826280625e-05,
|
27781 |
+
"loss": 1.9254,
|
27782 |
+
"step": 3963
|
27783 |
+
},
|
27784 |
+
{
|
27785 |
+
"epoch": 0.8808888888888889,
|
27786 |
+
"grad_norm": 0.12168601900339127,
|
27787 |
+
"learning_rate": 2.3875278396436527e-05,
|
27788 |
+
"loss": 0.0192,
|
27789 |
+
"step": 3964
|
27790 |
+
},
|
27791 |
+
{
|
27792 |
+
"epoch": 0.8811111111111111,
|
27793 |
+
"grad_norm": 0.6381791830062866,
|
27794 |
+
"learning_rate": 2.383073496659243e-05,
|
27795 |
+
"loss": 0.9943,
|
27796 |
+
"step": 3965
|
27797 |
+
},
|
27798 |
+
{
|
27799 |
+
"epoch": 0.8813333333333333,
|
27800 |
+
"grad_norm": 1.2023353576660156,
|
27801 |
+
"learning_rate": 2.378619153674833e-05,
|
27802 |
+
"loss": 1.9608,
|
27803 |
+
"step": 3966
|
27804 |
+
},
|
27805 |
+
{
|
27806 |
+
"epoch": 0.8815555555555555,
|
27807 |
+
"grad_norm": 0.9587229490280151,
|
27808 |
+
"learning_rate": 2.3741648106904233e-05,
|
27809 |
+
"loss": 1.8919,
|
27810 |
+
"step": 3967
|
27811 |
+
},
|
27812 |
+
{
|
27813 |
+
"epoch": 0.8817777777777778,
|
27814 |
+
"grad_norm": 1.0025968551635742,
|
27815 |
+
"learning_rate": 2.3697104677060135e-05,
|
27816 |
+
"loss": 1.953,
|
27817 |
+
"step": 3968
|
27818 |
+
},
|
27819 |
+
{
|
27820 |
+
"epoch": 0.882,
|
27821 |
+
"grad_norm": 0.9075009226799011,
|
27822 |
+
"learning_rate": 2.3652561247216037e-05,
|
27823 |
+
"loss": 1.9855,
|
27824 |
+
"step": 3969
|
27825 |
+
},
|
27826 |
+
{
|
27827 |
+
"epoch": 0.8822222222222222,
|
27828 |
+
"grad_norm": 0.06732242554426193,
|
27829 |
+
"learning_rate": 2.360801781737194e-05,
|
27830 |
+
"loss": 0.0149,
|
27831 |
+
"step": 3970
|
27832 |
+
},
|
27833 |
+
{
|
27834 |
+
"epoch": 0.8824444444444445,
|
27835 |
+
"grad_norm": 0.06586241722106934,
|
27836 |
+
"learning_rate": 2.356347438752784e-05,
|
27837 |
+
"loss": 0.015,
|
27838 |
+
"step": 3971
|
27839 |
+
},
|
27840 |
+
{
|
27841 |
+
"epoch": 0.8826666666666667,
|
27842 |
+
"grad_norm": 0.06589429080486298,
|
27843 |
+
"learning_rate": 2.3518930957683743e-05,
|
27844 |
+
"loss": 0.015,
|
27845 |
+
"step": 3972
|
27846 |
+
},
|
27847 |
+
{
|
27848 |
+
"epoch": 0.8828888888888888,
|
27849 |
+
"grad_norm": 0.7274507284164429,
|
27850 |
+
"learning_rate": 2.3474387527839645e-05,
|
27851 |
+
"loss": 0.9097,
|
27852 |
+
"step": 3973
|
27853 |
+
},
|
27854 |
+
{
|
27855 |
+
"epoch": 0.8831111111111111,
|
27856 |
+
"grad_norm": 0.9447082877159119,
|
27857 |
+
"learning_rate": 2.3429844097995547e-05,
|
27858 |
+
"loss": 1.8058,
|
27859 |
+
"step": 3974
|
27860 |
+
},
|
27861 |
+
{
|
27862 |
+
"epoch": 0.8833333333333333,
|
27863 |
+
"grad_norm": 0.13066767156124115,
|
27864 |
+
"learning_rate": 2.338530066815145e-05,
|
27865 |
+
"loss": 0.0211,
|
27866 |
+
"step": 3975
|
27867 |
+
},
|
27868 |
+
{
|
27869 |
+
"epoch": 0.8835555555555555,
|
27870 |
+
"grad_norm": 0.7804778218269348,
|
27871 |
+
"learning_rate": 2.334075723830735e-05,
|
27872 |
+
"loss": 1.0144,
|
27873 |
+
"step": 3976
|
27874 |
+
},
|
27875 |
+
{
|
27876 |
+
"epoch": 0.8837777777777778,
|
27877 |
+
"grad_norm": 1.0640380382537842,
|
27878 |
+
"learning_rate": 2.3296213808463252e-05,
|
27879 |
+
"loss": 1.7064,
|
27880 |
+
"step": 3977
|
27881 |
+
},
|
27882 |
+
{
|
27883 |
+
"epoch": 0.884,
|
27884 |
+
"grad_norm": 1.0175601243972778,
|
27885 |
+
"learning_rate": 2.3251670378619154e-05,
|
27886 |
+
"loss": 1.9517,
|
27887 |
+
"step": 3978
|
27888 |
+
},
|
27889 |
+
{
|
27890 |
+
"epoch": 0.8842222222222222,
|
27891 |
+
"grad_norm": 1.1040079593658447,
|
27892 |
+
"learning_rate": 2.3207126948775056e-05,
|
27893 |
+
"loss": 1.8058,
|
27894 |
+
"step": 3979
|
27895 |
+
},
|
27896 |
+
{
|
27897 |
+
"epoch": 0.8844444444444445,
|
27898 |
+
"grad_norm": 0.728284478187561,
|
27899 |
+
"learning_rate": 2.316258351893096e-05,
|
27900 |
+
"loss": 0.7271,
|
27901 |
+
"step": 3980
|
27902 |
+
},
|
27903 |
+
{
|
27904 |
+
"epoch": 0.8846666666666667,
|
27905 |
+
"grad_norm": 0.9347479939460754,
|
27906 |
+
"learning_rate": 2.3118040089086864e-05,
|
27907 |
+
"loss": 1.4855,
|
27908 |
+
"step": 3981
|
27909 |
+
},
|
27910 |
+
{
|
27911 |
+
"epoch": 0.8848888888888888,
|
27912 |
+
"grad_norm": 0.07804767787456512,
|
27913 |
+
"learning_rate": 2.3073496659242762e-05,
|
27914 |
+
"loss": 0.0192,
|
27915 |
+
"step": 3982
|
27916 |
+
},
|
27917 |
+
{
|
27918 |
+
"epoch": 0.8851111111111111,
|
27919 |
+
"grad_norm": 0.08066914230585098,
|
27920 |
+
"learning_rate": 2.3028953229398664e-05,
|
27921 |
+
"loss": 0.0188,
|
27922 |
+
"step": 3983
|
27923 |
+
},
|
27924 |
+
{
|
27925 |
+
"epoch": 0.8853333333333333,
|
27926 |
+
"grad_norm": 0.7905464768409729,
|
27927 |
+
"learning_rate": 2.298440979955457e-05,
|
27928 |
+
"loss": 0.8492,
|
27929 |
+
"step": 3984
|
27930 |
+
},
|
27931 |
+
{
|
27932 |
+
"epoch": 0.8855555555555555,
|
27933 |
+
"grad_norm": 1.2028931379318237,
|
27934 |
+
"learning_rate": 2.2939866369710468e-05,
|
27935 |
+
"loss": 0.9801,
|
27936 |
+
"step": 3985
|
27937 |
+
},
|
27938 |
+
{
|
27939 |
+
"epoch": 0.8857777777777778,
|
27940 |
+
"grad_norm": 0.10054640471935272,
|
27941 |
+
"learning_rate": 2.289532293986637e-05,
|
27942 |
+
"loss": 0.0247,
|
27943 |
+
"step": 3986
|
27944 |
+
},
|
27945 |
+
{
|
27946 |
+
"epoch": 0.886,
|
27947 |
+
"grad_norm": 1.1364169120788574,
|
27948 |
+
"learning_rate": 2.2850779510022272e-05,
|
27949 |
+
"loss": 1.6362,
|
27950 |
+
"step": 3987
|
27951 |
+
},
|
27952 |
+
{
|
27953 |
+
"epoch": 0.8862222222222222,
|
27954 |
+
"grad_norm": 1.2560831308364868,
|
27955 |
+
"learning_rate": 2.2806236080178174e-05,
|
27956 |
+
"loss": 1.8384,
|
27957 |
+
"step": 3988
|
27958 |
+
},
|
27959 |
+
{
|
27960 |
+
"epoch": 0.8864444444444445,
|
27961 |
+
"grad_norm": 1.1757941246032715,
|
27962 |
+
"learning_rate": 2.2761692650334076e-05,
|
27963 |
+
"loss": 1.4828,
|
27964 |
+
"step": 3989
|
27965 |
+
},
|
27966 |
+
{
|
27967 |
+
"epoch": 0.8866666666666667,
|
27968 |
+
"grad_norm": 1.120353102684021,
|
27969 |
+
"learning_rate": 2.2717149220489978e-05,
|
27970 |
+
"loss": 1.3649,
|
27971 |
+
"step": 3990
|
27972 |
+
},
|
27973 |
+
{
|
27974 |
+
"epoch": 0.8868888888888888,
|
27975 |
+
"grad_norm": 1.0847200155258179,
|
27976 |
+
"learning_rate": 2.267260579064588e-05,
|
27977 |
+
"loss": 1.7217,
|
27978 |
+
"step": 3991
|
27979 |
+
},
|
27980 |
+
{
|
27981 |
+
"epoch": 0.8871111111111111,
|
27982 |
+
"grad_norm": 1.1451468467712402,
|
27983 |
+
"learning_rate": 2.2628062360801782e-05,
|
27984 |
+
"loss": 1.6961,
|
27985 |
+
"step": 3992
|
27986 |
+
},
|
27987 |
+
{
|
27988 |
+
"epoch": 0.8873333333333333,
|
27989 |
+
"grad_norm": 1.0738978385925293,
|
27990 |
+
"learning_rate": 2.2583518930957684e-05,
|
27991 |
+
"loss": 1.4236,
|
27992 |
+
"step": 3993
|
27993 |
+
},
|
27994 |
+
{
|
27995 |
+
"epoch": 0.8875555555555555,
|
27996 |
+
"grad_norm": 1.3635321855545044,
|
27997 |
+
"learning_rate": 2.253897550111359e-05,
|
27998 |
+
"loss": 1.4417,
|
27999 |
+
"step": 3994
|
28000 |
+
},
|
28001 |
+
{
|
28002 |
+
"epoch": 0.8877777777777778,
|
28003 |
+
"grad_norm": 0.19308915734291077,
|
28004 |
+
"learning_rate": 2.2494432071269488e-05,
|
28005 |
+
"loss": 0.0304,
|
28006 |
+
"step": 3995
|
28007 |
+
},
|
28008 |
+
{
|
28009 |
+
"epoch": 0.888,
|
28010 |
+
"grad_norm": 1.2087732553482056,
|
28011 |
+
"learning_rate": 2.244988864142539e-05,
|
28012 |
+
"loss": 1.4158,
|
28013 |
+
"step": 3996
|
28014 |
+
},
|
28015 |
+
{
|
28016 |
+
"epoch": 0.8882222222222222,
|
28017 |
+
"grad_norm": 1.0328425168991089,
|
28018 |
+
"learning_rate": 2.2405345211581295e-05,
|
28019 |
+
"loss": 1.014,
|
28020 |
+
"step": 3997
|
28021 |
+
},
|
28022 |
+
{
|
28023 |
+
"epoch": 0.8884444444444445,
|
28024 |
+
"grad_norm": 0.8065721988677979,
|
28025 |
+
"learning_rate": 2.2360801781737194e-05,
|
28026 |
+
"loss": 0.6411,
|
28027 |
+
"step": 3998
|
28028 |
+
},
|
28029 |
+
{
|
28030 |
+
"epoch": 0.8886666666666667,
|
28031 |
+
"grad_norm": 0.6682571172714233,
|
28032 |
+
"learning_rate": 2.2316258351893096e-05,
|
28033 |
+
"loss": 0.5168,
|
28034 |
+
"step": 3999
|
28035 |
+
},
|
28036 |
+
{
|
28037 |
+
"epoch": 0.8888888888888888,
|
28038 |
+
"grad_norm": 0.9756750464439392,
|
28039 |
+
"learning_rate": 2.2271714922049e-05,
|
28040 |
+
"loss": 0.4815,
|
28041 |
+
"step": 4000
|
28042 |
}
|
28043 |
],
|
28044 |
"logging_steps": 1,
|
|
|
28058 |
"attributes": {}
|
28059 |
}
|
28060 |
},
|
28061 |
+
"total_flos": 4.305961824831898e+16,
|
28062 |
"train_batch_size": 1,
|
28063 |
"trial_name": null,
|
28064 |
"trial_params": null
|