Training in progress, step 2752, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d032f96718f6d142d8d7a904b55ba96f7808abcaba152861f5404a278299f59a
 size 891644712
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5a4029dba8c340d5d9ec8b63bb3fe9e4e8cde3b9c6c63750b18009a4afb761dd
 size 1783444794
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3522c8ca2140f6cef3b962bf99096e64fbc7d1c0bb35519541de70b733ef81e7
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2805ed60a71b90fa11cf913d76534a276f46b823d6cde07733a47e3c2571dca3
 size 1064
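Each file above is tracked with Git LFS, so the commit only rewrites the pointer: a `version` line, the `oid sha256:` of the new blob, and its `size` in bytes (the old hashes were truncated in this rendering). A minimal sketch, assuming the blobs have been downloaded into `last-checkpoint/`, of checking a file against its pointer:

```python
import hashlib
import os

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Stream a file and compare its sha256/size against the LFS pointer fields."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB at a time
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid/size copied from the model.safetensors pointer in this commit.
print(verify_lfs_pointer(
    "last-checkpoint/model.safetensors",
    "d032f96718f6d142d8d7a904b55ba96f7808abcaba152861f5404a278299f59a",
    891644712,
))
```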
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.526344980587909,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 2752,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8527,6 +8527,1126 @@
       "learning_rate": 5.064973629996853e-05,
       "loss": 0.3594,
       "step": 2432
+    },
+    {
+      "epoch": 1.3499722684414865,
+      "grad_norm": 0.2623654007911682,
+      "learning_rate": 5.0493545964371036e-05,
+      "loss": 0.3297,
+      "step": 2434
+    },
+    {
+      "epoch": 1.35108153078203,
+      "grad_norm": 0.9932175874710083,
+      "learning_rate": 5.03375154484238e-05,
+      "loss": 0.4032,
+      "step": 2436
+    },
+    {
+      "epoch": 1.3521907931225736,
+      "grad_norm": 0.2910614013671875,
+      "learning_rate": 5.018164525583367e-05,
+      "loss": 0.3441,
+      "step": 2438
+    },
+    {
+      "epoch": 1.3533000554631172,
+      "grad_norm": 0.33267906308174133,
+      "learning_rate": 5.0025935889789924e-05,
+      "loss": 0.4265,
+      "step": 2440
+    },
+    {
+      "epoch": 1.3544093178036607,
+      "grad_norm": 0.3080008327960968,
+      "learning_rate": 4.987038785296281e-05,
+      "loss": 0.4539,
+      "step": 2442
+    },
+    {
+      "epoch": 1.3555185801442042,
+      "grad_norm": 0.3214891850948334,
+      "learning_rate": 4.9715001647501614e-05,
+      "loss": 0.3456,
+      "step": 2444
+    },
+    {
+      "epoch": 1.3566278424847478,
+      "grad_norm": 0.3001169264316559,
+      "learning_rate": 4.955977777503319e-05,
+      "loss": 0.3448,
+      "step": 2446
+    },
+    {
+      "epoch": 1.3577371048252913,
+      "grad_norm": 0.2781212031841278,
+      "learning_rate": 4.940471673666043e-05,
+      "loss": 0.404,
+      "step": 2448
+    },
+    {
+      "epoch": 1.3588463671658348,
+      "grad_norm": 0.3622058928012848,
+      "learning_rate": 4.9249819032960555e-05,
+      "loss": 0.4242,
+      "step": 2450
+    },
+    {
+      "epoch": 1.3599556295063784,
+      "grad_norm": 0.29672783613204956,
+      "learning_rate": 4.909508516398339e-05,
+      "loss": 0.4158,
+      "step": 2452
+    },
+    {
+      "epoch": 1.361064891846922,
+      "grad_norm": 0.2889052927494049,
+      "learning_rate": 4.8940515629249905e-05,
+      "loss": 0.3568,
+      "step": 2454
+    },
+    {
+      "epoch": 1.3621741541874655,
+      "grad_norm": 0.275849312543869,
+      "learning_rate": 4.878611092775065e-05,
+      "loss": 0.4308,
+      "step": 2456
+    },
+    {
+      "epoch": 1.363283416528009,
+      "grad_norm": 0.32597821950912476,
+      "learning_rate": 4.863187155794393e-05,
+      "loss": 0.4067,
+      "step": 2458
+    },
+    {
+      "epoch": 1.3643926788685525,
+      "grad_norm": 0.3108210861682892,
+      "learning_rate": 4.847779801775436e-05,
+      "loss": 0.4421,
+      "step": 2460
+    },
+    {
+      "epoch": 1.365501941209096,
+      "grad_norm": 0.32195425033569336,
+      "learning_rate": 4.832389080457118e-05,
+      "loss": 0.4182,
+      "step": 2462
+    },
+    {
+      "epoch": 1.3666112035496396,
+      "grad_norm": 0.2620703876018524,
+      "learning_rate": 4.817015041524676e-05,
+      "loss": 0.3928,
+      "step": 2464
+    },
+    {
+      "epoch": 1.3677204658901831,
+      "grad_norm": 0.24564792215824127,
+      "learning_rate": 4.801657734609492e-05,
+      "loss": 0.2702,
+      "step": 2466
+    },
+    {
+      "epoch": 1.3688297282307267,
+      "grad_norm": 0.2738599479198456,
+      "learning_rate": 4.786317209288923e-05,
+      "loss": 0.3073,
+      "step": 2468
+    },
+    {
+      "epoch": 1.3699389905712702,
+      "grad_norm": 0.4185931086540222,
+      "learning_rate": 4.7709935150861526e-05,
+      "loss": 0.4577,
+      "step": 2470
+    },
+    {
+      "epoch": 1.3710482529118138,
+      "grad_norm": 0.29279306530952454,
+      "learning_rate": 4.7556867014700435e-05,
+      "loss": 0.3537,
+      "step": 2472
+    },
+    {
+      "epoch": 1.3721575152523573,
+      "grad_norm": 0.31645312905311584,
+      "learning_rate": 4.740396817854945e-05,
+      "loss": 0.3374,
+      "step": 2474
+    },
+    {
+      "epoch": 1.3732667775929008,
+      "grad_norm": 0.27415481209754944,
+      "learning_rate": 4.7251239136005586e-05,
+      "loss": 0.3386,
+      "step": 2476
+    },
+    {
+      "epoch": 1.3743760399334444,
+      "grad_norm": 0.3509266674518585,
+      "learning_rate": 4.709868038011777e-05,
+      "loss": 0.411,
+      "step": 2478
+    },
+    {
+      "epoch": 1.375485302273988,
+      "grad_norm": 0.31094875931739807,
+      "learning_rate": 4.694629240338517e-05,
+      "loss": 0.3523,
+      "step": 2480
+    },
+    {
+      "epoch": 1.3765945646145314,
+      "grad_norm": 0.37327978014945984,
+      "learning_rate": 4.6794075697755626e-05,
+      "loss": 0.3622,
+      "step": 2482
+    },
+    {
+      "epoch": 1.377703826955075,
+      "grad_norm": 0.2905229330062866,
+      "learning_rate": 4.6642030754624e-05,
+      "loss": 0.3576,
+      "step": 2484
+    },
+    {
+      "epoch": 1.3788130892956185,
+      "grad_norm": 0.2602270245552063,
+      "learning_rate": 4.6490158064830834e-05,
+      "loss": 0.3093,
+      "step": 2486
+    },
+    {
+      "epoch": 1.379922351636162,
+      "grad_norm": 0.23383758962154388,
+      "learning_rate": 4.6338458118660434e-05,
+      "loss": 0.4123,
+      "step": 2488
+    },
+    {
+      "epoch": 1.3810316139767056,
+      "grad_norm": 0.2982844114303589,
+      "learning_rate": 4.618693140583946e-05,
+      "loss": 0.3948,
+      "step": 2490
+    },
+    {
+      "epoch": 1.3821408763172491,
+      "grad_norm": 0.2675088047981262,
+      "learning_rate": 4.603557841553542e-05,
+      "loss": 0.4248,
+      "step": 2492
+    },
+    {
+      "epoch": 1.3832501386577927,
+      "grad_norm": 0.28989455103874207,
+      "learning_rate": 4.588439963635498e-05,
+      "loss": 0.3647,
+      "step": 2494
+    },
+    {
+      "epoch": 1.3843594009983362,
+      "grad_norm": 0.2427791953086853,
+      "learning_rate": 4.573339555634235e-05,
+      "loss": 0.2877,
+      "step": 2496
+    },
+    {
+      "epoch": 1.3854686633388797,
+      "grad_norm": 0.36453622579574585,
+      "learning_rate": 4.558256666297773e-05,
+      "loss": 0.4094,
+      "step": 2498
+    },
+    {
+      "epoch": 1.3865779256794233,
+      "grad_norm": 0.35472291707992554,
+      "learning_rate": 4.543191344317594e-05,
+      "loss": 0.4244,
+      "step": 2500
+    },
+    {
+      "epoch": 1.3876871880199668,
+      "grad_norm": 0.33136647939682007,
+      "learning_rate": 4.5281436383284525e-05,
+      "loss": 0.3849,
+      "step": 2502
+    },
+    {
+      "epoch": 1.3887964503605104,
+      "grad_norm": 0.33231866359710693,
+      "learning_rate": 4.513113596908235e-05,
+      "loss": 0.3873,
+      "step": 2504
+    },
+    {
+      "epoch": 1.389905712701054,
+      "grad_norm": 0.22978711128234863,
+      "learning_rate": 4.49810126857781e-05,
+      "loss": 0.2924,
+      "step": 2506
+    },
+    {
+      "epoch": 1.3910149750415974,
+      "grad_norm": 0.2566686272621155,
+      "learning_rate": 4.483106701800864e-05,
+      "loss": 0.2569,
+      "step": 2508
+    },
+    {
+      "epoch": 1.392124237382141,
+      "grad_norm": 0.29530206322669983,
+      "learning_rate": 4.468129944983738e-05,
+      "loss": 0.4219,
+      "step": 2510
+    },
+    {
+      "epoch": 1.3932334997226845,
+      "grad_norm": 0.3155916929244995,
+      "learning_rate": 4.453171046475274e-05,
+      "loss": 0.4482,
+      "step": 2512
+    },
+    {
+      "epoch": 1.394342762063228,
+      "grad_norm": 0.36319971084594727,
+      "learning_rate": 4.438230054566678e-05,
+      "loss": 0.4071,
+      "step": 2514
+    },
+    {
+      "epoch": 1.3954520244037716,
+      "grad_norm": 0.3203721344470978,
+      "learning_rate": 4.423307017491336e-05,
+      "loss": 0.3511,
+      "step": 2516
+    },
+    {
+      "epoch": 1.3965612867443151,
+      "grad_norm": 0.297139048576355,
+      "learning_rate": 4.4084019834246746e-05,
+      "loss": 0.3582,
+      "step": 2518
+    },
+    {
+      "epoch": 1.3976705490848587,
+      "grad_norm": 0.5080666542053223,
+      "learning_rate": 4.3935150004839996e-05,
+      "loss": 0.5732,
+      "step": 2520
+    },
+    {
+      "epoch": 1.3987798114254022,
+      "grad_norm": 0.3271535336971283,
+      "learning_rate": 4.3786461167283496e-05,
+      "loss": 0.3288,
+      "step": 2522
+    },
+    {
+      "epoch": 1.3998890737659457,
+      "grad_norm": 0.3107675015926361,
+      "learning_rate": 4.3637953801583344e-05,
+      "loss": 0.3273,
+      "step": 2524
+    },
+    {
+      "epoch": 1.4009983361064893,
+      "grad_norm": 0.25614145398139954,
+      "learning_rate": 4.3489628387159706e-05,
+      "loss": 0.3979,
+      "step": 2526
+    },
+    {
+      "epoch": 1.4021075984470328,
+      "grad_norm": 0.3792392313480377,
+      "learning_rate": 4.334148540284542e-05,
+      "loss": 0.3923,
+      "step": 2528
+    },
+    {
+      "epoch": 1.4032168607875763,
+      "grad_norm": 0.440168172121048,
+      "learning_rate": 4.3193525326884435e-05,
+      "loss": 0.395,
+      "step": 2530
+    },
+    {
+      "epoch": 1.4043261231281199,
+      "grad_norm": 0.3226903975009918,
+      "learning_rate": 4.304574863693015e-05,
+      "loss": 0.4261,
+      "step": 2532
+    },
+    {
+      "epoch": 1.4054353854686634,
+      "grad_norm": 0.2869783937931061,
+      "learning_rate": 4.289815581004396e-05,
+      "loss": 0.3517,
+      "step": 2534
+    },
+    {
+      "epoch": 1.406544647809207,
+      "grad_norm": 0.2879214584827423,
+      "learning_rate": 4.275074732269373e-05,
+      "loss": 0.4079,
+      "step": 2536
+    },
+    {
+      "epoch": 1.4076539101497505,
+      "grad_norm": 0.38455456495285034,
+      "learning_rate": 4.260352365075226e-05,
+      "loss": 0.3773,
+      "step": 2538
+    },
+    {
+      "epoch": 1.408763172490294,
+      "grad_norm": 0.33934473991394043,
+      "learning_rate": 4.245648526949567e-05,
+      "loss": 0.326,
+      "step": 2540
+    },
+    {
+      "epoch": 1.4098724348308376,
+      "grad_norm": 0.35121288895606995,
+      "learning_rate": 4.230963265360185e-05,
+      "loss": 0.3952,
+      "step": 2542
+    },
+    {
+      "epoch": 1.410981697171381,
+      "grad_norm": 0.3085687756538391,
+      "learning_rate": 4.216296627714915e-05,
+      "loss": 0.3655,
+      "step": 2544
+    },
+    {
+      "epoch": 1.4120909595119246,
+      "grad_norm": 0.320854127407074,
+      "learning_rate": 4.201648661361457e-05,
+      "loss": 0.426,
+      "step": 2546
+    },
+    {
+      "epoch": 1.4132002218524682,
+      "grad_norm": 0.24805665016174316,
+      "learning_rate": 4.187019413587234e-05,
+      "loss": 0.2548,
+      "step": 2548
+    },
+    {
+      "epoch": 1.4143094841930117,
+      "grad_norm": 0.3249068260192871,
+      "learning_rate": 4.172408931619249e-05,
+      "loss": 0.3629,
+      "step": 2550
+    },
+    {
+      "epoch": 1.4154187465335553,
+      "grad_norm": 0.3463628888130188,
+      "learning_rate": 4.1578172626239245e-05,
+      "loss": 0.4273,
+      "step": 2552
+    },
+    {
+      "epoch": 1.4165280088740988,
+      "grad_norm": 0.3546282649040222,
+      "learning_rate": 4.143244453706941e-05,
+      "loss": 0.4464,
+      "step": 2554
+    },
+    {
+      "epoch": 1.4176372712146423,
+      "grad_norm": 0.2811850607395172,
+      "learning_rate": 4.1286905519130955e-05,
+      "loss": 0.3781,
+      "step": 2556
+    },
+    {
+      "epoch": 1.4187465335551859,
+      "grad_norm": 0.2609008252620697,
+      "learning_rate": 4.114155604226159e-05,
+      "loss": 0.3267,
+      "step": 2558
+    },
+    {
+      "epoch": 1.4198557958957294,
+      "grad_norm": 0.35078784823417664,
+      "learning_rate": 4.0996396575687e-05,
+      "loss": 0.4347,
+      "step": 2560
+    },
+    {
+      "epoch": 1.420965058236273,
+      "grad_norm": 0.26544249057769775,
+      "learning_rate": 4.085142758801953e-05,
+      "loss": 0.341,
+      "step": 2562
+    },
+    {
+      "epoch": 1.4220743205768165,
+      "grad_norm": 0.25733861327171326,
+      "learning_rate": 4.070664954725657e-05,
+      "loss": 0.3998,
+      "step": 2564
+    },
+    {
+      "epoch": 1.42318358291736,
+      "grad_norm": 0.21705186367034912,
+      "learning_rate": 4.056206292077915e-05,
+      "loss": 0.3024,
+      "step": 2566
+    },
+    {
+      "epoch": 1.4242928452579036,
+      "grad_norm": 0.2453695386648178,
+      "learning_rate": 4.0417668175350365e-05,
+      "loss": 0.3176,
+      "step": 2568
+    },
+    {
+      "epoch": 1.425402107598447,
+      "grad_norm": 0.31507784128189087,
+      "learning_rate": 4.0273465777113804e-05,
+      "loss": 0.4491,
+      "step": 2570
+    },
+    {
+      "epoch": 1.4265113699389906,
+      "grad_norm": 0.2840031087398529,
+      "learning_rate": 4.0129456191592106e-05,
+      "loss": 0.3364,
+      "step": 2572
+    },
+    {
+      "epoch": 1.4276206322795342,
+      "grad_norm": 0.2707609236240387,
+      "learning_rate": 3.9985639883685566e-05,
+      "loss": 0.3686,
+      "step": 2574
+    },
+    {
+      "epoch": 1.4287298946200777,
+      "grad_norm": 0.30946969985961914,
+      "learning_rate": 3.984201731767042e-05,
+      "loss": 0.3125,
+      "step": 2576
+    },
+    {
+      "epoch": 1.4298391569606212,
+      "grad_norm": 0.3130171597003937,
+      "learning_rate": 3.9698588957197456e-05,
+      "loss": 0.3413,
+      "step": 2578
+    },
+    {
+      "epoch": 1.4309484193011648,
+      "grad_norm": 0.3146243989467621,
+      "learning_rate": 3.9555355265290605e-05,
+      "loss": 0.3603,
+      "step": 2580
+    },
+    {
+      "epoch": 1.4320576816417083,
+      "grad_norm": 0.3555578887462616,
+      "learning_rate": 3.9412316704345307e-05,
+      "loss": 0.4793,
+      "step": 2582
+    },
+    {
+      "epoch": 1.4331669439822519,
+      "grad_norm": 0.26377207040786743,
+      "learning_rate": 3.9269473736127075e-05,
+      "loss": 0.421,
+      "step": 2584
+    },
+    {
+      "epoch": 1.4342762063227954,
+      "grad_norm": 0.3223215937614441,
+      "learning_rate": 3.9126826821769916e-05,
+      "loss": 0.4443,
+      "step": 2586
+    },
+    {
+      "epoch": 1.435385468663339,
+      "grad_norm": 0.36294499039649963,
+      "learning_rate": 3.898437642177508e-05,
+      "loss": 0.4517,
+      "step": 2588
+    },
+    {
+      "epoch": 1.4364947310038825,
+      "grad_norm": 0.3447701036930084,
+      "learning_rate": 3.8842122996009324e-05,
+      "loss": 0.4231,
+      "step": 2590
+    },
+    {
+      "epoch": 1.437603993344426,
+      "grad_norm": 0.27772748470306396,
+      "learning_rate": 3.8700067003703474e-05,
+      "loss": 0.4101,
+      "step": 2592
+    },
+    {
+      "epoch": 1.4387132556849695,
+      "grad_norm": 0.28603261709213257,
+      "learning_rate": 3.8558208903451096e-05,
+      "loss": 0.3622,
+      "step": 2594
+    },
+    {
+      "epoch": 1.439822518025513,
+      "grad_norm": 0.3110034763813019,
+      "learning_rate": 3.84165491532069e-05,
+      "loss": 0.3487,
+      "step": 2596
+    },
+    {
+      "epoch": 1.4409317803660566,
+      "grad_norm": 0.41178080439567566,
+      "learning_rate": 3.8275088210285195e-05,
+      "loss": 0.4317,
+      "step": 2598
+    },
+    {
+      "epoch": 1.4420410427066002,
+      "grad_norm": 0.35106709599494934,
+      "learning_rate": 3.813382653135849e-05,
+      "loss": 0.3458,
+      "step": 2600
+    },
+    {
+      "epoch": 1.4431503050471437,
+      "grad_norm": 0.28827354311943054,
+      "learning_rate": 3.799276457245612e-05,
+      "loss": 0.4995,
+      "step": 2602
+    },
+    {
+      "epoch": 1.4442595673876872,
+      "grad_norm": 0.2418784201145172,
+      "learning_rate": 3.785190278896258e-05,
+      "loss": 0.3832,
+      "step": 2604
+    },
+    {
+      "epoch": 1.4453688297282308,
+      "grad_norm": 0.2937265932559967,
+      "learning_rate": 3.771124163561614e-05,
+      "loss": 0.4129,
+      "step": 2606
+    },
+    {
+      "epoch": 1.4464780920687743,
+      "grad_norm": 0.31382623314857483,
+      "learning_rate": 3.757078156650745e-05,
+      "loss": 0.3812,
+      "step": 2608
+    },
+    {
+      "epoch": 1.4475873544093179,
+      "grad_norm": 0.4727577269077301,
+      "learning_rate": 3.7430523035078016e-05,
+      "loss": 0.331,
+      "step": 2610
+    },
+    {
+      "epoch": 1.4486966167498614,
+      "grad_norm": 0.2999882102012634,
+      "learning_rate": 3.729046649411865e-05,
+      "loss": 0.442,
+      "step": 2612
+    },
+    {
+      "epoch": 1.449805879090405,
+      "grad_norm": 0.23084048926830292,
+      "learning_rate": 3.715061239576809e-05,
+      "loss": 0.4015,
+      "step": 2614
+    },
+    {
+      "epoch": 1.4509151414309485,
+      "grad_norm": 0.303265780210495,
+      "learning_rate": 3.701096119151165e-05,
+      "loss": 0.3631,
+      "step": 2616
+    },
+    {
+      "epoch": 1.452024403771492,
+      "grad_norm": 0.27064260840415955,
+      "learning_rate": 3.687151333217952e-05,
+      "loss": 0.3674,
+      "step": 2618
+    },
+    {
+      "epoch": 1.4531336661120355,
+      "grad_norm": 0.3085685074329376,
+      "learning_rate": 3.6732269267945506e-05,
+      "loss": 0.3967,
+      "step": 2620
+    },
+    {
+      "epoch": 1.454242928452579,
+      "grad_norm": 0.30328476428985596,
+      "learning_rate": 3.659322944832545e-05,
+      "loss": 0.3723,
+      "step": 2622
+    },
+    {
+      "epoch": 1.4553521907931226,
+      "grad_norm": 0.4076519012451172,
+      "learning_rate": 3.645439432217593e-05,
+      "loss": 0.3435,
+      "step": 2624
+    },
+    {
+      "epoch": 1.4564614531336662,
+      "grad_norm": 0.32473883032798767,
+      "learning_rate": 3.63157643376927e-05,
+      "loss": 0.3348,
+      "step": 2626
+    },
+    {
+      "epoch": 1.4575707154742097,
+      "grad_norm": 0.37467846274375916,
+      "learning_rate": 3.617733994240921e-05,
+      "loss": 0.4531,
+      "step": 2628
+    },
+    {
+      "epoch": 1.4586799778147532,
+      "grad_norm": 0.25045719742774963,
+      "learning_rate": 3.6039121583195224e-05,
+      "loss": 0.3328,
+      "step": 2630
+    },
+    {
+      "epoch": 1.4597892401552968,
+      "grad_norm": 0.28525876998901367,
+      "learning_rate": 3.590110970625543e-05,
+      "loss": 0.2818,
+      "step": 2632
+    },
+    {
+      "epoch": 1.4608985024958403,
+      "grad_norm": 0.3426540791988373,
+      "learning_rate": 3.576330475712788e-05,
+      "loss": 0.4207,
+      "step": 2634
+    },
+    {
+      "epoch": 1.4620077648363838,
+      "grad_norm": 0.2515394985675812,
+      "learning_rate": 3.562570718068259e-05,
+      "loss": 0.2807,
+      "step": 2636
+    },
+    {
+      "epoch": 1.4631170271769274,
+      "grad_norm": 0.33639976382255554,
+      "learning_rate": 3.5488317421120174e-05,
+      "loss": 0.4283,
+      "step": 2638
+    },
+    {
+      "epoch": 1.464226289517471,
+      "grad_norm": 0.3443042039871216,
+      "learning_rate": 3.535113592197041e-05,
+      "loss": 0.4462,
+      "step": 2640
+    },
+    {
+      "epoch": 1.4653355518580145,
+      "grad_norm": 0.26121675968170166,
+      "learning_rate": 3.521416312609064e-05,
+      "loss": 0.3072,
+      "step": 2642
+    },
+    {
+      "epoch": 1.466444814198558,
+      "grad_norm": 0.2611722946166992,
+      "learning_rate": 3.507739947566447e-05,
+      "loss": 0.3332,
+      "step": 2644
+    },
+    {
+      "epoch": 1.4675540765391015,
+      "grad_norm": 0.28175342082977295,
+      "learning_rate": 3.4940845412200465e-05,
+      "loss": 0.3816,
+      "step": 2646
+    },
+    {
+      "epoch": 1.468663338879645,
+      "grad_norm": 0.26533833146095276,
+      "learning_rate": 3.480450137653043e-05,
+      "loss": 0.3493,
+      "step": 2648
+    },
+    {
+      "epoch": 1.4697726012201886,
+      "grad_norm": 0.28460225462913513,
+      "learning_rate": 3.466836780880818e-05,
+      "loss": 0.351,
+      "step": 2650
+    },
+    {
+      "epoch": 1.4708818635607321,
+      "grad_norm": 0.2972380518913269,
+      "learning_rate": 3.4532445148508164e-05,
+      "loss": 0.3784,
+      "step": 2652
+    },
+    {
+      "epoch": 1.4719911259012757,
+      "grad_norm": 0.23668786883354187,
+      "learning_rate": 3.439673383442393e-05,
+      "loss": 0.289,
+      "step": 2654
+    },
+    {
+      "epoch": 1.4731003882418192,
+      "grad_norm": 0.3169480264186859,
+      "learning_rate": 3.426123430466672e-05,
+      "loss": 0.4497,
+      "step": 2656
+    },
+    {
+      "epoch": 1.4742096505823628,
+      "grad_norm": 0.33888205885887146,
+      "learning_rate": 3.412594699666406e-05,
+      "loss": 0.3623,
+      "step": 2658
+    },
+    {
+      "epoch": 1.4753189129229063,
+      "grad_norm": 0.32237014174461365,
+      "learning_rate": 3.399087234715846e-05,
+      "loss": 0.3569,
+      "step": 2660
+    },
+    {
+      "epoch": 1.4764281752634498,
+      "grad_norm": 0.29429054260253906,
+      "learning_rate": 3.3856010792205836e-05,
+      "loss": 0.3411,
+      "step": 2662
+    },
+    {
+      "epoch": 1.4775374376039934,
+      "grad_norm": 0.2668110728263855,
+      "learning_rate": 3.372136276717417e-05,
+      "loss": 0.4503,
+      "step": 2664
+    },
+    {
+      "epoch": 1.478646699944537,
+      "grad_norm": 0.30392691493034363,
+      "learning_rate": 3.358692870674219e-05,
+      "loss": 0.406,
+      "step": 2666
+    },
+    {
+      "epoch": 1.4797559622850804,
+      "grad_norm": 0.3019475042819977,
+      "learning_rate": 3.34527090448978e-05,
+      "loss": 0.2844,
+      "step": 2668
+    },
+    {
+      "epoch": 1.480865224625624,
+      "grad_norm": 0.3453599214553833,
+      "learning_rate": 3.331870421493688e-05,
+      "loss": 0.5458,
+      "step": 2670
+    },
+    {
+      "epoch": 1.4819744869661675,
+      "grad_norm": 0.36470338702201843,
+      "learning_rate": 3.318491464946163e-05,
+      "loss": 0.3794,
+      "step": 2672
+    },
+    {
+      "epoch": 1.483083749306711,
+      "grad_norm": 0.3321688771247864,
+      "learning_rate": 3.3051340780379494e-05,
+      "loss": 0.4129,
+      "step": 2674
+    },
+    {
+      "epoch": 1.4841930116472546,
+      "grad_norm": 0.34594064950942993,
+      "learning_rate": 3.291798303890146e-05,
+      "loss": 0.3418,
+      "step": 2676
+    },
+    {
+      "epoch": 1.4853022739877981,
+      "grad_norm": 0.33681973814964294,
+      "learning_rate": 3.2784841855540835e-05,
+      "loss": 0.3854,
+      "step": 2678
+    },
+    {
+      "epoch": 1.4864115363283417,
+      "grad_norm": 0.2645164430141449,
+      "learning_rate": 3.265191766011181e-05,
+      "loss": 0.3878,
+      "step": 2680
+    },
+    {
+      "epoch": 1.4875207986688852,
+      "grad_norm": 0.29455429315567017,
+      "learning_rate": 3.2519210881728114e-05,
+      "loss": 0.4093,
+      "step": 2682
+    },
+    {
+      "epoch": 1.4886300610094287,
+      "grad_norm": 0.3346690833568573,
+      "learning_rate": 3.238672194880162e-05,
+      "loss": 0.5245,
+      "step": 2684
+    },
+    {
+      "epoch": 1.4897393233499723,
+      "grad_norm": 0.24734847247600555,
+      "learning_rate": 3.2254451289040886e-05,
+      "loss": 0.3321,
+      "step": 2686
+    },
+    {
+      "epoch": 1.4908485856905158,
+      "grad_norm": 0.29895398020744324,
+      "learning_rate": 3.212239932944979e-05,
+      "loss": 0.4684,
+      "step": 2688
+    },
+    {
+      "epoch": 1.4919578480310594,
+      "grad_norm": 0.2465728223323822,
+      "learning_rate": 3.1990566496326333e-05,
+      "loss": 0.3415,
+      "step": 2690
+    },
+    {
+      "epoch": 1.493067110371603,
+      "grad_norm": 0.4069317579269409,
+      "learning_rate": 3.185895321526099e-05,
+      "loss": 0.3942,
+      "step": 2692
+    },
+    {
+      "epoch": 1.4941763727121464,
+      "grad_norm": 0.38210323452949524,
+      "learning_rate": 3.1727559911135464e-05,
+      "loss": 0.4437,
+      "step": 2694
+    },
+    {
+      "epoch": 1.49528563505269,
+      "grad_norm": 0.34570619463920593,
+      "learning_rate": 3.1596387008121385e-05,
+      "loss": 0.357,
+      "step": 2696
+    },
+    {
+      "epoch": 1.4963948973932335,
+      "grad_norm": 0.27611759305000305,
+      "learning_rate": 3.146543492967889e-05,
+      "loss": 0.4378,
+      "step": 2698
+    },
+    {
+      "epoch": 1.497504159733777,
+      "grad_norm": 0.3507147431373596,
+      "learning_rate": 3.133470409855516e-05,
+      "loss": 0.4252,
+      "step": 2700
+    },
+    {
+      "epoch": 1.4986134220743206,
+      "grad_norm": 0.25004035234451294,
+      "learning_rate": 3.1204194936783114e-05,
+      "loss": 0.3186,
+      "step": 2702
+    },
+    {
+      "epoch": 1.4997226844148641,
+      "grad_norm": 0.28456154465675354,
+      "learning_rate": 3.1073907865680195e-05,
+      "loss": 0.4283,
+      "step": 2704
+    },
+    {
+      "epoch": 1.5008319467554077,
+      "grad_norm": 0.22293393313884735,
+      "learning_rate": 3.094384330584674e-05,
+      "loss": 0.3121,
+      "step": 2706
+    },
+    {
+      "epoch": 1.5019412090959512,
+      "grad_norm": 0.3094489872455597,
+      "learning_rate": 3.0814001677164816e-05,
+      "loss": 0.4233,
+      "step": 2708
+    },
+    {
+      "epoch": 1.5030504714364947,
+      "grad_norm": 0.3050517141819,
+      "learning_rate": 3.0684383398796834e-05,
+      "loss": 0.4525,
+      "step": 2710
+    },
+    {
+      "epoch": 1.5041597337770383,
+      "grad_norm": 0.372335821390152,
+      "learning_rate": 3.055498888918419e-05,
+      "loss": 0.4682,
+      "step": 2712
+    },
+    {
+      "epoch": 1.5052689961175818,
+      "grad_norm": 0.24653975665569305,
+      "learning_rate": 3.042581856604583e-05,
+      "loss": 0.3376,
+      "step": 2714
+    },
+    {
+      "epoch": 1.5063782584581253,
+      "grad_norm": 0.3160128891468048,
+      "learning_rate": 3.0296872846376945e-05,
+      "loss": 0.4083,
+      "step": 2716
+    },
+    {
+      "epoch": 1.5074875207986689,
+      "grad_norm": 0.4075262248516083,
+      "learning_rate": 3.016815214644778e-05,
+      "loss": 0.3918,
+      "step": 2718
+    },
+    {
+      "epoch": 1.5085967831392124,
+      "grad_norm": 0.2748975157737732,
+      "learning_rate": 3.003965688180206e-05,
+      "loss": 0.425,
+      "step": 2720
+    },
+    {
+      "epoch": 1.509706045479756,
+      "grad_norm": 0.3711773753166199,
+      "learning_rate": 2.9911387467255734e-05,
+      "loss": 0.3966,
+      "step": 2722
+    },
+    {
+      "epoch": 1.5108153078202995,
+      "grad_norm": 0.2733793258666992,
+      "learning_rate": 2.978334431689568e-05,
+      "loss": 0.4595,
+      "step": 2724
+    },
+    {
+      "epoch": 1.511924570160843,
+      "grad_norm": 0.33144858479499817,
+      "learning_rate": 2.9655527844078345e-05,
+      "loss": 0.3786,
+      "step": 2726
+    },
+    {
+      "epoch": 1.5130338325013866,
+      "grad_norm": 0.308682918548584,
+      "learning_rate": 2.9527938461428428e-05,
+      "loss": 0.3316,
+      "step": 2728
+    },
+    {
+      "epoch": 1.51414309484193,
+      "grad_norm": 0.23612940311431885,
+      "learning_rate": 2.940057658083747e-05,
+      "loss": 0.3468,
+      "step": 2730
+    },
+    {
+      "epoch": 1.5152523571824736,
+      "grad_norm": 0.27691999077796936,
+      "learning_rate": 2.9273442613462543e-05,
+      "loss": 0.4282,
+      "step": 2732
+    },
+    {
+      "epoch": 1.5163616195230172,
+      "grad_norm": 0.31364646553993225,
+      "learning_rate": 2.914653696972508e-05,
+      "loss": 0.3863,
+      "step": 2734
+    },
+    {
+      "epoch": 1.5174708818635607,
+      "grad_norm": 0.3351525366306305,
+      "learning_rate": 2.9019860059309335e-05,
+      "loss": 0.3861,
+      "step": 2736
+    },
+    {
+      "epoch": 1.5185801442041043,
+      "grad_norm": 0.24499474465847015,
+      "learning_rate": 2.8893412291161114e-05,
+      "loss": 0.3362,
+      "step": 2738
+    },
+    {
+      "epoch": 1.5196894065446478,
+      "grad_norm": 0.249772846698761,
+      "learning_rate": 2.876719407348659e-05,
+      "loss": 0.3336,
+      "step": 2740
+    },
+    {
+      "epoch": 1.5207986688851913,
+      "grad_norm": 0.2366907149553299,
+      "learning_rate": 2.864120581375088e-05,
+      "loss": 0.4717,
+      "step": 2742
+    },
+    {
+      "epoch": 1.5219079312257349,
+      "grad_norm": 0.2543250322341919,
+      "learning_rate": 2.8515447918676664e-05,
+      "loss": 0.338,
+      "step": 2744
+    },
+    {
+      "epoch": 1.5230171935662784,
+      "grad_norm": 0.3489428162574768,
+      "learning_rate": 2.8389920794242963e-05,
+      "loss": 0.4358,
+      "step": 2746
+    },
+    {
+      "epoch": 1.524126455906822,
+      "grad_norm": 0.32319262623786926,
+      "learning_rate": 2.8264624845683894e-05,
+      "loss": 0.4148,
+      "step": 2748
+    },
+    {
+      "epoch": 1.5252357182473655,
+      "grad_norm": 0.32093530893325806,
+      "learning_rate": 2.813956047748717e-05,
+      "loss": 0.4165,
+      "step": 2750
+    },
+    {
+      "epoch": 1.526344980587909,
+      "grad_norm": 0.28289756178855896,
+      "learning_rate": 2.801472809339294e-05,
+      "loss": 0.3601,
+      "step": 2752
     }
   ],
   "logging_steps": 2,
@@ -8546,7 +9666,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 6702951740866560.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
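The trainer_state.json hunk appends 160 log entries (steps 2434-2752, one every 2 steps, matching `"logging_steps": 2`) and advances `epoch` and `global_step` to match. A small sketch of inspecting the downloaded state file; the path is assumed from this repository's layout, and the `log_history` key follows the usual transformers `TrainerState` format that this file appears to use:

```python
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["epoch"], state["global_step"])  # 1.526344980587909 2752

# The entries added in this commit sit at the end of "log_history";
# each carries epoch, grad_norm, learning_rate, loss, and step.
new_entries = [e for e in state["log_history"] if e.get("step", 0) > 2432]
losses = [e["loss"] for e in new_entries if "loss" in e]
print(len(new_entries), "entries, mean loss", round(sum(losses) / len(losses), 4))

# global_step / epoch works out to roughly 1803 optimizer steps per epoch.
# Training can typically be resumed from this directory with:
#   trainer.train(resume_from_checkpoint="last-checkpoint")
```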