Training in progress, step 555, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 167832240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bef787ba16eb3d41f4d13340c07bc05fd0530058b348be8288493c90ab94325f
|
3 |
size 167832240
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 85723732
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3635defef027975c19b220db2e7d50884b06413d7e99c178c0845e44afd1e88
|
3 |
size 85723732
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fd23d48d0a00f2bcef4e61e7b0ebc30691cd0946be2aa8316f2c61edcd7b2f1
|
3 |
size 14512
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59225b59d81ea40b5f6d48123628af66023e2b10a1af4f3a269ca2da584f937a
|
3 |
size 14512
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fa5404a9d5a7f82472a09215ed58ac37185a39533d448975bd7ab0b239729ef
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 4.
|
5 |
"eval_steps": 56,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3867,6 +3867,111 @@
|
|
3867 |
"learning_rate": 2.1753260154906973e-07,
|
3868 |
"loss": 0.2876,
|
3869 |
"step": 540
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3870 |
}
|
3871 |
],
|
3872 |
"logging_steps": 1,
|
@@ -3881,12 +3986,12 @@
|
|
3881 |
"should_evaluate": false,
|
3882 |
"should_log": false,
|
3883 |
"should_save": true,
|
3884 |
-
"should_training_stop":
|
3885 |
},
|
3886 |
"attributes": {}
|
3887 |
}
|
3888 |
},
|
3889 |
-
"total_flos": 3.
|
3890 |
"train_batch_size": 4,
|
3891 |
"trial_name": null,
|
3892 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.98876404494382,
|
5 |
"eval_steps": 56,
|
6 |
+
"global_step": 555,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3867 |
"learning_rate": 2.1753260154906973e-07,
|
3868 |
"loss": 0.2876,
|
3869 |
"step": 540
|
3870 |
+
},
|
3871 |
+
{
|
3872 |
+
"epoch": 4.8629213483146065,
|
3873 |
+
"grad_norm": 1.6425896883010864,
|
3874 |
+
"learning_rate": 1.8951279224599382e-07,
|
3875 |
+
"loss": 0.2625,
|
3876 |
+
"step": 541
|
3877 |
+
},
|
3878 |
+
{
|
3879 |
+
"epoch": 4.871910112359551,
|
3880 |
+
"grad_norm": 1.553606629371643,
|
3881 |
+
"learning_rate": 1.6342066931995802e-07,
|
3882 |
+
"loss": 0.2581,
|
3883 |
+
"step": 542
|
3884 |
+
},
|
3885 |
+
{
|
3886 |
+
"epoch": 4.880898876404494,
|
3887 |
+
"grad_norm": 1.5650955438613892,
|
3888 |
+
"learning_rate": 1.3925724254686413e-07,
|
3889 |
+
"loss": 0.2533,
|
3890 |
+
"step": 543
|
3891 |
+
},
|
3892 |
+
{
|
3893 |
+
"epoch": 4.889887640449438,
|
3894 |
+
"grad_norm": 1.6483417749404907,
|
3895 |
+
"learning_rate": 1.1702344706129298e-07,
|
3896 |
+
"loss": 0.2943,
|
3897 |
+
"step": 544
|
3898 |
+
},
|
3899 |
+
{
|
3900 |
+
"epoch": 4.898876404494382,
|
3901 |
+
"grad_norm": 1.5799306631088257,
|
3902 |
+
"learning_rate": 9.672014332028356e-08,
|
3903 |
+
"loss": 0.2944,
|
3904 |
+
"step": 545
|
3905 |
+
},
|
3906 |
+
{
|
3907 |
+
"epoch": 4.907865168539326,
|
3908 |
+
"grad_norm": 1.736416220664978,
|
3909 |
+
"learning_rate": 7.834811707005951e-08,
|
3910 |
+
"loss": 0.3674,
|
3911 |
+
"step": 546
|
3912 |
+
},
|
3913 |
+
{
|
3914 |
+
"epoch": 4.91685393258427,
|
3915 |
+
"grad_norm": 1.6711561679840088,
|
3916 |
+
"learning_rate": 6.190807931560349e-08,
|
3917 |
+
"loss": 0.2691,
|
3918 |
+
"step": 547
|
3919 |
+
},
|
3920 |
+
{
|
3921 |
+
"epoch": 4.925842696629213,
|
3922 |
+
"grad_norm": 1.663176417350769,
|
3923 |
+
"learning_rate": 4.740066629315143e-08,
|
3924 |
+
"loss": 0.2818,
|
3925 |
+
"step": 548
|
3926 |
+
},
|
3927 |
+
{
|
3928 |
+
"epoch": 4.9348314606741575,
|
3929 |
+
"grad_norm": 1.7786431312561035,
|
3930 |
+
"learning_rate": 3.482643944556219e-08,
|
3931 |
+
"loss": 0.3012,
|
3932 |
+
"step": 549
|
3933 |
+
},
|
3934 |
+
{
|
3935 |
+
"epoch": 4.943820224719101,
|
3936 |
+
"grad_norm": 1.6006958484649658,
|
3937 |
+
"learning_rate": 2.4185885400596075e-08,
|
3938 |
+
"loss": 0.2574,
|
3939 |
+
"step": 550
|
3940 |
+
},
|
3941 |
+
{
|
3942 |
+
"epoch": 4.952808988764045,
|
3943 |
+
"grad_norm": 1.7665148973464966,
|
3944 |
+
"learning_rate": 1.5479415952085463e-08,
|
3945 |
+
"loss": 0.2547,
|
3946 |
+
"step": 551
|
3947 |
+
},
|
3948 |
+
{
|
3949 |
+
"epoch": 4.961797752808989,
|
3950 |
+
"grad_norm": 1.5012476444244385,
|
3951 |
+
"learning_rate": 8.707368043975317e-09,
|
3952 |
+
"loss": 0.2478,
|
3953 |
+
"step": 552
|
3954 |
+
},
|
3955 |
+
{
|
3956 |
+
"epoch": 4.9707865168539325,
|
3957 |
+
"grad_norm": 1.7341482639312744,
|
3958 |
+
"learning_rate": 3.870003757311391e-09,
|
3959 |
+
"loss": 0.2677,
|
3960 |
+
"step": 553
|
3961 |
+
},
|
3962 |
+
{
|
3963 |
+
"epoch": 4.979775280898877,
|
3964 |
+
"grad_norm": 1.6566503047943115,
|
3965 |
+
"learning_rate": 9.675103000872376e-10,
|
3966 |
+
"loss": 0.2858,
|
3967 |
+
"step": 554
|
3968 |
+
},
|
3969 |
+
{
|
3970 |
+
"epoch": 4.98876404494382,
|
3971 |
+
"grad_norm": 1.8078707456588745,
|
3972 |
+
"learning_rate": 0.0,
|
3973 |
+
"loss": 0.3493,
|
3974 |
+
"step": 555
|
3975 |
}
|
3976 |
],
|
3977 |
"logging_steps": 1,
|
|
|
3986 |
"should_evaluate": false,
|
3987 |
"should_log": false,
|
3988 |
"should_save": true,
|
3989 |
+
"should_training_stop": true
|
3990 |
},
|
3991 |
"attributes": {}
|
3992 |
}
|
3993 |
},
|
3994 |
+
"total_flos": 3.293979539008389e+18,
|
3995 |
"train_batch_size": 4,
|
3996 |
"trial_name": null,
|
3997 |
"trial_params": null
|