Femboyuwu2000
commited on
Commit
•
72d6f61
1
Parent(s):
ce9b4c1
Training in progress, step 9040, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13982248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba139240646f433f9f0106d855aea9e350f07f031693b316743dd388f006a45a
|
3 |
size 13982248
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7062522
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd6c424fd81f222bd0b9ad0388c7d2dd3efa61cf565f0946a945da841c18c43f
|
3 |
size 7062522
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ac19e6550ed28e30f1e0773f61b5a93cb672dde550a8914d4067b84caffee97
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bb2ad4299490e0d014e4083d00933053d6f0c483383ad6b71adbd0a8a72b6c2
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3122,6 +3122,55 @@
|
|
3122 |
"learning_rate": 1.8371635972030942e-05,
|
3123 |
"loss": 3.4938,
|
3124 |
"step": 8900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3125 |
}
|
3126 |
],
|
3127 |
"logging_steps": 20,
|
@@ -3129,7 +3178,7 @@
|
|
3129 |
"num_input_tokens_seen": 0,
|
3130 |
"num_train_epochs": 2,
|
3131 |
"save_steps": 20,
|
3132 |
-
"total_flos": 2.
|
3133 |
"train_batch_size": 8,
|
3134 |
"trial_name": null,
|
3135 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7232,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 9040,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3122 |
"learning_rate": 1.8371635972030942e-05,
|
3123 |
"loss": 3.4938,
|
3124 |
"step": 8900
|
3125 |
+
},
|
3126 |
+
{
|
3127 |
+
"epoch": 0.71,
|
3128 |
+
"grad_norm": 25.6701602935791,
|
3129 |
+
"learning_rate": 1.8324280212401316e-05,
|
3130 |
+
"loss": 3.3931,
|
3131 |
+
"step": 8920
|
3132 |
+
},
|
3133 |
+
{
|
3134 |
+
"epoch": 0.72,
|
3135 |
+
"grad_norm": 27.28550148010254,
|
3136 |
+
"learning_rate": 1.8276889582645278e-05,
|
3137 |
+
"loss": 3.426,
|
3138 |
+
"step": 8940
|
3139 |
+
},
|
3140 |
+
{
|
3141 |
+
"epoch": 0.72,
|
3142 |
+
"grad_norm": 26.73450469970703,
|
3143 |
+
"learning_rate": 1.8229464579868124e-05,
|
3144 |
+
"loss": 3.4074,
|
3145 |
+
"step": 8960
|
3146 |
+
},
|
3147 |
+
{
|
3148 |
+
"epoch": 0.72,
|
3149 |
+
"grad_norm": 30.16777992248535,
|
3150 |
+
"learning_rate": 1.818200570153568e-05,
|
3151 |
+
"loss": 3.4204,
|
3152 |
+
"step": 8980
|
3153 |
+
},
|
3154 |
+
{
|
3155 |
+
"epoch": 0.72,
|
3156 |
+
"grad_norm": 30.022031784057617,
|
3157 |
+
"learning_rate": 1.813451344546913e-05,
|
3158 |
+
"loss": 3.4905,
|
3159 |
+
"step": 9000
|
3160 |
+
},
|
3161 |
+
{
|
3162 |
+
"epoch": 0.72,
|
3163 |
+
"grad_norm": 34.67860412597656,
|
3164 |
+
"learning_rate": 1.8086988309839755e-05,
|
3165 |
+
"loss": 3.4165,
|
3166 |
+
"step": 9020
|
3167 |
+
},
|
3168 |
+
{
|
3169 |
+
"epoch": 0.72,
|
3170 |
+
"grad_norm": 26.23653793334961,
|
3171 |
+
"learning_rate": 1.8039430793163753e-05,
|
3172 |
+
"loss": 3.5014,
|
3173 |
+
"step": 9040
|
3174 |
}
|
3175 |
],
|
3176 |
"logging_steps": 20,
|
|
|
3178 |
"num_input_tokens_seen": 0,
|
3179 |
"num_train_epochs": 2,
|
3180 |
"save_steps": 20,
|
3181 |
+
"total_flos": 2.1373183266914304e+16,
|
3182 |
"train_batch_size": 8,
|
3183 |
"trial_name": null,
|
3184 |
"trial_params": null
|