Training in progress, step 85000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 747557272
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad9d161ccfe8557f774389914a0964e7cdf04d6e8ea6d5448a02a5477eba0971
|
3 |
size 747557272
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1495236026
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51c751859bef95ec2c893271651707f5eb8ea5792b49f2773a99044eeec97c6e
|
3 |
size 1495236026
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c5348d2e9fea1109e86f6b89c5d5fa153a55ab6c2c1017e0eb00abc8b2a78cf
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61d291fc8147253f44bd142e317a0e7f96fef440dba6211ddac6ff2f9ed3f157
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1127,6 +1127,76 @@
|
|
1127 |
"learning_rate": 2.1131122066829887e-06,
|
1128 |
"loss": 4.1045,
|
1129 |
"step": 80000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1130 |
}
|
1131 |
],
|
1132 |
"logging_steps": 500,
|
@@ -1146,7 +1216,7 @@
|
|
1146 |
"attributes": {}
|
1147 |
}
|
1148 |
},
|
1149 |
-
"total_flos": 2.
|
1150 |
"train_batch_size": 16,
|
1151 |
"trial_name": null,
|
1152 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.8507227420598986,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 85000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1127 |
"learning_rate": 2.1131122066829887e-06,
|
1128 |
"loss": 4.1045,
|
1129 |
"step": 80000
|
1130 |
+
},
|
1131 |
+
{
|
1132 |
+
"epoch": 2.6998021263037866,
|
1133 |
+
"grad_norm": 16.334218978881836,
|
1134 |
+
"learning_rate": 2.001319157974757e-06,
|
1135 |
+
"loss": 4.1196,
|
1136 |
+
"step": 80500
|
1137 |
+
},
|
1138 |
+
{
|
1139 |
+
"epoch": 2.716571083610021,
|
1140 |
+
"grad_norm": 15.715066909790039,
|
1141 |
+
"learning_rate": 1.889526109266526e-06,
|
1142 |
+
"loss": 4.0669,
|
1143 |
+
"step": 81000
|
1144 |
+
},
|
1145 |
+
{
|
1146 |
+
"epoch": 2.733340040916256,
|
1147 |
+
"grad_norm": 19.521465301513672,
|
1148 |
+
"learning_rate": 1.7777330605582947e-06,
|
1149 |
+
"loss": 4.0515,
|
1150 |
+
"step": 81500
|
1151 |
+
},
|
1152 |
+
{
|
1153 |
+
"epoch": 2.7501089982224904,
|
1154 |
+
"grad_norm": 14.903164863586426,
|
1155 |
+
"learning_rate": 1.6659400118500632e-06,
|
1156 |
+
"loss": 4.0894,
|
1157 |
+
"step": 82000
|
1158 |
+
},
|
1159 |
+
{
|
1160 |
+
"epoch": 2.766877955528725,
|
1161 |
+
"grad_norm": 15.718372344970703,
|
1162 |
+
"learning_rate": 1.554146963141832e-06,
|
1163 |
+
"loss": 4.1012,
|
1164 |
+
"step": 82500
|
1165 |
+
},
|
1166 |
+
{
|
1167 |
+
"epoch": 2.7836469128349597,
|
1168 |
+
"grad_norm": 18.20594024658203,
|
1169 |
+
"learning_rate": 1.4423539144336007e-06,
|
1170 |
+
"loss": 4.0898,
|
1171 |
+
"step": 83000
|
1172 |
+
},
|
1173 |
+
{
|
1174 |
+
"epoch": 2.8004158701411948,
|
1175 |
+
"grad_norm": 26.69768524169922,
|
1176 |
+
"learning_rate": 1.3305608657253694e-06,
|
1177 |
+
"loss": 4.1673,
|
1178 |
+
"step": 83500
|
1179 |
+
},
|
1180 |
+
{
|
1181 |
+
"epoch": 2.8171848274474294,
|
1182 |
+
"grad_norm": 15.399796485900879,
|
1183 |
+
"learning_rate": 1.218767817017138e-06,
|
1184 |
+
"loss": 4.0204,
|
1185 |
+
"step": 84000
|
1186 |
+
},
|
1187 |
+
{
|
1188 |
+
"epoch": 2.833953784753664,
|
1189 |
+
"grad_norm": 14.401926040649414,
|
1190 |
+
"learning_rate": 1.1069747683089067e-06,
|
1191 |
+
"loss": 4.0974,
|
1192 |
+
"step": 84500
|
1193 |
+
},
|
1194 |
+
{
|
1195 |
+
"epoch": 2.8507227420598986,
|
1196 |
+
"grad_norm": 18.283782958984375,
|
1197 |
+
"learning_rate": 9.951817196006754e-07,
|
1198 |
+
"loss": 4.1121,
|
1199 |
+
"step": 85000
|
1200 |
}
|
1201 |
],
|
1202 |
"logging_steps": 500,
|
|
|
1216 |
"attributes": {}
|
1217 |
}
|
1218 |
},
|
1219 |
+
"total_flos": 2.6168910452202e+16,
|
1220 |
"train_batch_size": 16,
|
1221 |
"trial_name": null,
|
1222 |
"trial_params": null
|