Upload trainer_state.json with huggingface_hub
Browse files- trainer_state.json +5 -56
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 2.
|
3 |
-
"best_model_checkpoint": "./results/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 250,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -160,57 +160,6 @@
|
|
160 |
"eval_samples_per_second": 47.975,
|
161 |
"eval_steps_per_second": 1.5,
|
162 |
"step": 1500
|
163 |
-
},
|
164 |
-
{
|
165 |
-
"epoch": 0.7752877044215627,
|
166 |
-
"grad_norm": 0.302734375,
|
167 |
-
"learning_rate": 0.0002431524046570296,
|
168 |
-
"loss": 2.6635,
|
169 |
-
"step": 1600
|
170 |
-
},
|
171 |
-
{
|
172 |
-
"epoch": 0.8237431859479104,
|
173 |
-
"grad_norm": 0.33203125,
|
174 |
-
"learning_rate": 0.00015193378712876149,
|
175 |
-
"loss": 2.6561,
|
176 |
-
"step": 1700
|
177 |
-
},
|
178 |
-
{
|
179 |
-
"epoch": 0.8479709267110842,
|
180 |
-
"eval_loss": 2.655076503753662,
|
181 |
-
"eval_runtime": 1225.3417,
|
182 |
-
"eval_samples_per_second": 47.906,
|
183 |
-
"eval_steps_per_second": 1.498,
|
184 |
-
"step": 1750
|
185 |
-
},
|
186 |
-
{
|
187 |
-
"epoch": 0.872198667474258,
|
188 |
-
"grad_norm": 0.298828125,
|
189 |
-
"learning_rate": 8.074887282213439e-05,
|
190 |
-
"loss": 2.6481,
|
191 |
-
"step": 1800
|
192 |
-
},
|
193 |
-
{
|
194 |
-
"epoch": 0.9206541490006057,
|
195 |
-
"grad_norm": 0.26953125,
|
196 |
-
"learning_rate": 3.127924912435132e-05,
|
197 |
-
"loss": 2.6565,
|
198 |
-
"step": 1900
|
199 |
-
},
|
200 |
-
{
|
201 |
-
"epoch": 0.9691096305269533,
|
202 |
-
"grad_norm": 0.416015625,
|
203 |
-
"learning_rate": 4.693527335575154e-06,
|
204 |
-
"loss": 2.6513,
|
205 |
-
"step": 2000
|
206 |
-
},
|
207 |
-
{
|
208 |
-
"epoch": 0.9691096305269533,
|
209 |
-
"eval_loss": 2.6527161598205566,
|
210 |
-
"eval_runtime": 1223.4873,
|
211 |
-
"eval_samples_per_second": 47.978,
|
212 |
-
"eval_steps_per_second": 1.5,
|
213 |
-
"step": 2000
|
214 |
}
|
215 |
],
|
216 |
"logging_steps": 100,
|
@@ -230,7 +179,7 @@
|
|
230 |
"attributes": {}
|
231 |
}
|
232 |
},
|
233 |
-
"total_flos":
|
234 |
"train_batch_size": 4,
|
235 |
"trial_name": null,
|
236 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 2.6806631088256836,
|
3 |
+
"best_model_checkpoint": "./results/checkpoint-1500",
|
4 |
+
"epoch": 0.726832222895215,
|
5 |
"eval_steps": 250,
|
6 |
+
"global_step": 1500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
160 |
"eval_samples_per_second": 47.975,
|
161 |
"eval_steps_per_second": 1.5,
|
162 |
"step": 1500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
}
|
164 |
],
|
165 |
"logging_steps": 100,
|
|
|
179 |
"attributes": {}
|
180 |
}
|
181 |
},
|
182 |
+
"total_flos": 3.508942109933568e+17,
|
183 |
"train_batch_size": 4,
|
184 |
"trial_name": null,
|
185 |
"trial_params": null
|