Training in progress, step 240
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3544920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be720761f0e10b60ecbc47900152f65973bd708bf66af4cbd26d0e647c2fc9ea
|
3 |
size 3544920
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1140034381
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:279001289160111f69989bdbd63e59b46d9353e04b8efe5c45c0ee36ea822c2b
|
3 |
size 1140034381
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdc259941b88629c38a5d42d50c6190d622a9bb02d68782819b22239cb26f270
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df343fa90fcddafb2dfc655c399dcd6fb9788397b581ec853a351289fbf07315
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -301,11 +301,70 @@
|
|
301 |
"eval_samples_per_second": 0.065,
|
302 |
"eval_steps_per_second": 0.034,
|
303 |
"step": 200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
}
|
305 |
],
|
306 |
"max_steps": 369,
|
307 |
"num_train_epochs": 3,
|
308 |
-
"total_flos":
|
309 |
"trial_name": null,
|
310 |
"trial_params": null
|
311 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9393939393939394,
|
5 |
+
"global_step": 240,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
301 |
"eval_samples_per_second": 0.065,
|
302 |
"eval_steps_per_second": 0.034,
|
303 |
"step": 200
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 1.66,
|
307 |
+
"learning_rate": 2.2493224932249323e-05,
|
308 |
+
"loss": 3.4208,
|
309 |
+
"step": 205
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"epoch": 1.7,
|
313 |
+
"learning_rate": 2.181571815718157e-05,
|
314 |
+
"loss": 3.7157,
|
315 |
+
"step": 210
|
316 |
+
},
|
317 |
+
{
|
318 |
+
"epoch": 1.74,
|
319 |
+
"learning_rate": 2.1138211382113822e-05,
|
320 |
+
"loss": 3.0534,
|
321 |
+
"step": 215
|
322 |
+
},
|
323 |
+
{
|
324 |
+
"epoch": 1.78,
|
325 |
+
"learning_rate": 2.046070460704607e-05,
|
326 |
+
"loss": 3.2667,
|
327 |
+
"step": 220
|
328 |
+
},
|
329 |
+
{
|
330 |
+
"epoch": 1.82,
|
331 |
+
"learning_rate": 1.978319783197832e-05,
|
332 |
+
"loss": 3.3489,
|
333 |
+
"step": 225
|
334 |
+
},
|
335 |
+
{
|
336 |
+
"epoch": 1.86,
|
337 |
+
"learning_rate": 1.9105691056910573e-05,
|
338 |
+
"loss": 3.3139,
|
339 |
+
"step": 230
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 1.9,
|
343 |
+
"learning_rate": 1.842818428184282e-05,
|
344 |
+
"loss": 3.3155,
|
345 |
+
"step": 235
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.94,
|
349 |
+
"learning_rate": 1.775067750677507e-05,
|
350 |
+
"loss": 3.603,
|
351 |
+
"step": 240
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"epoch": 1.94,
|
355 |
+
"eval_loss": 3.135263681411743,
|
356 |
+
"eval_rouge2_fmeasure": 0.0,
|
357 |
+
"eval_rouge2_precision": 0.0,
|
358 |
+
"eval_rouge2_recall": 0.0,
|
359 |
+
"eval_runtime": 517.2838,
|
360 |
+
"eval_samples_per_second": 0.052,
|
361 |
+
"eval_steps_per_second": 0.027,
|
362 |
+
"step": 240
|
363 |
}
|
364 |
],
|
365 |
"max_steps": 369,
|
366 |
"num_train_epochs": 3,
|
367 |
+
"total_flos": 9365407043420160.0,
|
368 |
"trial_name": null,
|
369 |
"trial_params": null
|
370 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1140034381
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:279001289160111f69989bdbd63e59b46d9353e04b8efe5c45c0ee36ea822c2b
|
3 |
size 1140034381
|