indiejoseph
committed on
Commit
·
e469930
1
Parent(s):
74c55f8
End of training
Browse files
- all_results.json +5 -5
- train_results.json +5 -5
- trainer_state.json +132 -12
all_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 128272,
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second": 2.
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 7.0,
|
3 |
+
"train_loss": 0.34533324811340976,
|
4 |
+
"train_runtime": 2399.0328,
|
5 |
"train_samples": 128272,
|
6 |
+
"train_samples_per_second": 374.277,
|
7 |
+
"train_steps_per_second": 2.924
|
8 |
}
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 128272,
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second": 2.
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 7.0,
|
3 |
+
"train_loss": 0.34533324811340976,
|
4 |
+
"train_runtime": 2399.0328,
|
5 |
"train_samples": 128272,
|
6 |
+
"train_samples_per_second": 374.277,
|
7 |
+
"train_steps_per_second": 2.924
|
8 |
}
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -309,20 +309,140 @@
|
|
309 |
"step": 5000
|
310 |
},
|
311 |
{
|
312 |
-
"epoch": 5.
|
313 |
-
"
|
314 |
-
"
|
315 |
-
"
|
316 |
-
|
317 |
-
|
318 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
}
|
320 |
],
|
321 |
"logging_steps": 100,
|
322 |
-
"max_steps":
|
323 |
-
"num_train_epochs":
|
324 |
"save_steps": 500,
|
325 |
-
"total_flos":
|
326 |
"trial_name": null,
|
327 |
"trial_params": null
|
328 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.999625795185231,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 7014,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
309 |
"step": 5000
|
310 |
},
|
311 |
{
|
312 |
+
"epoch": 5.09,
|
313 |
+
"learning_rate": 1.364414029084688e-05,
|
314 |
+
"loss": 1.2276,
|
315 |
+
"step": 5100
|
316 |
+
},
|
317 |
+
{
|
318 |
+
"epoch": 5.19,
|
319 |
+
"learning_rate": 1.2931280296549758e-05,
|
320 |
+
"loss": 1.2064,
|
321 |
+
"step": 5200
|
322 |
+
},
|
323 |
+
{
|
324 |
+
"epoch": 5.29,
|
325 |
+
"learning_rate": 1.2218420302252638e-05,
|
326 |
+
"loss": 1.2102,
|
327 |
+
"step": 5300
|
328 |
+
},
|
329 |
+
{
|
330 |
+
"epoch": 5.39,
|
331 |
+
"learning_rate": 1.1505560307955517e-05,
|
332 |
+
"loss": 1.2095,
|
333 |
+
"step": 5400
|
334 |
+
},
|
335 |
+
{
|
336 |
+
"epoch": 5.49,
|
337 |
+
"learning_rate": 1.0792700313658398e-05,
|
338 |
+
"loss": 1.2239,
|
339 |
+
"step": 5500
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 5.59,
|
343 |
+
"learning_rate": 1.0079840319361278e-05,
|
344 |
+
"loss": 1.216,
|
345 |
+
"step": 5600
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 5.69,
|
349 |
+
"learning_rate": 9.366980325064158e-06,
|
350 |
+
"loss": 1.2206,
|
351 |
+
"step": 5700
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"epoch": 5.79,
|
355 |
+
"learning_rate": 8.654120330767037e-06,
|
356 |
+
"loss": 1.2133,
|
357 |
+
"step": 5800
|
358 |
+
},
|
359 |
+
{
|
360 |
+
"epoch": 5.89,
|
361 |
+
"learning_rate": 7.941260336469917e-06,
|
362 |
+
"loss": 1.2059,
|
363 |
+
"step": 5900
|
364 |
+
},
|
365 |
+
{
|
366 |
+
"epoch": 5.99,
|
367 |
+
"learning_rate": 7.228400342172797e-06,
|
368 |
+
"loss": 1.206,
|
369 |
+
"step": 6000
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"epoch": 6.09,
|
373 |
+
"learning_rate": 6.5155403478756775e-06,
|
374 |
+
"loss": 1.1976,
|
375 |
+
"step": 6100
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"epoch": 6.19,
|
379 |
+
"learning_rate": 5.802680353578557e-06,
|
380 |
+
"loss": 1.1966,
|
381 |
+
"step": 6200
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"epoch": 6.29,
|
385 |
+
"learning_rate": 5.0898203592814375e-06,
|
386 |
+
"loss": 1.1992,
|
387 |
+
"step": 6300
|
388 |
+
},
|
389 |
+
{
|
390 |
+
"epoch": 6.39,
|
391 |
+
"learning_rate": 4.376960364984317e-06,
|
392 |
+
"loss": 1.1946,
|
393 |
+
"step": 6400
|
394 |
+
},
|
395 |
+
{
|
396 |
+
"epoch": 6.49,
|
397 |
+
"learning_rate": 3.664100370687197e-06,
|
398 |
+
"loss": 1.1879,
|
399 |
+
"step": 6500
|
400 |
+
},
|
401 |
+
{
|
402 |
+
"epoch": 6.59,
|
403 |
+
"learning_rate": 2.951240376390077e-06,
|
404 |
+
"loss": 1.1863,
|
405 |
+
"step": 6600
|
406 |
+
},
|
407 |
+
{
|
408 |
+
"epoch": 6.69,
|
409 |
+
"learning_rate": 2.238380382092957e-06,
|
410 |
+
"loss": 1.182,
|
411 |
+
"step": 6700
|
412 |
+
},
|
413 |
+
{
|
414 |
+
"epoch": 6.79,
|
415 |
+
"learning_rate": 1.525520387795837e-06,
|
416 |
+
"loss": 1.1914,
|
417 |
+
"step": 6800
|
418 |
+
},
|
419 |
+
{
|
420 |
+
"epoch": 6.89,
|
421 |
+
"learning_rate": 8.12660393498717e-07,
|
422 |
+
"loss": 1.1892,
|
423 |
+
"step": 6900
|
424 |
+
},
|
425 |
+
{
|
426 |
+
"epoch": 6.99,
|
427 |
+
"learning_rate": 9.98003992015968e-08,
|
428 |
+
"loss": 1.1943,
|
429 |
+
"step": 7000
|
430 |
+
},
|
431 |
+
{
|
432 |
+
"epoch": 7.0,
|
433 |
+
"step": 7014,
|
434 |
+
"total_flos": 1.1685618503900851e+17,
|
435 |
+
"train_loss": 0.34533324811340976,
|
436 |
+
"train_runtime": 2399.0328,
|
437 |
+
"train_samples_per_second": 374.277,
|
438 |
+
"train_steps_per_second": 2.924
|
439 |
}
|
440 |
],
|
441 |
"logging_steps": 100,
|
442 |
+
"max_steps": 7014,
|
443 |
+
"num_train_epochs": 7,
|
444 |
"save_steps": 500,
|
445 |
+
"total_flos": 1.1685618503900851e+17,
|
446 |
"trial_name": null,
|
447 |
"trial_params": null
|
448 |
}
|