training state at step 60
Browse files- trainer_state.json +63 -3
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 1,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -802,6 +802,66 @@
|
|
802 |
"eval_samples_per_second": 1.268,
|
803 |
"eval_steps_per_second": 0.634,
|
804 |
"step": 54
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
805 |
}
|
806 |
],
|
807 |
"logging_steps": 1,
|
@@ -821,7 +881,7 @@
|
|
821 |
"attributes": {}
|
822 |
}
|
823 |
},
|
824 |
-
"total_flos":
|
825 |
"train_batch_size": 2,
|
826 |
"trial_name": null,
|
827 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8275862068965517,
|
5 |
"eval_steps": 1,
|
6 |
+
"global_step": 60,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
802 |
"eval_samples_per_second": 1.268,
|
803 |
"eval_steps_per_second": 0.634,
|
804 |
"step": 54
|
805 |
+
},
|
806 |
+
{
|
807 |
+
"epoch": 0.7724137931034483,
|
808 |
+
"grad_norm": 2.1803667545318604,
|
809 |
+
"learning_rate": 2.2727272727272728e-06,
|
810 |
+
"loss": 1.4271,
|
811 |
+
"step": 56
|
812 |
+
},
|
813 |
+
{
|
814 |
+
"epoch": 0.7724137931034483,
|
815 |
+
"eval_loss": 0.8433731198310852,
|
816 |
+
"eval_runtime": 17.9885,
|
817 |
+
"eval_samples_per_second": 1.112,
|
818 |
+
"eval_steps_per_second": 0.556,
|
819 |
+
"step": 56
|
820 |
+
},
|
821 |
+
{
|
822 |
+
"epoch": 0.7862068965517242,
|
823 |
+
"grad_norm": 2.3162448406219482,
|
824 |
+
"learning_rate": 4.5454545454545455e-06,
|
825 |
+
"loss": 1.4689,
|
826 |
+
"step": 57
|
827 |
+
},
|
828 |
+
{
|
829 |
+
"epoch": 0.7862068965517242,
|
830 |
+
"eval_loss": 0.8418852090835571,
|
831 |
+
"eval_runtime": 18.2763,
|
832 |
+
"eval_samples_per_second": 1.094,
|
833 |
+
"eval_steps_per_second": 0.547,
|
834 |
+
"step": 57
|
835 |
+
},
|
836 |
+
{
|
837 |
+
"epoch": 0.8,
|
838 |
+
"grad_norm": 1.9732853174209595,
|
839 |
+
"learning_rate": 6.818181818181818e-06,
|
840 |
+
"loss": 1.2825,
|
841 |
+
"step": 58
|
842 |
+
},
|
843 |
+
{
|
844 |
+
"epoch": 0.8,
|
845 |
+
"eval_loss": 0.8386393785476685,
|
846 |
+
"eval_runtime": 18.1184,
|
847 |
+
"eval_samples_per_second": 1.104,
|
848 |
+
"eval_steps_per_second": 0.552,
|
849 |
+
"step": 58
|
850 |
+
},
|
851 |
+
{
|
852 |
+
"epoch": 0.8137931034482758,
|
853 |
+
"grad_norm": 2.0547423362731934,
|
854 |
+
"learning_rate": 9.090909090909091e-06,
|
855 |
+
"loss": 1.2972,
|
856 |
+
"step": 59
|
857 |
+
},
|
858 |
+
{
|
859 |
+
"epoch": 0.8137931034482758,
|
860 |
+
"eval_loss": 0.8355510830879211,
|
861 |
+
"eval_runtime": 18.2216,
|
862 |
+
"eval_samples_per_second": 1.098,
|
863 |
+
"eval_steps_per_second": 0.549,
|
864 |
+
"step": 59
|
865 |
}
|
866 |
],
|
867 |
"logging_steps": 1,
|
|
|
881 |
"attributes": {}
|
882 |
}
|
883 |
},
|
884 |
+
"total_flos": 7.477855969291469e+16,
|
885 |
"train_batch_size": 2,
|
886 |
"trial_name": null,
|
887 |
"trial_params": null
|