YL95 commited on
Commit
da8580c
1 Parent(s): c47b1ae

training state at step 10

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06896551724137931,
5
  "eval_steps": 1,
6
- "global_step": 5,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -67,6 +67,81 @@
67
  "eval_samples_per_second": 1.134,
68
  "eval_steps_per_second": 0.567,
69
  "step": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  }
71
  ],
72
  "logging_steps": 1,
@@ -86,7 +161,7 @@
86
  "attributes": {}
87
  }
88
  },
89
- "total_flos": 5047562107060224.0,
90
  "train_batch_size": 2,
91
  "trial_name": null,
92
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.13793103448275862,
5
  "eval_steps": 1,
6
+ "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
67
  "eval_samples_per_second": 1.134,
68
  "eval_steps_per_second": 0.567,
69
  "step": 4
70
+ },
71
+ {
72
+ "epoch": 0.06896551724137931,
73
+ "grad_norm": 3.4548702239990234,
74
+ "learning_rate": 1.1363636363636365e-05,
75
+ "loss": 1.8838,
76
+ "step": 5
77
+ },
78
+ {
79
+ "epoch": 0.06896551724137931,
80
+ "eval_loss": 1.7746165990829468,
81
+ "eval_runtime": 18.0257,
82
+ "eval_samples_per_second": 1.11,
83
+ "eval_steps_per_second": 0.555,
84
+ "step": 5
85
+ },
86
+ {
87
+ "epoch": 0.08275862068965517,
88
+ "grad_norm": 3.1943702697753906,
89
+ "learning_rate": 1.3636363636363637e-05,
90
+ "loss": 1.7707,
91
+ "step": 6
92
+ },
93
+ {
94
+ "epoch": 0.08275862068965517,
95
+ "eval_loss": 1.6792665719985962,
96
+ "eval_runtime": 17.7498,
97
+ "eval_samples_per_second": 1.127,
98
+ "eval_steps_per_second": 0.563,
99
+ "step": 6
100
+ },
101
+ {
102
+ "epoch": 0.09655172413793103,
103
+ "grad_norm": 3.318288564682007,
104
+ "learning_rate": 1.590909090909091e-05,
105
+ "loss": 1.7171,
106
+ "step": 7
107
+ },
108
+ {
109
+ "epoch": 0.09655172413793103,
110
+ "eval_loss": 1.5874873399734497,
111
+ "eval_runtime": 17.6295,
112
+ "eval_samples_per_second": 1.134,
113
+ "eval_steps_per_second": 0.567,
114
+ "step": 7
115
+ },
116
+ {
117
+ "epoch": 0.1103448275862069,
118
+ "grad_norm": 3.210330009460449,
119
+ "learning_rate": 1.8181818181818182e-05,
120
+ "loss": 1.5734,
121
+ "step": 8
122
+ },
123
+ {
124
+ "epoch": 0.1103448275862069,
125
+ "eval_loss": 1.535287618637085,
126
+ "eval_runtime": 17.6232,
127
+ "eval_samples_per_second": 1.135,
128
+ "eval_steps_per_second": 0.567,
129
+ "step": 8
130
+ },
131
+ {
132
+ "epoch": 0.12413793103448276,
133
+ "grad_norm": 3.2319107055664062,
134
+ "learning_rate": 2.0454545454545457e-05,
135
+ "loss": 1.7986,
136
+ "step": 9
137
+ },
138
+ {
139
+ "epoch": 0.12413793103448276,
140
+ "eval_loss": 1.467301607131958,
141
+ "eval_runtime": 17.5824,
142
+ "eval_samples_per_second": 1.138,
143
+ "eval_steps_per_second": 0.569,
144
+ "step": 9
145
  }
146
  ],
147
  "logging_steps": 1,
 
161
  "attributes": {}
162
  }
163
  },
164
+ "total_flos": 1.169340664455168e+16,
165
  "train_batch_size": 2,
166
  "trial_name": null,
167
  "trial_params": null