YL95 committed on
Commit
dbd584c
1 Parent(s): 07c72ac

training state at step 15

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.13793103448275862,
5
  "eval_steps": 1,
6
- "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -142,6 +142,81 @@
142
  "eval_samples_per_second": 1.138,
143
  "eval_steps_per_second": 0.569,
144
  "step": 9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  }
146
  ],
147
  "logging_steps": 1,
@@ -161,7 +236,7 @@
161
  "attributes": {}
162
  }
163
  },
164
- "total_flos": 1.169340664455168e+16,
165
  "train_batch_size": 2,
166
  "trial_name": null,
167
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.20689655172413793,
5
  "eval_steps": 1,
6
+ "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
142
  "eval_samples_per_second": 1.138,
143
  "eval_steps_per_second": 0.569,
144
  "step": 9
145
+ },
146
+ {
147
+ "epoch": 0.13793103448275862,
148
+ "grad_norm": 2.79286789894104,
149
+ "learning_rate": 2.272727272727273e-05,
150
+ "loss": 1.5025,
151
+ "step": 10
152
+ },
153
+ {
154
+ "epoch": 0.13793103448275862,
155
+ "eval_loss": 1.3961191177368164,
156
+ "eval_runtime": 18.3446,
157
+ "eval_samples_per_second": 1.09,
158
+ "eval_steps_per_second": 0.545,
159
+ "step": 10
160
+ },
161
+ {
162
+ "epoch": 0.15172413793103448,
163
+ "grad_norm": 2.885422706604004,
164
+ "learning_rate": 2.5e-05,
165
+ "loss": 1.5477,
166
+ "step": 11
167
+ },
168
+ {
169
+ "epoch": 0.15172413793103448,
170
+ "eval_loss": 1.3420469760894775,
171
+ "eval_runtime": 17.7683,
172
+ "eval_samples_per_second": 1.126,
173
+ "eval_steps_per_second": 0.563,
174
+ "step": 11
175
+ },
176
+ {
177
+ "epoch": 0.16551724137931034,
178
+ "grad_norm": 2.7671327590942383,
179
+ "learning_rate": 2.7272727272727273e-05,
180
+ "loss": 1.6921,
181
+ "step": 12
182
+ },
183
+ {
184
+ "epoch": 0.16551724137931034,
185
+ "eval_loss": 1.3071445226669312,
186
+ "eval_runtime": 17.652,
187
+ "eval_samples_per_second": 1.133,
188
+ "eval_steps_per_second": 0.567,
189
+ "step": 12
190
+ },
191
+ {
192
+ "epoch": 0.1793103448275862,
193
+ "grad_norm": 2.9047963619232178,
194
+ "learning_rate": 2.954545454545455e-05,
195
+ "loss": 1.5365,
196
+ "step": 13
197
+ },
198
+ {
199
+ "epoch": 0.1793103448275862,
200
+ "eval_loss": 1.2601890563964844,
201
+ "eval_runtime": 17.5232,
202
+ "eval_samples_per_second": 1.141,
203
+ "eval_steps_per_second": 0.571,
204
+ "step": 13
205
+ },
206
+ {
207
+ "epoch": 0.19310344827586207,
208
+ "grad_norm": 2.6054675579071045,
209
+ "learning_rate": 3.181818181818182e-05,
210
+ "loss": 1.6621,
211
+ "step": 14
212
+ },
213
+ {
214
+ "epoch": 0.19310344827586207,
215
+ "eval_loss": 1.2506535053253174,
216
+ "eval_runtime": 17.6295,
217
+ "eval_samples_per_second": 1.134,
218
+ "eval_steps_per_second": 0.567,
219
+ "step": 14
220
  }
221
  ],
222
  "logging_steps": 1,
 
236
  "attributes": {}
237
  }
238
  },
239
+ "total_flos": 1.8199561861840896e+16,
240
  "train_batch_size": 2,
241
  "trial_name": null,
242
  "trial_params": null