YL95 committed on
Commit
0ab8d1a
1 Parent(s): 99266bf

training state at step 20

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.20689655172413793,
5
  "eval_steps": 1,
6
- "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -217,6 +217,81 @@
217
  "eval_samples_per_second": 1.134,
218
  "eval_steps_per_second": 0.567,
219
  "step": 14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  }
221
  ],
222
  "logging_steps": 1,
@@ -236,7 +311,7 @@
236
  "attributes": {}
237
  }
238
  },
239
- "total_flos": 1.8199561861840896e+16,
240
  "train_batch_size": 2,
241
  "trial_name": null,
242
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.27586206896551724,
5
  "eval_steps": 1,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
217
  "eval_samples_per_second": 1.134,
218
  "eval_steps_per_second": 0.567,
219
  "step": 14
220
+ },
221
+ {
222
+ "epoch": 0.20689655172413793,
223
+ "grad_norm": 2.538036823272705,
224
+ "learning_rate": 3.409090909090909e-05,
225
+ "loss": 1.6763,
226
+ "step": 15
227
+ },
228
+ {
229
+ "epoch": 0.20689655172413793,
230
+ "eval_loss": 1.2366451025009155,
231
+ "eval_runtime": 18.6172,
232
+ "eval_samples_per_second": 1.074,
233
+ "eval_steps_per_second": 0.537,
234
+ "step": 15
235
+ },
236
+ {
237
+ "epoch": 0.2206896551724138,
238
+ "grad_norm": 2.5125789642333984,
239
+ "learning_rate": 3.6363636363636364e-05,
240
+ "loss": 1.668,
241
+ "step": 16
242
+ },
243
+ {
244
+ "epoch": 0.2206896551724138,
245
+ "eval_loss": 1.2205184698104858,
246
+ "eval_runtime": 17.7529,
247
+ "eval_samples_per_second": 1.127,
248
+ "eval_steps_per_second": 0.563,
249
+ "step": 16
250
+ },
251
+ {
252
+ "epoch": 0.23448275862068965,
253
+ "grad_norm": 5.055665969848633,
254
+ "learning_rate": 3.8636363636363636e-05,
255
+ "loss": 1.5703,
256
+ "step": 17
257
+ },
258
+ {
259
+ "epoch": 0.23448275862068965,
260
+ "eval_loss": 1.167407751083374,
261
+ "eval_runtime": 17.5902,
262
+ "eval_samples_per_second": 1.137,
263
+ "eval_steps_per_second": 0.568,
264
+ "step": 17
265
+ },
266
+ {
267
+ "epoch": 0.2482758620689655,
268
+ "grad_norm": 2.567411422729492,
269
+ "learning_rate": 4.0909090909090915e-05,
270
+ "loss": 1.4859,
271
+ "step": 18
272
+ },
273
+ {
274
+ "epoch": 0.2482758620689655,
275
+ "eval_loss": 1.1367636919021606,
276
+ "eval_runtime": 17.4832,
277
+ "eval_samples_per_second": 1.144,
278
+ "eval_steps_per_second": 0.572,
279
+ "step": 18
280
+ },
281
+ {
282
+ "epoch": 0.2620689655172414,
283
+ "grad_norm": 2.3214948177337646,
284
+ "learning_rate": 4.318181818181819e-05,
285
+ "loss": 1.4511,
286
+ "step": 19
287
+ },
288
+ {
289
+ "epoch": 0.2620689655172414,
290
+ "eval_loss": 1.1296402215957642,
291
+ "eval_runtime": 17.6655,
292
+ "eval_samples_per_second": 1.132,
293
+ "eval_steps_per_second": 0.566,
294
+ "step": 19
295
  }
296
  ],
297
  "logging_steps": 1,
 
311
  "attributes": {}
312
  }
313
  },
314
+ "total_flos": 2.4706263096016896e+16,
315
  "train_batch_size": 2,
316
  "trial_name": null,
317
  "trial_params": null