Rakhman16 committed
Commit 1d6e517 · verified · 1 Parent(s): 02c5d28

Training in progress, step 1500, checkpoint

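The files below are the standard per-checkpoint artifacts written by the Hugging Face Trainer: model weights (model.safetensors), optimizer and LR-scheduler state (optimizer.pt, scheduler.pt), RNG state (rng_state.pth), and the accumulated training log (trainer_state.json). As a minimal sketch of how such a checkpoint is consumed, one could fetch just this revision and resume from it; the repo id below is a placeholder, since it is not stated in this commit view.

# Sketch only: download the step-1500 checkpoint at this exact revision.
# repo_id is hypothetical; revision "1d6e517" is the commit above.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="Rakhman16/fine-tuned",            # placeholder repo id
    revision="1d6e517",                        # this commit
    allow_patterns=["last-checkpoint/*"],      # only the files listed below
)

# With a transformers.Trainer configured for the same model and data,
# training would continue from step 1500 via:
#   trainer.train(resume_from_checkpoint=f"{local_dir}/last-checkpoint")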
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb86b7f70b45053cb6d55ae4376642fa03d54495b5c27e78b0cd2d46a00b5c0b
+oid sha256:8edb6bc652ecf8d8b92148bf158d74d215cf39e93698c3cc1dc292a00fa77c20
 size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d232a9a6c756331696e700546387d842e2067cae42866a06dd49ad2965fbaa1e
+oid sha256:afd60e4bb235de62341215451cdba1253c241ce49458a0fd6fdfa8847c6d9c92
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b1952f58fa52e592468a775e85ac7e72d5b2b6d4cdb2766a2b20ab5e3f11899
+oid sha256:a29ed82ba21af44a36d8e59405dc8d62a827ba1626cee944f74dd401606bce37
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e999a568b2d0f01998dabb86d34bac49ae5618451719e3426ca61159d87ee5c4
+oid sha256:5b8cd62b06b1b0d4df8a23768d02bad43aa10b569b79d6231c3ee6daaff17e07
 size 1064
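Each diff above changes only the "oid sha256:..." line of a Git LFS pointer; the byte sizes are identical, so the checkpoint layout is unchanged and only the content hashes moved to the step-1500 state. A small sketch of verifying a locally downloaded file against the new pointer (the path is illustrative):

import hashlib
from pathlib import Path

def lfs_sha256(path: Path, chunk_size: int = 1 << 20) -> str:
    # Compute the sha256 digest that a Git LFS pointer records as its oid.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid for last-checkpoint/model.safetensors from the first diff above.
expected = "8edb6bc652ecf8d8b92148bf158d74d215cf39e93698c3cc1dc292a00fa77c20"
actual = lfs_sha256(Path("last-checkpoint/model.safetensors"))
print("model.safetensors matches pointer:", actual == expected)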
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.3063213527202606,
-  "best_model_checkpoint": "./fine-tuned/checkpoint-1000",
-  "epoch": 1.6625103906899419,
+  "best_metric": 0.29390034079551697,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-1500",
+  "epoch": 2.493765586034913,
   "eval_steps": 100,
-  "global_step": 1000,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -227,6 +227,116 @@
       "eval_samples_per_second": 12.901,
       "eval_steps_per_second": 1.619,
       "step": 1000
+    },
+    {
+      "epoch": 1.745635910224439,
+      "grad_norm": 57559.86328125,
+      "learning_rate": 2.816139767054909e-05,
+      "loss": 0.3649,
+      "step": 1050
+    },
+    {
+      "epoch": 1.828761429758936,
+      "grad_norm": 54892.80078125,
+      "learning_rate": 2.7121464226289517e-05,
+      "loss": 0.3518,
+      "step": 1100
+    },
+    {
+      "epoch": 1.828761429758936,
+      "eval_loss": 0.30387237668037415,
+      "eval_runtime": 38.3202,
+      "eval_samples_per_second": 12.891,
+      "eval_steps_per_second": 1.618,
+      "step": 1100
+    },
+    {
+      "epoch": 1.9118869492934332,
+      "grad_norm": 56018.88671875,
+      "learning_rate": 2.608153078202995e-05,
+      "loss": 0.3515,
+      "step": 1150
+    },
+    {
+      "epoch": 1.9950124688279303,
+      "grad_norm": 46875.38671875,
+      "learning_rate": 2.5041597337770382e-05,
+      "loss": 0.3459,
+      "step": 1200
+    },
+    {
+      "epoch": 1.9950124688279303,
+      "eval_loss": 0.29989051818847656,
+      "eval_runtime": 38.3688,
+      "eval_samples_per_second": 12.875,
+      "eval_steps_per_second": 1.616,
+      "step": 1200
+    },
+    {
+      "epoch": 2.0781379883624274,
+      "grad_norm": 46399.90234375,
+      "learning_rate": 2.4001663893510817e-05,
+      "loss": 0.3293,
+      "step": 1250
+    },
+    {
+      "epoch": 2.1612635078969245,
+      "grad_norm": 57348.53515625,
+      "learning_rate": 2.296173044925125e-05,
+      "loss": 0.3387,
+      "step": 1300
+    },
+    {
+      "epoch": 2.1612635078969245,
+      "eval_loss": 0.29858091473579407,
+      "eval_runtime": 38.3821,
+      "eval_samples_per_second": 12.871,
+      "eval_steps_per_second": 1.615,
+      "step": 1300
+    },
+    {
+      "epoch": 2.2443890274314215,
+      "grad_norm": 32873.6796875,
+      "learning_rate": 2.1921797004991683e-05,
+      "loss": 0.3366,
+      "step": 1350
+    },
+    {
+      "epoch": 2.3275145469659186,
+      "grad_norm": 47216.03125,
+      "learning_rate": 2.0881863560732114e-05,
+      "loss": 0.3528,
+      "step": 1400
+    },
+    {
+      "epoch": 2.3275145469659186,
+      "eval_loss": 0.29636165499687195,
+      "eval_runtime": 38.2663,
+      "eval_samples_per_second": 12.91,
+      "eval_steps_per_second": 1.62,
+      "step": 1400
+    },
+    {
+      "epoch": 2.4106400665004157,
+      "grad_norm": 52153.40234375,
+      "learning_rate": 1.9841930116472545e-05,
+      "loss": 0.3379,
+      "step": 1450
+    },
+    {
+      "epoch": 2.493765586034913,
+      "grad_norm": 57533.625,
+      "learning_rate": 1.880199667221298e-05,
+      "loss": 0.3175,
+      "step": 1500
+    },
+    {
+      "epoch": 2.493765586034913,
+      "eval_loss": 0.29390034079551697,
+      "eval_runtime": 38.2441,
+      "eval_samples_per_second": 12.917,
+      "eval_steps_per_second": 1.621,
+      "step": 1500
     }
   ],
   "logging_steps": 50,
@@ -246,7 +356,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9742717291069440.0,
+  "total_flos": 1.461377145765888e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null