lesso10 commited on
Commit
8e07a4e
·
verified ·
1 Parent(s): 0725bbe

Training in progress, step 40, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0c66bb5ff83f2d13fb13e710bdf34e073dbbe0fd37dbf0ea61c41237f2e0180
3
  size 63592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72025eee6c8df79570a58f6dfb4738763a40398724aaf8c956bbea01f3d19d9b
3
  size 63592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02bf14a3972139bbc878d973bf716dbdd40227329bdcd0171ccb3bd7d10e0fa8
3
  size 136814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b7903ceb63f7319d7743707770b8458bbb24c5490fbd6cb08d05c524c733f2d
3
  size 136814
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54ccaa53d2d8ec06c1ee34cc01a5fe264fe9af0525bcf751771ab98a9dbdd66d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1f7e85ec0c7cf94e6539c614b2cdae35e592e6008d0e889a34268e7ffb5ad26
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:447df76721bbd643dda80100b0c96aa0132e69f3bcbf6b3499b1531cc29ef803
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf4d0d730ccc9e2c8ddc5f19d724b476847a4fd05d50494d967b231b2616759
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 11.04161262512207,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-30",
4
- "epoch": 0.0010082423814685051,
5
  "eval_steps": 5,
6
- "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -273,6 +273,92 @@
273
  "eval_samples_per_second": 114.858,
274
  "eval_steps_per_second": 28.718,
275
  "step": 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  }
277
  ],
278
  "logging_steps": 1,
@@ -301,7 +387,7 @@
301
  "attributes": {}
302
  }
303
  },
304
- "total_flos": 224604979200.0,
305
  "train_batch_size": 4,
306
  "trial_name": null,
307
  "trial_params": null
 
1
  {
2
+ "best_metric": 11.035011291503906,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-40",
4
+ "epoch": 0.00134432317529134,
5
  "eval_steps": 5,
6
+ "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
273
  "eval_samples_per_second": 114.858,
274
  "eval_steps_per_second": 28.718,
275
  "step": 30
276
+ },
277
+ {
278
+ "epoch": 0.0010418504608507885,
279
+ "grad_norm": 1.397843360900879,
280
+ "learning_rate": 3.790390522001662e-05,
281
+ "loss": 44.1668,
282
+ "step": 31
283
+ },
284
+ {
285
+ "epoch": 0.001075458540233072,
286
+ "grad_norm": 1.3839954137802124,
287
+ "learning_rate": 3.4549150281252636e-05,
288
+ "loss": 44.1662,
289
+ "step": 32
290
+ },
291
+ {
292
+ "epoch": 0.0011090666196153555,
293
+ "grad_norm": 1.4163095951080322,
294
+ "learning_rate": 3.12696703292044e-05,
295
+ "loss": 44.1496,
296
+ "step": 33
297
+ },
298
+ {
299
+ "epoch": 0.001142674698997639,
300
+ "grad_norm": 1.4045735597610474,
301
+ "learning_rate": 2.8081442660546125e-05,
302
+ "loss": 44.1685,
303
+ "step": 34
304
+ },
305
+ {
306
+ "epoch": 0.0011762827783799224,
307
+ "grad_norm": 1.3793485164642334,
308
+ "learning_rate": 2.500000000000001e-05,
309
+ "loss": 44.1507,
310
+ "step": 35
311
+ },
312
+ {
313
+ "epoch": 0.0011762827783799224,
314
+ "eval_loss": 11.037788391113281,
315
+ "eval_runtime": 218.4833,
316
+ "eval_samples_per_second": 114.686,
317
+ "eval_steps_per_second": 28.675,
318
+ "step": 35
319
+ },
320
+ {
321
+ "epoch": 0.001209890857762206,
322
+ "grad_norm": 1.4301193952560425,
323
+ "learning_rate": 2.2040354826462668e-05,
324
+ "loss": 44.1489,
325
+ "step": 36
326
+ },
327
+ {
328
+ "epoch": 0.0012434989371444896,
329
+ "grad_norm": 1.376386284828186,
330
+ "learning_rate": 1.9216926233717085e-05,
331
+ "loss": 44.1472,
332
+ "step": 37
333
+ },
334
+ {
335
+ "epoch": 0.001277107016526773,
336
+ "grad_norm": 1.4159599542617798,
337
+ "learning_rate": 1.6543469682057106e-05,
338
+ "loss": 44.159,
339
+ "step": 38
340
+ },
341
+ {
342
+ "epoch": 0.0013107150959090566,
343
+ "grad_norm": 1.3692336082458496,
344
+ "learning_rate": 1.4033009983067452e-05,
345
+ "loss": 44.1495,
346
+ "step": 39
347
+ },
348
+ {
349
+ "epoch": 0.00134432317529134,
350
+ "grad_norm": 1.4065481424331665,
351
+ "learning_rate": 1.1697777844051105e-05,
352
+ "loss": 44.1369,
353
+ "step": 40
354
+ },
355
+ {
356
+ "epoch": 0.00134432317529134,
357
+ "eval_loss": 11.035011291503906,
358
+ "eval_runtime": 218.3178,
359
+ "eval_samples_per_second": 114.773,
360
+ "eval_steps_per_second": 28.697,
361
+ "step": 40
362
  }
363
  ],
364
  "logging_steps": 1,
 
387
  "attributes": {}
388
  }
389
  },
390
+ "total_flos": 299473305600.0,
391
  "train_batch_size": 4,
392
  "trial_name": null,
393
  "trial_params": null