lesso10 commited on
Commit
92200e6
·
verified ·
1 Parent(s): adb950e

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72025eee6c8df79570a58f6dfb4738763a40398724aaf8c956bbea01f3d19d9b
3
  size 63592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a13c38a5e8a10c453232fb808b95d43116441c99f250f8c100fda576959b9d2
3
  size 63592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b7903ceb63f7319d7743707770b8458bbb24c5490fbd6cb08d05c524c733f2d
3
  size 136814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28aca89fee032ac1494bc318106ecc362c633271aea450c760905896b0149b4e
3
  size 136814
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1f7e85ec0c7cf94e6539c614b2cdae35e592e6008d0e889a34268e7ffb5ad26
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:593090ac6ddb613a0b67633813c2dfd6652e5eceb859bc48ecee91149eae4ce7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cf4d0d730ccc9e2c8ddc5f19d724b476847a4fd05d50494d967b231b2616759
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 11.035011291503906,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-40",
4
- "epoch": 0.00134432317529134,
5
  "eval_steps": 5,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -359,6 +359,92 @@
359
  "eval_samples_per_second": 114.773,
360
  "eval_steps_per_second": 28.697,
361
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  }
363
  ],
364
  "logging_steps": 1,
@@ -382,12 +468,12 @@
382
  "should_evaluate": false,
383
  "should_log": false,
384
  "should_save": true,
385
- "should_training_stop": false
386
  },
387
  "attributes": {}
388
  }
389
  },
390
- "total_flos": 299473305600.0,
391
  "train_batch_size": 4,
392
  "trial_name": null,
393
  "trial_params": null
 
1
  {
2
+ "best_metric": 11.03365707397461,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 0.001680403969114175,
5
  "eval_steps": 5,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
359
  "eval_samples_per_second": 114.773,
360
  "eval_steps_per_second": 28.697,
361
  "step": 40
362
+ },
363
+ {
364
+ "epoch": 0.0013779312546736235,
365
+ "grad_norm": 1.4056628942489624,
366
+ "learning_rate": 9.549150281252633e-06,
367
+ "loss": 44.1327,
368
+ "step": 41
369
+ },
370
+ {
371
+ "epoch": 0.0014115393340559071,
372
+ "grad_norm": 1.4018856287002563,
373
+ "learning_rate": 7.597595192178702e-06,
374
+ "loss": 44.1258,
375
+ "step": 42
376
+ },
377
+ {
378
+ "epoch": 0.0014451474134381905,
379
+ "grad_norm": 1.462540864944458,
380
+ "learning_rate": 5.852620357053651e-06,
381
+ "loss": 44.1366,
382
+ "step": 43
383
+ },
384
+ {
385
+ "epoch": 0.001478755492820474,
386
+ "grad_norm": 1.461982011795044,
387
+ "learning_rate": 4.322727117869951e-06,
388
+ "loss": 44.1334,
389
+ "step": 44
390
+ },
391
+ {
392
+ "epoch": 0.0015123635722027575,
393
+ "grad_norm": 1.422272801399231,
394
+ "learning_rate": 3.0153689607045845e-06,
395
+ "loss": 44.1435,
396
+ "step": 45
397
+ },
398
+ {
399
+ "epoch": 0.0015123635722027575,
400
+ "eval_loss": 11.033855438232422,
401
+ "eval_runtime": 217.7428,
402
+ "eval_samples_per_second": 115.076,
403
+ "eval_steps_per_second": 28.772,
404
+ "step": 45
405
+ },
406
+ {
407
+ "epoch": 0.001545971651585041,
408
+ "grad_norm": 1.3762067556381226,
409
+ "learning_rate": 1.9369152030840556e-06,
410
+ "loss": 44.1391,
411
+ "step": 46
412
+ },
413
+ {
414
+ "epoch": 0.0015795797309673244,
415
+ "grad_norm": 1.4046872854232788,
416
+ "learning_rate": 1.0926199633097157e-06,
417
+ "loss": 44.1509,
418
+ "step": 47
419
+ },
420
+ {
421
+ "epoch": 0.001613187810349608,
422
+ "grad_norm": 1.41513192653656,
423
+ "learning_rate": 4.865965629214819e-07,
424
+ "loss": 44.1355,
425
+ "step": 48
426
+ },
427
+ {
428
+ "epoch": 0.0016467958897318916,
429
+ "grad_norm": 1.422555923461914,
430
+ "learning_rate": 1.2179748700879012e-07,
431
+ "loss": 44.121,
432
+ "step": 49
433
+ },
434
+ {
435
+ "epoch": 0.001680403969114175,
436
+ "grad_norm": 1.4306371212005615,
437
+ "learning_rate": 0.0,
438
+ "loss": 44.1407,
439
+ "step": 50
440
+ },
441
+ {
442
+ "epoch": 0.001680403969114175,
443
+ "eval_loss": 11.03365707397461,
444
+ "eval_runtime": 218.4711,
445
+ "eval_samples_per_second": 114.693,
446
+ "eval_steps_per_second": 28.677,
447
+ "step": 50
448
  }
449
  ],
450
  "logging_steps": 1,
 
468
  "should_evaluate": false,
469
  "should_log": false,
470
  "should_save": true,
471
+ "should_training_stop": true
472
  },
473
  "attributes": {}
474
  }
475
  },
476
+ "total_flos": 374341632000.0,
477
  "train_batch_size": 4,
478
  "trial_name": null,
479
  "trial_params": null