pilotj commited on
Commit
e942e5f
·
verified ·
1 Parent(s): d2188fe

Training in progress, step 15000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a51e5c274e6759482218b1a9949b06aaa0084e095225e121f04e074cdee3b5d
3
  size 438032472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31882bb59ace60deee154374a27b2586376c58b53c75d49fe8ef4f59d2735ff5
3
  size 438032472
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:185eb7e0d70dd8683e519433b92665da245b53e81ea0fedb59d5efbc30ffd46e
3
  size 876185978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82e4b1cf021ca896eccee4933476e879cc09f7c7ffd7042ea35f89be59ff4e77
3
  size 876185978
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24fc33a64c66d2359c7f8585bd7346f3fdcff245a3033a9734ec1eeb41a538ee
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f0d6edc6d5302a8346a12d9e6fe0565676124247fff5bdb99ce34f828f59ad4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22aa3bee8cb4a09eb99f41e952afb7bd82c1672b47992ef6737d79805582f3b4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e3306facb62f6002f7c25d4399b0fe1fa4166d62925b311563b72ad9931d0b9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.47014790773391724,
3
  "best_model_checkpoint": "results/checkpoint-14000",
4
- "epoch": 0.964608834486429,
5
  "eval_steps": 500,
6
- "global_step": 14500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -442,6 +442,21 @@
442
  "eval_samples_per_second": 236.924,
443
  "eval_steps_per_second": 3.713,
444
  "step": 14500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  }
446
  ],
447
  "logging_steps": 500,
@@ -461,7 +476,7 @@
461
  "attributes": {}
462
  }
463
  },
464
- "total_flos": 1.22109837017088e+17,
465
  "train_batch_size": 32,
466
  "trial_name": null,
467
  "trial_params": null
 
1
  {
2
  "best_metric": 0.47014790773391724,
3
  "best_model_checkpoint": "results/checkpoint-14000",
4
+ "epoch": 0.9978712080894092,
5
  "eval_steps": 500,
6
+ "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
442
  "eval_samples_per_second": 236.924,
443
  "eval_steps_per_second": 3.713,
444
  "step": 14500
445
+ },
446
+ {
447
+ "epoch": 0.9978712080894092,
448
+ "grad_norm": 5.323192596435547,
449
+ "learning_rate": 3.336881319850985e-05,
450
+ "loss": 0.4782,
451
+ "step": 15000
452
+ },
453
+ {
454
+ "epoch": 0.9978712080894092,
455
+ "eval_loss": 0.47367075085639954,
456
+ "eval_runtime": 44.5141,
457
+ "eval_samples_per_second": 233.634,
458
+ "eval_steps_per_second": 3.662,
459
+ "step": 15000
460
  }
461
  ],
462
  "logging_steps": 500,
 
476
  "attributes": {}
477
  }
478
  },
479
+ "total_flos": 1.2632052105216e+17,
480
  "train_batch_size": 32,
481
  "trial_name": null,
482
  "trial_params": null