Training in progress, step 500, checkpoint

Browse files

Files changed (9) hide show

last-checkpoint/config.json +1 -1
last-checkpoint/model.safetensors +2 -2
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/tokenizer.json +0 -0
last-checkpoint/trainer_state.json +5 -47
last-checkpoint/training_args.bin +1 -1
last-checkpoint/vocab.txt +2 -2

last-checkpoint/config.json CHANGED Viewed

@@ -27,5 +27,5 @@
   "transformers_version": "4.41.2",
   "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 176010
 }

   "transformers_version": "4.41.2",
   "type_vocab_size": 2,
   "use_cache": true,
+  "vocab_size": 175991
 }

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a202bd5af148822e1e3e07b2eba74358cbcb64475af47c8f95c2551a78d63805
-size 885602000

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c360d853b22122494c741e378234f5dcb8fff8c95b65b8be9dd20c6904ec60a
+size 885543556

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9dc68bf25f63fc2820f2b6b3cf5ae7618b2dac90481a0e9aa1c0b4635646784e
-size 1771325498

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d9a70b47037089b1fc83a29f0fccb1d23b2e2cef538317eedcb35b38b9e0f2c
+size 1771208634

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a00a5d457468f8e96ffffdc8c00d28024f7b75aca00e6b753bc84869b865ea3e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:78ff5bd3a0068b490c864320566bda7f5fdd2275d25e879fb1f4de3f45b64f79
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cbdad3847a2cf371eac01931a8217d0a1f823a0f4700aaf5a80943ae3b42e38
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4ec6cc9c7b2f046f413e3061298f47cb8246946a0ef13a247b050ceda37b8fd
 size 1064

last-checkpoint/tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,61 +1,19 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.050897985893986766,
   "eval_steps": 500,
-  "global_step": 3500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.00727114084199811,
-      "grad_norm": 16.903057098388672,
       "learning_rate": 1.9951525727720014e-05,
-      "loss": 7.7051,
       "step": 500
-    },
-    {
-      "epoch": 0.01454228168399622,
-      "grad_norm": 13.898771286010742,
-      "learning_rate": 1.990305145544003e-05,
-      "loss": 6.7587,
-      "step": 1000
-    },
-    {
-      "epoch": 0.021813422525994327,
-      "grad_norm": 15.041173934936523,
-      "learning_rate": 1.985457718316004e-05,
-      "loss": 6.6744,
-      "step": 1500
-    },
-    {
-      "epoch": 0.02908456336799244,
-      "grad_norm": 16.15885353088379,
-      "learning_rate": 1.9806102910880053e-05,
-      "loss": 6.5784,
-      "step": 2000
-    },
-    {
-      "epoch": 0.03635570420999055,
-      "grad_norm": 10.643068313598633,
-      "learning_rate": 1.9757628638600062e-05,
-      "loss": 6.3891,
-      "step": 2500
-    },
-    {
-      "epoch": 0.043626845051988654,
-      "grad_norm": 24.84389305114746,
-      "learning_rate": 1.9709154366320078e-05,
-      "loss": 6.3109,
-      "step": 3000
-    },
-    {
-      "epoch": 0.050897985893986766,
-      "grad_norm": 19.950510025024414,
-      "learning_rate": 1.966068009404009e-05,
-      "loss": 6.2184,
-      "step": 3500
     }
   ],
   "logging_steps": 500,
@@ -75,7 +33,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 752705153984160.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.00727114084199811,
   "eval_steps": 500,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.00727114084199811,
+      "grad_norm": 25.571533203125,
       "learning_rate": 1.9951525727720014e-05,
+      "loss": 7.7581,
       "step": 500
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 113609162581536.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:506a91072dc9c192fc9340a397644c79df2652fd40a31232d3381786d2600a6a
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d95f25e1de35f62e4e204ab538306ecef4ab283e65af475b969cd70e1e4be3e
 size 5176

last-checkpoint/vocab.txt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8eb1a27fddb0f62179356fa7a776bac17897f7b576556dcb7e9e86a79e750698
-size 4759934

 version https://git-lfs.github.com/spec/v1
+oid sha256:e054944723e90105c2050b9d8316d91afe1bf990a0839482c14bf732eb3edc06
+size 4759663