Training in progress, step 2000

Files changed (5) hide show

config.json CHANGED Viewed

@@ -18,7 +18,7 @@
   "intermediate_size": 1536,
   "max_position_embeddings": 1024,
   "model_type": "bit_llama",
-  "n_ctx": 256,
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "num_key_value_heads": 4,
@@ -28,7 +28,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.39.3",
   "use_cache": true,
   "vocab_size": 43176
 }

   "intermediate_size": 1536,
   "max_position_embeddings": 1024,
   "model_type": "bit_llama",
+  "n_ctx": 128,
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "num_key_value_heads": 4,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
   "use_cache": true,
   "vocab_size": 43176
 }

log ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d43436d78c0dc5d7009542bd2754d4bc26c91e4a6c53477327da71c67c90e33
 size 510960712

 version https://git-lfs.github.com/spec/v1
+oid sha256:f47b1df051e56f0fef4d183159f5a8a342e2060717bd3df32ad3e197786fccb0
 size 510960712

tokenizer.json CHANGED Viewed

@@ -2,13 +2,13 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 256,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
-      "Fixed": 256
     },
     "direction": "Right",
     "pad_to_multiple_of": null,
@@ -148,6 +148,7 @@
     "end_of_word_suffix": null,
     "fuse_unk": true,
     "byte_fallback": true,
     "vocab": {
       "<unk>": 0,
       "<s>": 1,

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 128,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
+      "Fixed": 128
     },
     "direction": "Right",
     "pad_to_multiple_of": null,
     "end_of_word_suffix": null,
     "fuse_unk": true,
     "byte_fallback": true,
+    "ignore_merges": false,
     "vocab": {
       "<unk>": 0,
       "<s>": 1,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9af20991b6fec3e9368be5c3f8160272f41f285a93b72c9efd937bf261412e14
-size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:adebc715473a28003a98e4377f04aa546978c01afad102637a13a54a3fc375bf
+size 4539