u-hyszk committed on
Commit
1669cf7
1 Parent(s): c339308

Training in progress, step 2000

Files changed (5)
  1. config.json +2 -2
  2. log +0 -0
  3. model.safetensors +1 -1
  4. tokenizer.json +3 -2
  5. training_args.bin +2 -2
config.json CHANGED
@@ -18,7 +18,7 @@
  "intermediate_size": 1536,
  "max_position_embeddings": 1024,
  "model_type": "bit_llama",
- "n_ctx": 256,
+ "n_ctx": 128,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_key_value_heads": 4,
@@ -28,7 +28,7 @@
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
- "transformers_version": "4.39.3",
+ "transformers_version": "4.40.1",
  "use_cache": true,
  "vocab_size": 43176
  }
log ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5d43436d78c0dc5d7009542bd2754d4bc26c91e4a6c53477327da71c67c90e33
+ oid sha256:f47b1df051e56f0fef4d183159f5a8a342e2060717bd3df32ad3e197786fccb0
  size 510960712
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
  "version": "1.0",
  "truncation": {
  "direction": "Right",
- "max_length": 256,
+ "max_length": 128,
  "strategy": "LongestFirst",
  "stride": 0
  },
  "padding": {
  "strategy": {
- "Fixed": 256
+ "Fixed": 128
  },
  "direction": "Right",
  "pad_to_multiple_of": null,
@@ -148,6 +148,7 @@
  "end_of_word_suffix": null,
  "fuse_unk": true,
  "byte_fallback": true,
+ "ignore_merges": false,
  "vocab": {
  "<unk>": 0,
  "<s>": 1,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9af20991b6fec3e9368be5c3f8160272f41f285a93b72c9efd937bf261412e14
- size 4920
+ oid sha256:adebc715473a28003a98e4377f04aa546978c01afad102637a13a54a3fc375bf
+ size 4539
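
Note: the config and tokenizer changes in this commit are consistent with each other: the model's context length ("n_ctx") drops from 256 to 128, and the tokenizer's fixed truncation/padding length is updated to match. A minimal sketch of applying the same settings with the standard tokenizers library is shown below (the file path is assumed for illustration; the parameter values mirror the diff above, not a confirmed training script).

from tokenizers import Tokenizer

# Load the tokenizer.json from this commit (path assumed for illustration).
tok = Tokenizer.from_file("tokenizer.json")

# Mirror the diff: LongestFirst truncation and fixed-length padding at 128 tokens.
tok.enable_truncation(max_length=128, strategy="longest_first", stride=0)
tok.enable_padding(length=128, direction="right")

enc = tok.encode("Training in progress, step 2000")
print(len(enc.ids))  # 128 once fixed-length padding is applied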