DevilishDaoSaint committed (verified)
Commit cd5e90d · 1 parent: 4732d8c

Trained with Unsloth

config.json CHANGED
@@ -1,3 +1,30 @@
 {
-  "model_type": "mistral"
-}
+  "_name_or_path": "unsloth/OpenHermes-2.5-Mistral-7B-bnb-4bit",
+  "architectures": [
+    "MistralForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 32000,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "model_type": "mistral",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "sliding_window": 4096,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.43.3",
+  "unsloth_version": "2024.8",
+  "use_cache": false,
+  "vocab_size": 32002
+}
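
The base revision's config.json was a one-key stub; this commit replaces it with the full Mistral-7B configuration. The notable values are eos_token_id 32000 and vocab_size 32002: OpenHermes 2.5 extends Mistral's 32,000-token vocabulary with the two ChatML control tokens, and <|im_end|> (id 32000) serves as end-of-sequence. A minimal sanity-check sketch, assuming a hypothetical repo id (substitute this repository's actual model id):

```python
# Minimal sanity check of the new config; the repo id is a hypothetical placeholder.
from transformers import AutoConfig, AutoTokenizer

repo = "DevilishDaoSaint/OpenHermes-finetune"  # hypothetical id, substitute the real one

config = AutoConfig.from_pretrained(repo)
tokenizer = AutoTokenizer.from_pretrained(repo)

assert config.vocab_size == 32002      # 32,000 base tokens + 2 ChatML tokens
assert config.eos_token_id == 32000
print(tokenizer.convert_ids_to_tokens([32000, 32001]))
# Expected for an OpenHermes-style tokenizer: ['<|im_end|>', '<|im_start|>']
```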
generation_config.json CHANGED
@@ -1,7 +1,6 @@
 {
   "_from_model_config": true,
   "bos_token_id": 1,
-  "eos_token_id": 2,
-  "pad_token_id": 0,
+  "eos_token_id": 32000,
   "transformers_version": "4.43.3"
 }
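
The generation_config.json fix matters at inference time: with the inherited eos_token_id of 2 (Mistral's </s>), model.generate() would decode straight past the <|im_end|> token this ChatML fine-tune actually emits at the end of a turn, while the dropped pad_token_id now comes from config.json (which pins it to 0). A hedged usage sketch, again with a hypothetical repo id:

```python
# Hedged generation sketch; the repo id is a hypothetical placeholder.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "DevilishDaoSaint/OpenHermes-finetune"  # hypothetical id
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(
    repo, torch_dtype=torch.float16, device_map="auto"
)

# ChatML prompt format used by OpenHermes 2.5.
prompt = "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# model.generation_config is loaded from generation_config.json, so with this
# commit eos_token_id is 32000 and decoding stops at <|im_end|>.
out = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(out[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```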
pytorch_model-00001-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c89bd0ce8cf1c1995056e670ef108910ef70bec7eed0d7a9f429f5016140a9d6
+size 4943202016
pytorch_model-00002-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a51f4a7ea4d15bcd7b0515e31494479b7d625d09abe16199d3e00ea71ca0ee71
+size 4999844296
pytorch_model-00003-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a82b6eb9627f1d45f38bf834d6fa563f3d4c4ddf527cb2825d3da2506d375a7e
+size 4540552838
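
The three ADDED files are Git LFS pointers, not the weights themselves; the ~4.5-5.0 GB fp16 shards live in LFS storage, and each pointer records the shard's SHA-256 and byte size. A small sketch that re-verifies downloaded shards against the pointers above (paths assume the shards sit in the current directory):

```python
# Verify downloaded shards against the oid/size values from this commit's LFS pointers.
import hashlib

EXPECTED = {
    "pytorch_model-00001-of-00003.bin":
        ("c89bd0ce8cf1c1995056e670ef108910ef70bec7eed0d7a9f429f5016140a9d6", 4943202016),
    "pytorch_model-00002-of-00003.bin":
        ("a51f4a7ea4d15bcd7b0515e31494479b7d625d09abe16199d3e00ea71ca0ee71", 4999844296),
    "pytorch_model-00003-of-00003.bin":
        ("a82b6eb9627f1d45f38bf834d6fa563f3d4c4ddf527cb2825d3da2506d375a7e", 4540552838),
}

for name, (oid, size) in EXPECTED.items():
    h = hashlib.sha256()
    n = 0
    with open(name, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            h.update(chunk)
            n += len(chunk)
    assert (h.hexdigest(), n) == (oid, size), f"{name} failed verification"
    print(f"{name}: OK ({n} bytes)")
```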
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 12122071040
+    "total_size": 14483496960
   },
   "weight_map": {
     "lm_head.weight": "pytorch_model-00003-of-00003.bin",
@@ -23,33 +23,33 @@
     "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
     "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
     "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "model.layers.10.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.10.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
     "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
     "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
     "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
     "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "model.layers.11.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.11.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.11.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.12.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.12.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.12.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
-    "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.13.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.13.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -140,60 +140,60 @@
     "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.22.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
     "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
     "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.23.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.24.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.24.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.24.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.25.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.25.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.25.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.26.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.26.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.26.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
     "model.layers.27.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
     "model.layers.27.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
     "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
     "model.layers.27.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
     "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
-    "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
     "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
-    "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
-    "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
     "model.layers.28.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
     "model.layers.28.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
     "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",