Add model safetensor files
Browse files- .gitattributes +1 -0
- README.md +44 -3
- config.json +3 -0
- mergekit_config.yml +14 -0
- model-00001-of-00004.safetensors +3 -0
- model-00002-of-00004.safetensors +3 -0
- model-00003-of-00004.safetensors +3 -0
- model-00004-of-00004.safetensors +3 -0
- model.safetensors.index.json +3 -0
- special_tokens_map.json +3 -0
- tokenizer.json +3 -0
- tokenizer_config.json +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.json filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,3 +1,44 @@
|
|
1 |
-
---
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
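base_model: [meta-llama/Llama-3.1-8B-Instruct, arcee-ai/Llama-3.1-SuperNova-Lite, deepseek-ai/DeepSeek-R1-Distill-Llama-8B, FuseAI/FuseChat-Llama-3.1-8B-Instruct]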
|
3 |
+
library_name: transformers
|
4 |
+
tags:
|
5 |
+
- mergekit
|
6 |
+
- merge
|
7 |
+
|
8 |
+
---
|
9 |
+
# Llama3.1-SuperDeepFuse
|
10 |
+
|
11 |
+
An 8B-parameter language model that merges three high-performance distilled models to improve reasoning, instruction following, and performance in mathematics and coding.
|
12 |
+
|
13 |
+
## Model Highlights
|
14 |
+
|
15 |
+
- **Size**: 8 billion parameters
|
16 |
+
- **Base**: [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct)
|
17 |
+
- **Merged Sources**:
|
18 |
+
  - [arcee-ai/Llama-3.1-**Super**Nova-Lite](https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite)
|
19 |
+
  - [deepseek-ai/**Deep**Seek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B)
|
20 |
+
  - [FuseAI/**Fuse**Chat-Llama-3.1-8B-Instruct](https://huggingface.co/FuseAI/FuseChat-Llama-3.1-8B-Instruct)
|
21 |
+
- **Merge Method**: `model_stock`
|
22 |
+
|
23 |
+
## Key Capabilities
|
24 |
+
|
25 |
+
- Enhanced multi-task reasoning
|
26 |
+
- Improved mathematical and coding performance
|
27 |
+
- Multilingual support
|
28 |
+
|
29 |
+
## Performance Notes
|
30 |
+
|
31 |
+
- Maintains Llama 3.1 safety standards
|
32 |
+
- Suitable for consumer GPU deployment
|
33 |
+
- Balanced performance across diverse tasks
|
34 |
+
|
35 |
+
## Considerations
|
36 |
+
|
37 |
+
- Still being benchmarked
|
38 |
+
- Capabilities limited compared to larger model variants
|
39 |
+
- Like all language models, it can produce misleading or incorrect output
|
40 |
+
- Outputs should be independently verified
|
41 |
+
|
42 |
+
## Licensing
|
43 |
+
|
44 |
+
Use of this model is governed by the Llama 3.1 Community License, the same terms that apply to the Llama 3.1 base model.
|
config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed89e3fc32eac85f28a592e4ea392e7f8e81f1db8ca89ee3b093ca1d9857db95
|
3 |
+
size 931
|
mergekit_config.yml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
merge_method: model_stock
|
2 |
+
models:
|
3 |
+
  - model: /drive2/FuseChat-Llama-3.1-8B-Instruct
|
4 |
+
    parameters:
|
5 |
+
      weight: 1.0
|
6 |
+
  - model: /drive2/DeepSeek-R1-Distill-Llama-8B
|
7 |
+
    parameters:
|
8 |
+
      weight: 1.0
|
9 |
+
  - model: /drive2/Llama-3.1-SuperNova-Lite
|
10 |
+
    parameters:
|
11 |
+
      weight: 1.0
|
12 |
+
base_model: /drive2/Meta-Llama-3.1-8B-Instruct
|
13 |
+
dtype: bfloat16
|
14 |
+
normalize: true
|
model-00001-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d585ff01d81176a5efe4822df85be28ad08ae072bdc044f55279151b8be37cc7
|
3 |
+
size 4953586384
|
model-00002-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2253d548cc8445cdf0f78377abd75d5e293db7e41aae4a3e81b2467915cd4a8a
|
3 |
+
size 4999819336
|
model-00003-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:867dac8f52c69645a5bd25a3f5b59eba7195e38c3fa4a81ad5b929c915796c65
|
3 |
+
size 4915916144
|
model-00004-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a7f1935861baf6a2142eb008c7686711f8284d360f90e4fff5783aee8649e1b
|
3 |
+
size 1191234472
|
model.safetensors.index.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97e68b2850e908ed39cb4bec9519ff54552681b62dc3f1677f42a3f642536124
|
3 |
+
size 22798
|
special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f38c73729248f6c127296386e3cdde96e254636cc58b4169d3fd32328d9a8ec
|
3 |
+
size 296
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79e3e522635f3171300913bb421464a87de6222182a0570b9b2ccba2a964b2b4
|
3 |
+
size 9085657
|
tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24e8a6dc2547164b7002e3125f10b415105644fcf02bf9ad8b674c87b1eaaed6
|
3 |
+
size 50870
|