Naozumi0512
committed on
Upload 7 files
Browse files- LICENSE +21 -0
- README.md +21 -1
- adapter_config.json +38 -0
- adapter_model.safetensors +3 -0
- config.json +27 -0
- tokenizer.json +0 -0
- tokenizer_config.json +35 -0
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 DeepSeek
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1,3 +1,23 @@
|
|
1 |
---
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
base_model: Qwen/Qwen2.5-32B
|
3 |
+
library_name: transformers
|
4 |
+
tags:
|
5 |
+
- mergekit
|
6 |
+
- peft
|
7 |
+
|
8 |
---
|
9 |
+
# DeepSeek-R1-Distill-Qwen-32B-lora-r64
|
10 |
+
|
11 |
+
This is a LoRA adapter extracted from the language model [DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B). It was extracted using [mergekit](https://github.com/arcee-ai/mergekit).
|
12 |
+
|
13 |
+
## LoRA Details
|
14 |
+
|
15 |
+
This LoRA adapter was extracted from [DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) and uses [Qwen2.5-32B](https://huggingface.co/Qwen/Qwen2.5-32B) as a base.
|
16 |
+
|
17 |
+
### Parameters
|
18 |
+
|
19 |
+
The following command was used to extract this LoRA adapter:
|
20 |
+
|
21 |
+
```sh
|
22 |
+
mergekit-extract-lora DeepSeek-R1-Distill-Qwen-32B Qwen2.5-32B OUTPUT_PATH --no-lazy-unpickle --rank=64
|
23 |
+
```
|
adapter_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "Qwen2.5-32B",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layers_pattern": null,
|
10 |
+
"layers_to_transform": null,
|
11 |
+
"loftq_config": {},
|
12 |
+
"lora_alpha": 64,
|
13 |
+
"lora_dropout": 0,
|
14 |
+
"megatron_config": null,
|
15 |
+
"megatron_core": "megatron.core",
|
16 |
+
"modules_to_save": [
|
17 |
+
"input_layernorm",
|
18 |
+
"norm",
|
19 |
+
"post_attention_layernorm"
|
20 |
+
],
|
21 |
+
"peft_type": "LORA",
|
22 |
+
"r": 64,
|
23 |
+
"rank_pattern": {},
|
24 |
+
"revision": null,
|
25 |
+
"target_modules": [
|
26 |
+
"gate_proj",
|
27 |
+
"v_proj",
|
28 |
+
"embed_tokens",
|
29 |
+
"up_proj",
|
30 |
+
"k_proj",
|
31 |
+
"q_proj",
|
32 |
+
"lm_head",
|
33 |
+
"o_proj",
|
34 |
+
"down_proj"
|
35 |
+
],
|
36 |
+
"task_type": "CAUSAL_LM",
|
37 |
+
"use_rslora": false
|
38 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc822e089a4a917c93e5a08aca63466381603dda3d6f25bc83e575076c26dc91
|
3 |
+
size 1115441688
|
config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"Qwen2ForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_dropout": 0.0,
|
6 |
+
"bos_token_id": 151643,
|
7 |
+
"eos_token_id": 151643,
|
8 |
+
"hidden_act": "silu",
|
9 |
+
"hidden_size": 5120,
|
10 |
+
"initializer_range": 0.02,
|
11 |
+
"intermediate_size": 27648,
|
12 |
+
"max_position_embeddings": 131072,
|
13 |
+
"max_window_layers": 64,
|
14 |
+
"model_type": "qwen2",
|
15 |
+
"num_attention_heads": 40,
|
16 |
+
"num_hidden_layers": 64,
|
17 |
+
"num_key_value_heads": 8,
|
18 |
+
"rms_norm_eps": 1e-05,
|
19 |
+
"rope_theta": 1000000.0,
|
20 |
+
"sliding_window": 131072,
|
21 |
+
"tie_word_embeddings": false,
|
22 |
+
"torch_dtype": "bfloat16",
|
23 |
+
"transformers_version": "4.43.1",
|
24 |
+
"use_cache": true,
|
25 |
+
"use_sliding_window": false,
|
26 |
+
"vocab_size": 152064
|
27 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"bos_token": {
|
5 |
+
"__type": "AddedToken",
|
6 |
+
"content": "<|begin▁of▁sentence|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": true,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"clean_up_tokenization_spaces": false,
|
13 |
+
"eos_token": {
|
14 |
+
"__type": "AddedToken",
|
15 |
+
"content": "<|end▁of▁sentence|>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": true,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false
|
20 |
+
},
|
21 |
+
"legacy": true,
|
22 |
+
"model_max_length": 16384,
|
23 |
+
"pad_token": {
|
24 |
+
"__type": "AddedToken",
|
25 |
+
"content": "<|end▁of▁sentence|>",
|
26 |
+
"lstrip": false,
|
27 |
+
"normalized": true,
|
28 |
+
"rstrip": false,
|
29 |
+
"single_word": false
|
30 |
+
},
|
31 |
+
"sp_model_kwargs": {},
|
32 |
+
"unk_token": null,
|
33 |
+
"tokenizer_class": "LlamaTokenizerFast",
|
34 |
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}"
|
35 |
+
}
|