---
# Mixture-of-experts merge configuration (key names match the mergekit-moe
# schema — TODO confirm this is the consuming tool).
#
# The original file had every key collapsed onto a single line, which is not
# parseable YAML (a plain scalar cannot contain ": "). The structure below is
# reconstructed from the key order; all scalar values are unchanged.

# Model named as the base of the merge.
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
# Gate initialisation mode.
gate_mode: random
# Output dtype.
dtype: float16
tokenizer_source: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
experts_per_token: 2

experts:
  - source_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
  # NOTE(review): this expert is a 1.5B checkpoint while the base model and
  # the other expert are 7B. Presumably all experts must share the base
  # model's architecture and tensor shapes — verify this entry is intended.
  - source_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B

shared_experts:
  # Assumption: positive_prompts and residual_scale belong to this single
  # shared-expert entry (they followed it directly in the flattened original).
  - source_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
    # NOTE(review): gate_mode is "random" above; confirm prompts are accepted
    # (or needed) for a shared expert in that mode before running the merge.
    positive_prompts:
      - ""
    residual_scale: 0.1