awni committed on
Commit
31638ac
·
verified ·
1 Parent(s): 931d100

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +33 -0
  2. config.json +69 -0
  3. model-00001-of-00070.safetensors +3 -0
  4. model-00002-of-00070.safetensors +3 -0
  5. model-00003-of-00070.safetensors +3 -0
  6. model-00004-of-00070.safetensors +3 -0
  7. model-00005-of-00070.safetensors +3 -0
  8. model-00006-of-00070.safetensors +3 -0
  9. model-00007-of-00070.safetensors +3 -0
  10. model-00008-of-00070.safetensors +3 -0
  11. model-00009-of-00070.safetensors +3 -0
  12. model-00010-of-00070.safetensors +3 -0
  13. model-00011-of-00070.safetensors +3 -0
  14. model-00012-of-00070.safetensors +3 -0
  15. model-00013-of-00070.safetensors +3 -0
  16. model-00014-of-00070.safetensors +3 -0
  17. model-00015-of-00070.safetensors +3 -0
  18. model-00016-of-00070.safetensors +3 -0
  19. model-00017-of-00070.safetensors +3 -0
  20. model-00018-of-00070.safetensors +3 -0
  21. model-00019-of-00070.safetensors +3 -0
  22. model-00020-of-00070.safetensors +3 -0
  23. model-00021-of-00070.safetensors +3 -0
  24. model-00022-of-00070.safetensors +3 -0
  25. model-00023-of-00070.safetensors +3 -0
  26. model-00024-of-00070.safetensors +3 -0
  27. model-00025-of-00070.safetensors +3 -0
  28. model-00026-of-00070.safetensors +3 -0
  29. model-00027-of-00070.safetensors +3 -0
  30. model-00028-of-00070.safetensors +3 -0
  31. model-00029-of-00070.safetensors +3 -0
  32. model-00030-of-00070.safetensors +3 -0
  33. model-00031-of-00070.safetensors +3 -0
  34. model-00032-of-00070.safetensors +3 -0
  35. model-00033-of-00070.safetensors +3 -0
  36. model-00034-of-00070.safetensors +3 -0
  37. model-00035-of-00070.safetensors +3 -0
  38. model-00036-of-00070.safetensors +3 -0
  39. model-00037-of-00070.safetensors +3 -0
  40. model-00038-of-00070.safetensors +3 -0
  41. model-00039-of-00070.safetensors +3 -0
  42. model-00040-of-00070.safetensors +3 -0
  43. model-00041-of-00070.safetensors +3 -0
  44. model-00042-of-00070.safetensors +3 -0
  45. model-00043-of-00070.safetensors +3 -0
  46. model-00044-of-00070.safetensors +3 -0
  47. model-00045-of-00070.safetensors +3 -0
  48. model-00046-of-00070.safetensors +3 -0
  49. model-00047-of-00070.safetensors +3 -0
  50. model-00048-of-00070.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: deepseek-ai/DeepSeek-R1
3
+ tags:
4
+ - mlx
5
+ ---
6
+
7
+ # mlx-community/DeepSeek-R1-3bit
8
+
9
+ The model [mlx-community/DeepSeek-R1-3bit](https://huggingface.co/mlx-community/DeepSeek-R1-3bit) was
10
+ converted to MLX format from [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
11
+ using mlx-lm version **0.21.0**.
12
+
13
+ ## Use with mlx
14
+
15
+ ```bash
16
+ pip install mlx-lm
17
+ ```
18
+
19
+ ```python
20
+ from mlx_lm import load, generate
21
+
22
+ model, tokenizer = load("mlx-community/DeepSeek-R1-3bit")
23
+
24
+ prompt = "hello"
25
+
26
+ if tokenizer.chat_template is not None:
27
+     messages = [{"role": "user", "content": prompt}]
28
+     prompt = tokenizer.apply_chat_template(
29
+         messages, add_generation_prompt=True
30
+     )
31
+
32
+ response = generate(model, tokenizer, prompt=prompt, verbose=True)
33
+ ```
config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "aux_loss_alpha": 0.001,
13
+ "bos_token_id": 0,
14
+ "eos_token_id": 1,
15
+ "ep_size": 1,
16
+ "first_k_dense_replace": 3,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 7168,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 18432,
21
+ "kv_lora_rank": 512,
22
+ "max_position_embeddings": 163840,
23
+ "model_type": "deepseek_v3",
24
+ "moe_intermediate_size": 2048,
25
+ "moe_layer_freq": 1,
26
+ "n_group": 8,
27
+ "n_routed_experts": 256,
28
+ "n_shared_experts": 1,
29
+ "norm_topk_prob": true,
30
+ "num_attention_heads": 128,
31
+ "num_experts_per_tok": 8,
32
+ "num_hidden_layers": 61,
33
+ "num_key_value_heads": 128,
34
+ "num_nextn_predict_layers": 1,
35
+ "pretraining_tp": 1,
36
+ "q_lora_rank": 1536,
37
+ "qk_nope_head_dim": 128,
38
+ "qk_rope_head_dim": 64,
39
+ "quantization": {
40
+ "group_size": 32,
41
+ "bits": 3
42
+ },
43
+ "quantization_config": {
44
+ "group_size": 32,
45
+ "bits": 3
46
+ },
47
+ "rms_norm_eps": 1e-06,
48
+ "rope_scaling": {
49
+ "beta_fast": 32,
50
+ "beta_slow": 1,
51
+ "factor": 40,
52
+ "mscale": 1.0,
53
+ "mscale_all_dim": 1.0,
54
+ "original_max_position_embeddings": 4096,
55
+ "type": "yarn"
56
+ },
57
+ "rope_theta": 10000,
58
+ "routed_scaling_factor": 2.5,
59
+ "scoring_func": "sigmoid",
60
+ "seq_aux": true,
61
+ "tie_word_embeddings": false,
62
+ "topk_group": 4,
63
+ "topk_method": "noaux_tc",
64
+ "torch_dtype": "bfloat16",
65
+ "transformers_version": "4.46.3",
66
+ "use_cache": true,
67
+ "v_head_dim": 128,
68
+ "vocab_size": 129280
69
+ }
model-00001-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdbf59fe5206b7e9d9f8fdfc675a1b167fdd7c4106254f6475f7f336e7e14ce6
3
+ size 5190304316
model-00002-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:465650dfc1c377b7a1ec19172f6126cdee15a03478928b87d2deeed739edc1be
3
+ size 5286663082
model-00003-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:914974aa50437b8cd482517dfad72c1795c34f9dab6374f58997ea042b67d6c9
3
+ size 4347139111
model-00004-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a55ae58e1faa43fb40d747046a9bdd65d8629cf20e2c28ce57e27a490d753aa
3
+ size 5286663080
model-00005-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa5981a4920198bc8590aebbf479d9e1c08d895b50c6af39fc18c9829b5ba370
3
+ size 4347139063
model-00006-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fdfd53c684c61e24b84e510debb7e98259232fd5157e07ea65ba0ef2529825f
3
+ size 5167383441
model-00007-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b7891bbc343b74d1b46f48ea6d6dcfb72efe21cdaa33f397fb298de2b473e6e
3
+ size 4347139109
model-00008-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ab274ffc532ac7ffe100ee7a3593f9d4e1a2e1edb0d87b944381c57b55fe947
3
+ size 5286663084
model-00009-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74b529093d9182ef3691a987f536cc1634172afe1ca62f466e024cf153b9e1ad
3
+ size 4347139131
model-00010-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac831496b0745ad54f38299a6babca0fe81d21763dab1794dc70d6490181c39
3
+ size 5286663069
model-00011-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3814ed7047bc9e75e7fb153bdf667ffd64d5eb38328f38c5183b4278117cd34
3
+ size 4347139059
model-00012-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f62bd81a5bc4488ecfa5004ba387a0f6e8c2f4950e7cf946272067526ce59a11
3
+ size 5167383450
model-00013-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b89426287a8ed085c370729ab6f6dd17d9957aeb9000af05b5c0e1ecb18f3b48
3
+ size 4347139129
model-00014-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0799dcc38454e9a4976bb728d77e86c58c3bafbcf29e84f28fe6e9b997078eb5
3
+ size 5286663083
model-00015-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daabc8afaf3096adfedd30454fcfc58ed5fef1a5e9c47a0f0fcc29c5f5776e23
3
+ size 4347139117
model-00016-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91ddc966f98bf32b80840a06f3e5efbccc2cc971568eb03de7213554bcbeeff8
3
+ size 5286663117
model-00017-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:829f4ece499a65e66000190e68a0e4e95e958640f538ed7581bfe8736f7b476b
3
+ size 4347139073
model-00018-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4805117d67eaa00eb07fc7b558947b8e80d87d11fbfb50834525ca4bd1c88990
3
+ size 5167383450
model-00019-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da66c48a19e1c7cb8e1f2b62c46a409d24194a101f47c8046fb1a53084175a22
3
+ size 4347139115
model-00020-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbd2b12f06187f4e4c91274351cb956ea28f5edc293a5b689b13fbb71727a4f
3
+ size 5286663117
model-00021-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78a4890a3954c9b9a629d01536994eba10a707b7b95dde4435f496d063c44526
3
+ size 4347139147
model-00022-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ca4d366e4d8a262b66c6a1657a403b8814cbe138b61c6c25852aae175f5e912
3
+ size 5286663043
model-00023-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35857c082ce32e171db9a5e4d7b7bfc89806792d69e9d2f10ced9f0a02876c51
3
+ size 4347139039
model-00024-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e84afb71461fd743778da3895431a73a7dd5401292bb06c6c5ca78e41250f3f7
3
+ size 5167383446
model-00025-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:780f1a7f9bdaf05d0b8219326051842c84bbd8cf5bafd4d070c413dd0385b7d9
3
+ size 4347139147
model-00026-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f23be93609b8bce9e74df36b1a1fe8efd7b7ba3d0645419198dc668e5126fb
3
+ size 5286663083
model-00027-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37e0e7f9d9991ba431568822bdf554f171f3221b37cd3c8536e44041afdf98ec
3
+ size 4347139101
model-00028-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f5fd7bbd7582c529045cbdfa883e2c0f528c1861216c8b58ac0236fe2a20351
3
+ size 5286663033
model-00029-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8a138177a425430bedbc507caba9cda5b19affde8cd5bce1a8b8fa73f980435
3
+ size 4347139073
model-00030-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b62e8ee05bc012fd06f1624208cd4ee6e9a917a1a8983ddc2f7436f184e8a7d3
3
+ size 5167383450
model-00031-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01564e1183163276a9c7f18f24c6432f99da140740f213d57214b487af9b2ad3
3
+ size 4347139147
model-00032-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49c1a99b26047598d40998094c723a33b72d04e75d35e3f2504ca5548f8a354b
3
+ size 5286663107
model-00033-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ba7ea2f4298834c63a7a0a4ff4be8e017f82d159ac469dc720ff546962ca564
3
+ size 4347139149
model-00034-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f5f427e8e7a7c43818b907ca2ed8bb2c0b277f5d10fc26055e542d3e4755a9
3
+ size 5286663117
model-00035-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71f2960b68835662ec6013baf8cca4018ce5c6e90e9fbe08518eb233d096bdd9
3
+ size 4347139091
model-00036-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e97cd020533b75a6194fdba94af631f4bfe87b9f8975a0283a3eff7040aa338
3
+ size 5167383448
model-00037-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:719c9d91c0a988331e01b203bcbd689580a3b64cf4077c9cee6fc8fa982f4782
3
+ size 4347139141
model-00038-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:898fc23e751da7d57e08b9ef5e643c15b410eaa4968edd33efb70d0d17281d81
3
+ size 5286663113
model-00039-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a5d7f726ae28a1de7c7866d33333602422a66b48537ebf2e5fdabaf24c99230
3
+ size 4347139109
model-00040-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c0d9a7dee43a88071cca8c3008bd7cc1523c3f4ff71fe34e77522b7d0259f9b
3
+ size 5286663117
model-00041-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a123d07789c54ff6274b8dac615e542d7500f83d33651c8af69004abb9bb3f17
3
+ size 4347139053
model-00042-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec126a5ce754dd526d1e43f29f8ffce1047969298fec73035aea630b7a25927
3
+ size 5167383450
model-00043-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efebbb62131479edb1a1d3f23ab2a6bb7a92961f8ad340e8dffa5636d4efe859
3
+ size 4347139145
model-00044-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c533f1d7262c8fa049c48deba329a662c0ca537c3d12acb01e1dcaf8e9b27410
3
+ size 5286663073
model-00045-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac813fd8c8fe37b91afde5aeddcfd41a30626ebf8633785581ffdc11928fb53c
3
+ size 4347139107
model-00046-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed529eabff86dd71a282f3d1e9799b8b5d7bd0af5436e763d5d3fb8ed913408
3
+ size 5286663117
model-00047-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3e0b9cce4b9b990f144039471be4cb5296110e43db5e2a5f90cd9dd6cae3fab
3
+ size 4347139081
model-00048-of-00070.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc9effcb48a63447505602b51340b3b968b844be1ec23d79333adb0ef13dd94f
3
+ size 5167383450