AndreaUnibo committed on
Commit
a2a8827
·
verified ·
1 Parent(s): be36677

Upload JetMoEForCausalLM

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "AndreaUnibo/JetMoE_rank_infill_full_trained_2",
3
  "activation_function": "silu",
4
  "architectures": [
5
  "JetMoEForCausalLM"
 
1
  {
2
+ "_name_or_path": "rank_infill_base",
3
  "activation_function": "silu",
4
  "architectures": [
5
  "JetMoEForCausalLM"
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:295b25ea62fbd6373b9dc10e621a84ae48143e533eff8b2a980a9493bde2b323
3
- size 4978881230
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0700e37ab4889137484024928cb177b4d0e8e0bf06d7c12a21de81d46c226e7
3
+ size 4978881232
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94d5d1e1f709f904da9762d61a336a4cbbd74d384f9f85984d2c67772c4f942
3
- size 4984201481
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2e63469847c7a242bd4761e3304fa7488051cdaa53e40605abd37becbdc16b9
3
+ size 4984201479
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef6e6de60508c607dfe93703bd52661284a1050802e7862074b8577a2fd286e8
3
- size 4845635182
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3a3adc8dce89f083edc6eb35611c8449603b5073903ca9c4f1eeef5e9f0c6a0
3
+ size 4845635181
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37824c4cbe71d1b46db07a5e18fc7f0913e2941e00475d5147b62088c0437d6f
3
- size 1940295882
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcb39d7d95179313144aedefee5e981090bf6dabf7ec6731dd23fe66dafbbdd6
3
+ size 1940295881
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 16748934199
4
  },
5
  "weight_map": {
6
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
@@ -14,12 +14,6 @@
14
  "model.layers.0.mlp.router.layer.weight.nested_quant_map": "model-00001-of-00004.safetensors",
15
  "model.layers.0.mlp.router.layer.weight.quant_map": "model-00001-of-00004.safetensors",
16
  "model.layers.0.mlp.router.layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
17
- "model.layers.0.mlp.router.perturbation.weight": "model-00001-of-00004.safetensors",
18
- "model.layers.0.mlp.router.perturbation.weight.absmax": "model-00001-of-00004.safetensors",
19
- "model.layers.0.mlp.router.perturbation.weight.nested_absmax": "model-00001-of-00004.safetensors",
20
- "model.layers.0.mlp.router.perturbation.weight.nested_quant_map": "model-00001-of-00004.safetensors",
21
- "model.layers.0.mlp.router.perturbation.weight.quant_map": "model-00001-of-00004.safetensors",
22
- "model.layers.0.mlp.router.perturbation.weight.quant_state.bitsandbytes__fp4": "model-00001-of-00004.safetensors",
23
  "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
24
  "model.layers.0.self_attention.experts.bias": "model-00001-of-00004.safetensors",
25
  "model.layers.0.self_attention.experts.input_linear.weight": "model-00001-of-00004.safetensors",
@@ -46,6 +40,12 @@
46
  "model.layers.1.mlp.router.layer.weight.nested_quant_map": "model-00001-of-00004.safetensors",
47
  "model.layers.1.mlp.router.layer.weight.quant_map": "model-00001-of-00004.safetensors",
48
  "model.layers.1.mlp.router.layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
 
 
 
 
 
 
49
  "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
50
  "model.layers.1.self_attention.experts.bias": "model-00001-of-00004.safetensors",
51
  "model.layers.1.self_attention.experts.input_linear.weight": "model-00001-of-00004.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 16748934197
4
  },
5
  "weight_map": {
6
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
 
14
  "model.layers.0.mlp.router.layer.weight.nested_quant_map": "model-00001-of-00004.safetensors",
15
  "model.layers.0.mlp.router.layer.weight.quant_map": "model-00001-of-00004.safetensors",
16
  "model.layers.0.mlp.router.layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
 
 
 
 
 
 
17
  "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
18
  "model.layers.0.self_attention.experts.bias": "model-00001-of-00004.safetensors",
19
  "model.layers.0.self_attention.experts.input_linear.weight": "model-00001-of-00004.safetensors",
 
40
  "model.layers.1.mlp.router.layer.weight.nested_quant_map": "model-00001-of-00004.safetensors",
41
  "model.layers.1.mlp.router.layer.weight.quant_map": "model-00001-of-00004.safetensors",
42
  "model.layers.1.mlp.router.layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
43
+ "model.layers.1.mlp.router.perturbation.weight": "model-00001-of-00004.safetensors",
44
+ "model.layers.1.mlp.router.perturbation.weight.absmax": "model-00001-of-00004.safetensors",
45
+ "model.layers.1.mlp.router.perturbation.weight.nested_absmax": "model-00001-of-00004.safetensors",
46
+ "model.layers.1.mlp.router.perturbation.weight.nested_quant_map": "model-00001-of-00004.safetensors",
47
+ "model.layers.1.mlp.router.perturbation.weight.quant_map": "model-00001-of-00004.safetensors",
48
+ "model.layers.1.mlp.router.perturbation.weight.quant_state.bitsandbytes__fp4": "model-00001-of-00004.safetensors",
49
  "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
50
  "model.layers.1.self_attention.experts.bias": "model-00001-of-00004.safetensors",
51
  "model.layers.1.self_attention.experts.input_linear.weight": "model-00001-of-00004.safetensors",