Upload JetMoEForCausalLM

Browse files

Files changed (6) hide show

config.json +1 -1
model-00001-of-00004.safetensors +2 -2
model-00002-of-00004.safetensors +2 -2
model-00003-of-00004.safetensors +2 -2
model-00004-of-00004.safetensors +2 -2
model.safetensors.index.json +7 -7

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "AndreaUnibo/JetMoE_rank_infill_full_trained_2",
   "activation_function": "silu",
   "architectures": [
     "JetMoEForCausalLM"

 {
+  "_name_or_path": "rank_infill_base",
   "activation_function": "silu",
   "architectures": [
     "JetMoEForCausalLM"

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:295b25ea62fbd6373b9dc10e621a84ae48143e533eff8b2a980a9493bde2b323
-size 4978881230

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0700e37ab4889137484024928cb177b4d0e8e0bf06d7c12a21de81d46c226e7
+size 4978881232

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f94d5d1e1f709f904da9762d61a336a4cbbd74d384f9f85984d2c67772c4f942
-size 4984201481

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2e63469847c7a242bd4761e3304fa7488051cdaa53e40605abd37becbdc16b9
+size 4984201479

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef6e6de60508c607dfe93703bd52661284a1050802e7862074b8577a2fd286e8
-size 4845635182

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3a3adc8dce89f083edc6eb35611c8449603b5073903ca9c4f1eeef5e9f0c6a0
+size 4845635181

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37824c4cbe71d1b46db07a5e18fc7f0913e2941e00475d5147b62088c0437d6f
-size 1940295882

 version https://git-lfs.github.com/spec/v1
+oid sha256:fcb39d7d95179313144aedefee5e981090bf6dabf7ec6731dd23fe66dafbbdd6
+size 1940295881

model.safetensors.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 16748934199
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
@@ -14,12 +14,6 @@
     "model.layers.0.mlp.router.layer.weight.nested_quant_map": "model-00001-of-00004.safetensors",
     "model.layers.0.mlp.router.layer.weight.quant_map": "model-00001-of-00004.safetensors",
     "model.layers.0.mlp.router.layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
-    "model.layers.0.mlp.router.perturbation.weight": "model-00001-of-00004.safetensors",
-    "model.layers.0.mlp.router.perturbation.weight.absmax": "model-00001-of-00004.safetensors",
-    "model.layers.0.mlp.router.perturbation.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "model.layers.0.mlp.router.perturbation.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "model.layers.0.mlp.router.perturbation.weight.quant_map": "model-00001-of-00004.safetensors",
-    "model.layers.0.mlp.router.perturbation.weight.quant_state.bitsandbytes__fp4": "model-00001-of-00004.safetensors",
     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
     "model.layers.0.self_attention.experts.bias": "model-00001-of-00004.safetensors",
     "model.layers.0.self_attention.experts.input_linear.weight": "model-00001-of-00004.safetensors",
@@ -46,6 +40,12 @@
     "model.layers.1.mlp.router.layer.weight.nested_quant_map": "model-00001-of-00004.safetensors",
     "model.layers.1.mlp.router.layer.weight.quant_map": "model-00001-of-00004.safetensors",
     "model.layers.1.mlp.router.layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
     "model.layers.1.self_attention.experts.bias": "model-00001-of-00004.safetensors",
     "model.layers.1.self_attention.experts.input_linear.weight": "model-00001-of-00004.safetensors",

 {
   "metadata": {
+    "total_size": 16748934197
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
     "model.layers.0.mlp.router.layer.weight.nested_quant_map": "model-00001-of-00004.safetensors",
     "model.layers.0.mlp.router.layer.weight.quant_map": "model-00001-of-00004.safetensors",
     "model.layers.0.mlp.router.layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
     "model.layers.0.self_attention.experts.bias": "model-00001-of-00004.safetensors",
     "model.layers.0.self_attention.experts.input_linear.weight": "model-00001-of-00004.safetensors",
     "model.layers.1.mlp.router.layer.weight.nested_quant_map": "model-00001-of-00004.safetensors",
     "model.layers.1.mlp.router.layer.weight.quant_map": "model-00001-of-00004.safetensors",
     "model.layers.1.mlp.router.layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.router.perturbation.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.router.perturbation.weight.absmax": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.router.perturbation.weight.nested_absmax": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.router.perturbation.weight.nested_quant_map": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.router.perturbation.weight.quant_map": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.router.perturbation.weight.quant_state.bitsandbytes__fp4": "model-00001-of-00004.safetensors",
     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
     "model.layers.1.self_attention.experts.bias": "model-00001-of-00004.safetensors",
     "model.layers.1.self_attention.experts.input_linear.weight": "model-00001-of-00004.safetensors",