Upload pytorch_model.bin.index.json
Browse files
- pytorch_model.bin.index.json +12 -12
pytorch_model.bin.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"embed_out.weight": "pytorch_model-00003-of-00003.bin",
|
@@ -106,27 +106,27 @@
|
|
106 |
"gpt_neox.layers.14.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
|
107 |
"gpt_neox.layers.14.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
|
108 |
"gpt_neox.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
109 |
-
"gpt_neox.layers.14.mlp.dense_4h_to_h.bias": "pytorch_model-
|
110 |
-
"gpt_neox.layers.14.mlp.dense_4h_to_h.weight": "pytorch_model-
|
111 |
"gpt_neox.layers.14.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
|
112 |
"gpt_neox.layers.14.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
|
113 |
"gpt_neox.layers.14.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
|
114 |
"gpt_neox.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
115 |
-
"gpt_neox.layers.15.attention.bias": "pytorch_model-
|
116 |
"gpt_neox.layers.15.attention.dense.bias": "pytorch_model-00002-of-00003.bin",
|
117 |
"gpt_neox.layers.15.attention.dense.weight": "pytorch_model-00002-of-00003.bin",
|
118 |
-
"gpt_neox.layers.15.attention.masked_bias": "pytorch_model-
|
119 |
"gpt_neox.layers.15.attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
|
120 |
"gpt_neox.layers.15.attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
|
121 |
-
"gpt_neox.layers.15.attention.rotary_emb.inv_freq": "pytorch_model-
|
122 |
-
"gpt_neox.layers.15.input_layernorm.bias": "pytorch_model-
|
123 |
-
"gpt_neox.layers.15.input_layernorm.weight": "pytorch_model-
|
124 |
"gpt_neox.layers.15.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
|
125 |
"gpt_neox.layers.15.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
|
126 |
"gpt_neox.layers.15.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
|
127 |
"gpt_neox.layers.15.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
|
128 |
-
"gpt_neox.layers.15.post_attention_layernorm.bias": "pytorch_model-
|
129 |
-
"gpt_neox.layers.15.post_attention_layernorm.weight": "pytorch_model-
|
130 |
"gpt_neox.layers.16.attention.bias": "pytorch_model-00002-of-00003.bin",
|
131 |
"gpt_neox.layers.16.attention.dense.bias": "pytorch_model-00002-of-00003.bin",
|
132 |
"gpt_neox.layers.16.attention.dense.weight": "pytorch_model-00002-of-00003.bin",
|
@@ -378,8 +378,8 @@
|
|
378 |
"gpt_neox.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
379 |
"gpt_neox.layers.30.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
|
380 |
"gpt_neox.layers.30.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
|
381 |
-
"gpt_neox.layers.30.mlp.dense_h_to_4h.bias": "pytorch_model-
|
382 |
-
"gpt_neox.layers.30.mlp.dense_h_to_4h.weight": "pytorch_model-
|
383 |
"gpt_neox.layers.30.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
|
384 |
"gpt_neox.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
385 |
"gpt_neox.layers.31.attention.bias": "pytorch_model-00003-of-00003.bin",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 23711020232.0
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"embed_out.weight": "pytorch_model-00003-of-00003.bin",
|
|
|
106 |
"gpt_neox.layers.14.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
|
107 |
"gpt_neox.layers.14.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
|
108 |
"gpt_neox.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
109 |
+
"gpt_neox.layers.14.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00003.bin",
|
110 |
+
"gpt_neox.layers.14.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00003.bin",
|
111 |
"gpt_neox.layers.14.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
|
112 |
"gpt_neox.layers.14.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
|
113 |
"gpt_neox.layers.14.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
|
114 |
"gpt_neox.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
115 |
+
"gpt_neox.layers.15.attention.bias": "pytorch_model-00001-of-00003.bin",
|
116 |
"gpt_neox.layers.15.attention.dense.bias": "pytorch_model-00002-of-00003.bin",
|
117 |
"gpt_neox.layers.15.attention.dense.weight": "pytorch_model-00002-of-00003.bin",
|
118 |
+
"gpt_neox.layers.15.attention.masked_bias": "pytorch_model-00001-of-00003.bin",
|
119 |
"gpt_neox.layers.15.attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
|
120 |
"gpt_neox.layers.15.attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
|
121 |
+
"gpt_neox.layers.15.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
|
122 |
+
"gpt_neox.layers.15.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
|
123 |
+
"gpt_neox.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
124 |
"gpt_neox.layers.15.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
|
125 |
"gpt_neox.layers.15.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
|
126 |
"gpt_neox.layers.15.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
|
127 |
"gpt_neox.layers.15.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
|
128 |
+
"gpt_neox.layers.15.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
|
129 |
+
"gpt_neox.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
130 |
"gpt_neox.layers.16.attention.bias": "pytorch_model-00002-of-00003.bin",
|
131 |
"gpt_neox.layers.16.attention.dense.bias": "pytorch_model-00002-of-00003.bin",
|
132 |
"gpt_neox.layers.16.attention.dense.weight": "pytorch_model-00002-of-00003.bin",
|
|
|
378 |
"gpt_neox.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
379 |
"gpt_neox.layers.30.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
|
380 |
"gpt_neox.layers.30.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
|
381 |
+
"gpt_neox.layers.30.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
|
382 |
+
"gpt_neox.layers.30.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
|
383 |
"gpt_neox.layers.30.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
|
384 |
"gpt_neox.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
385 |
"gpt_neox.layers.31.attention.bias": "pytorch_model-00003-of-00003.bin",
|