QuietImpostor committed on
Commit
bca7a8f
1 Parent(s): 2fb9864

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +181 -181
config.json CHANGED
@@ -1,182 +1,182 @@
1
- {
2
- "_name_or_path": "Phi-3.5-MoE-instruct",
3
- "architectures": [
4
- "PhiMoEForCausalLM"
5
- ],
6
- "attention_bias": true,
7
- "attention_dropout": 0.0,
8
- "auto_map": {
9
- "AutoConfig": "configuration_phimoe.PhiMoEConfig",
10
- "AutoModelForCausalLM": "modeling_phimoe.PhiMoEForCausalLM"
11
- },
12
- "bos_token_id": 1,
13
- "eos_token_id": 32000,
14
- "hidden_act": "silu",
15
- "hidden_dropout": 0.0,
16
- "hidden_size": 4096,
17
- "initializer_range": 0.02,
18
- "input_jitter_noise": 0.01,
19
- "intermediate_size": 6400,
20
- "lm_head_bias": true,
21
- "max_position_embeddings": 131072,
22
- "model_type": "rasphi",
23
- "num_attention_heads": 32,
24
- "num_experts_per_tok": 2,
25
- "num_hidden_layers": 32,
26
- "num_key_value_heads": 8,
27
- "num_local_experts": 16,
28
- "original_max_position_embeddings": 4096,
29
- "output_router_logits": false,
30
- "rms_norm_eps": 1e-05,
31
- "rope_scaling": {
32
- "long_factor": [
33
- 1.0199999809265137,
34
- 1.0299999713897705,
35
- 1.0399999618530273,
36
- 1.0499999523162842,
37
- 1.0499999523162842,
38
- 1.0499999523162842,
39
- 1.059999942779541,
40
- 1.059999942779541,
41
- 1.059999942779541,
42
- 1.059999942779541,
43
- 1.059999942779541,
44
- 1.059999942779541,
45
- 1.0999999046325684,
46
- 1.1799999475479126,
47
- 1.1799999475479126,
48
- 1.3700000047683716,
49
- 1.4899998903274536,
50
- 2.109999895095825,
51
- 2.8899998664855957,
52
- 3.9499998092651367,
53
- 4.299999713897705,
54
- 6.429999828338623,
55
- 8.09000015258789,
56
- 10.690000534057617,
57
- 12.050000190734863,
58
- 18.229999542236328,
59
- 18.84000015258789,
60
- 19.899999618530273,
61
- 21.420000076293945,
62
- 26.200000762939453,
63
- 34.28000259399414,
64
- 34.590003967285156,
65
- 38.730003356933594,
66
- 40.22000503540039,
67
- 42.54000473022461,
68
- 44.000003814697266,
69
- 47.590003967285156,
70
- 54.750003814697266,
71
- 56.19000244140625,
72
- 57.44000244140625,
73
- 57.4900016784668,
74
- 61.20000076293945,
75
- 61.540000915527344,
76
- 61.75,
77
- 61.779998779296875,
78
- 62.06999969482422,
79
- 63.11000061035156,
80
- 63.43000030517578,
81
- 63.560001373291016,
82
- 63.71000289916992,
83
- 63.92000198364258,
84
- 63.94000244140625,
85
- 63.94000244140625,
86
- 63.96000289916992,
87
- 63.980003356933594,
88
- 64.0300064086914,
89
- 64.0300064086914,
90
- 64.0300064086914,
91
- 64.04000854492188,
92
- 64.10000610351562,
93
- 64.19000244140625,
94
- 64.20999908447266,
95
- 64.75,
96
- 64.95999908447266
97
- ],
98
- "long_mscale": 1.243163121016122,
99
- "original_max_position_embeddings": 4096,
100
- "short_factor": [
101
- 1.0,
102
- 1.0399999618530273,
103
- 1.0399999618530273,
104
- 1.0399999618530273,
105
- 1.0499999523162842,
106
- 1.0499999523162842,
107
- 1.0499999523162842,
108
- 1.0499999523162842,
109
- 1.0499999523162842,
110
- 1.0499999523162842,
111
- 1.0499999523162842,
112
- 1.0499999523162842,
113
- 1.0499999523162842,
114
- 1.0499999523162842,
115
- 1.059999942779541,
116
- 1.059999942779541,
117
- 1.0699999332427979,
118
- 1.0699999332427979,
119
- 1.0699999332427979,
120
- 1.0699999332427979,
121
- 1.1399999856948853,
122
- 1.159999966621399,
123
- 1.159999966621399,
124
- 1.159999966621399,
125
- 1.159999966621399,
126
- 1.1799999475479126,
127
- 1.1999999284744263,
128
- 1.3199999332427979,
129
- 1.3399999141693115,
130
- 1.3499999046325684,
131
- 1.3999998569488525,
132
- 1.4799998998641968,
133
- 1.4999998807907104,
134
- 1.589999794960022,
135
- 1.6499998569488525,
136
- 1.71999990940094,
137
- 1.8999998569488525,
138
- 1.9099998474121094,
139
- 1.9099998474121094,
140
- 1.9899998903274536,
141
- 1.9999998807907104,
142
- 1.9999998807907104,
143
- 2.009999990463257,
144
- 2.009999990463257,
145
- 2.009999990463257,
146
- 2.009999990463257,
147
- 2.009999990463257,
148
- 2.009999990463257,
149
- 2.009999990463257,
150
- 2.009999990463257,
151
- 2.009999990463257,
152
- 2.009999990463257,
153
- 2.009999990463257,
154
- 2.009999990463257,
155
- 2.009999990463257,
156
- 2.009999990463257,
157
- 2.009999990463257,
158
- 2.009999990463257,
159
- 2.009999990463257,
160
- 2.0999999046325684,
161
- 2.319999933242798,
162
- 2.419999837875366,
163
- 2.5899999141693115,
164
- 2.7899999618530273
165
- ],
166
- "short_mscale": 1.243163121016122,
167
- "type": "longrope"
168
- },
169
- "rope_theta": 10000.0,
170
- "router_aux_loss_coef": 0.0,
171
- "router_jitter_noise": 0.01,
172
- "sliding_window": 131072,
173
- "tie_word_embeddings": false,
174
- "torch_dtype": "bfloat16",
175
- "transformers_version": "4.43.3",
176
- "use_cache": true,
177
- "vocab_size": 32064,
178
- "reasoning_hidden_size": 2048,
179
- "content_hidden_size": 2048,
180
- "num_reasoning_experts": 8,
181
- "num_content_experts": 8
182
  }
 
1
+ {
2
+ "_name_or_path": "Rasphi-MoE-instruct",
3
+ "architectures": [
4
+ "RasphiForCausalLM"
5
+ ],
6
+ "attention_bias": true,
7
+ "attention_dropout": 0.0,
8
+ "auto_map": {
9
+ "AutoConfig": "configuration_rasphi.RasphiConfig",
10
+ "AutoModelForCausalLM": "modeling_Rasphi.RasphiForCausalLM"
11
+ },
12
+ "bos_token_id": 1,
13
+ "eos_token_id": 32000,
14
+ "hidden_act": "silu",
15
+ "hidden_dropout": 0.0,
16
+ "hidden_size": 4096,
17
+ "initializer_range": 0.02,
18
+ "input_jitter_noise": 0.01,
19
+ "intermediate_size": 6400,
20
+ "lm_head_bias": true,
21
+ "max_position_embeddings": 131072,
22
+ "model_type": "rasphi",
23
+ "num_attention_heads": 32,
24
+ "num_experts_per_tok": 2,
25
+ "num_hidden_layers": 32,
26
+ "num_key_value_heads": 8,
27
+ "num_local_experts": 16,
28
+ "original_max_position_embeddings": 4096,
29
+ "output_router_logits": false,
30
+ "rms_norm_eps": 1e-05,
31
+ "rope_scaling": {
32
+ "long_factor": [
33
+ 1.0199999809265137,
34
+ 1.0299999713897705,
35
+ 1.0399999618530273,
36
+ 1.0499999523162842,
37
+ 1.0499999523162842,
38
+ 1.0499999523162842,
39
+ 1.059999942779541,
40
+ 1.059999942779541,
41
+ 1.059999942779541,
42
+ 1.059999942779541,
43
+ 1.059999942779541,
44
+ 1.059999942779541,
45
+ 1.0999999046325684,
46
+ 1.1799999475479126,
47
+ 1.1799999475479126,
48
+ 1.3700000047683716,
49
+ 1.4899998903274536,
50
+ 2.109999895095825,
51
+ 2.8899998664855957,
52
+ 3.9499998092651367,
53
+ 4.299999713897705,
54
+ 6.429999828338623,
55
+ 8.09000015258789,
56
+ 10.690000534057617,
57
+ 12.050000190734863,
58
+ 18.229999542236328,
59
+ 18.84000015258789,
60
+ 19.899999618530273,
61
+ 21.420000076293945,
62
+ 26.200000762939453,
63
+ 34.28000259399414,
64
+ 34.590003967285156,
65
+ 38.730003356933594,
66
+ 40.22000503540039,
67
+ 42.54000473022461,
68
+ 44.000003814697266,
69
+ 47.590003967285156,
70
+ 54.750003814697266,
71
+ 56.19000244140625,
72
+ 57.44000244140625,
73
+ 57.4900016784668,
74
+ 61.20000076293945,
75
+ 61.540000915527344,
76
+ 61.75,
77
+ 61.779998779296875,
78
+ 62.06999969482422,
79
+ 63.11000061035156,
80
+ 63.43000030517578,
81
+ 63.560001373291016,
82
+ 63.71000289916992,
83
+ 63.92000198364258,
84
+ 63.94000244140625,
85
+ 63.94000244140625,
86
+ 63.96000289916992,
87
+ 63.980003356933594,
88
+ 64.0300064086914,
89
+ 64.0300064086914,
90
+ 64.0300064086914,
91
+ 64.04000854492188,
92
+ 64.10000610351562,
93
+ 64.19000244140625,
94
+ 64.20999908447266,
95
+ 64.75,
96
+ 64.95999908447266
97
+ ],
98
+ "long_mscale": 1.243163121016122,
99
+ "original_max_position_embeddings": 4096,
100
+ "short_factor": [
101
+ 1.0,
102
+ 1.0399999618530273,
103
+ 1.0399999618530273,
104
+ 1.0399999618530273,
105
+ 1.0499999523162842,
106
+ 1.0499999523162842,
107
+ 1.0499999523162842,
108
+ 1.0499999523162842,
109
+ 1.0499999523162842,
110
+ 1.0499999523162842,
111
+ 1.0499999523162842,
112
+ 1.0499999523162842,
113
+ 1.0499999523162842,
114
+ 1.0499999523162842,
115
+ 1.059999942779541,
116
+ 1.059999942779541,
117
+ 1.0699999332427979,
118
+ 1.0699999332427979,
119
+ 1.0699999332427979,
120
+ 1.0699999332427979,
121
+ 1.1399999856948853,
122
+ 1.159999966621399,
123
+ 1.159999966621399,
124
+ 1.159999966621399,
125
+ 1.159999966621399,
126
+ 1.1799999475479126,
127
+ 1.1999999284744263,
128
+ 1.3199999332427979,
129
+ 1.3399999141693115,
130
+ 1.3499999046325684,
131
+ 1.3999998569488525,
132
+ 1.4799998998641968,
133
+ 1.4999998807907104,
134
+ 1.589999794960022,
135
+ 1.6499998569488525,
136
+ 1.71999990940094,
137
+ 1.8999998569488525,
138
+ 1.9099998474121094,
139
+ 1.9099998474121094,
140
+ 1.9899998903274536,
141
+ 1.9999998807907104,
142
+ 1.9999998807907104,
143
+ 2.009999990463257,
144
+ 2.009999990463257,
145
+ 2.009999990463257,
146
+ 2.009999990463257,
147
+ 2.009999990463257,
148
+ 2.009999990463257,
149
+ 2.009999990463257,
150
+ 2.009999990463257,
151
+ 2.009999990463257,
152
+ 2.009999990463257,
153
+ 2.009999990463257,
154
+ 2.009999990463257,
155
+ 2.009999990463257,
156
+ 2.009999990463257,
157
+ 2.009999990463257,
158
+ 2.009999990463257,
159
+ 2.009999990463257,
160
+ 2.0999999046325684,
161
+ 2.319999933242798,
162
+ 2.419999837875366,
163
+ 2.5899999141693115,
164
+ 2.7899999618530273
165
+ ],
166
+ "short_mscale": 1.243163121016122,
167
+ "type": "longrope"
168
+ },
169
+ "rope_theta": 10000.0,
170
+ "router_aux_loss_coef": 0.0,
171
+ "router_jitter_noise": 0.01,
172
+ "sliding_window": 131072,
173
+ "tie_word_embeddings": false,
174
+ "torch_dtype": "bfloat16",
175
+ "transformers_version": "4.43.3",
176
+ "use_cache": true,
177
+ "vocab_size": 32064,
178
+ "reasoning_hidden_size": 2048,
179
+ "content_hidden_size": 2048,
180
+ "num_reasoning_experts": 8,
181
+ "num_content_experts": 8
182
  }