wanzin committed on
Commit
da067a7
·
1 Parent(s): b61fd90

adding opt 6.7b model configurations

Browse files
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_remove_final_layer_norm": false,
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "OPTForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 2,
10
+ "do_layer_norm_before": true,
11
+ "dropout": 0.1,
12
+ "eos_token_id": 2,
13
+ "ffn_dim": 16384,
14
+ "hidden_size": 4096,
15
+ "init_std": 0.02,
16
+ "layerdrop": 0.0,
17
+ "max_position_embeddings": 2048,
18
+ "model_type": "opt",
19
+ "num_attention_heads": 32,
20
+ "num_hidden_layers": 32,
21
+ "pad_token_id": 1,
22
+ "prefix": "</s>",
23
+ "torch_dtype": "float16",
24
+ "transformers_version": "4.21.0.dev0",
25
+ "use_cache": true,
26
+ "vocab_size": 50272,
27
+ "word_embed_proj_dim": 4096
28
+ }
configs/BASELINE.yaml ADDED
@@ -0,0 +1,2832 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ lm_head:
3
+ accum_format: SAME
4
+ approximation_function: NONE
5
+ input_format: SAME
6
+ instance: Linear
7
+ output_format: SAME
8
+ weight_format: SAME
9
+ weight_sparseness: DENSE
10
+ model.decoder.final_layer_norm:
11
+ approximation_function: NONE
12
+ bias_format: SAME
13
+ input_format: SAME
14
+ instance: LayerNorm
15
+ output_format: SAME
16
+ weight_format: SAME
17
+ model.decoder.layers.0.activation_fn:
18
+ approximation_function: NONE
19
+ input_format: SAME
20
+ instance: ReLU
21
+ output_format: SAME
22
+ model.decoder.layers.0.dropout:
23
+ approximation_function: NONE
24
+ input_format: SAME
25
+ instance: Dropout
26
+ output_format: SAME
27
+ model.decoder.layers.0.fc1:
28
+ accum_format: SAME
29
+ approximation_function: NONE
30
+ bias_format: SAME
31
+ input_format: SAME
32
+ instance: Linear
33
+ output_format: SAME
34
+ weight_format: SAME
35
+ weight_sparseness: DENSE
36
+ model.decoder.layers.0.fc2:
37
+ accum_format: SAME
38
+ approximation_function: NONE
39
+ bias_format: SAME
40
+ input_format: SAME
41
+ instance: Linear
42
+ output_format: SAME
43
+ weight_format: SAME
44
+ weight_sparseness: DENSE
45
+ model.decoder.layers.0.final_layer_norm:
46
+ approximation_function: NONE
47
+ bias_format: SAME
48
+ input_format: SAME
49
+ instance: LayerNorm
50
+ output_format: SAME
51
+ weight_format: SAME
52
+ model.decoder.layers.0.self_attn.dropout:
53
+ approximation_function: NONE
54
+ input_format: SAME
55
+ instance: Dropout
56
+ output_format: SAME
57
+ model.decoder.layers.0.self_attn.k_proj:
58
+ accum_format: SAME
59
+ approximation_function: NONE
60
+ bias_format: SAME
61
+ input_format: SAME
62
+ instance: Linear
63
+ output_format: SAME
64
+ weight_format: SAME
65
+ weight_sparseness: DENSE
66
+ model.decoder.layers.0.self_attn.out_proj:
67
+ accum_format: SAME
68
+ approximation_function: NONE
69
+ bias_format: SAME
70
+ input_format: SAME
71
+ instance: Linear
72
+ output_format: SAME
73
+ weight_format: SAME
74
+ weight_sparseness: DENSE
75
+ model.decoder.layers.0.self_attn.q_proj:
76
+ accum_format: SAME
77
+ approximation_function: NONE
78
+ bias_format: SAME
79
+ input_format: SAME
80
+ instance: Linear
81
+ output_format: SAME
82
+ weight_format: SAME
83
+ weight_sparseness: DENSE
84
+ model.decoder.layers.0.self_attn.softmax:
85
+ approximation_function: NONE
86
+ input_format: SAME
87
+ instance: Softmax
88
+ output_format: SAME
89
+ model.decoder.layers.0.self_attn.v_proj:
90
+ accum_format: SAME
91
+ approximation_function: NONE
92
+ bias_format: SAME
93
+ input_format: SAME
94
+ instance: Linear
95
+ output_format: SAME
96
+ weight_format: SAME
97
+ weight_sparseness: DENSE
98
+ model.decoder.layers.0.self_attn_layer_norm:
99
+ approximation_function: NONE
100
+ bias_format: SAME
101
+ input_format: SAME
102
+ instance: LayerNorm
103
+ output_format: SAME
104
+ weight_format: SAME
105
+ model.decoder.layers.1.activation_fn:
106
+ approximation_function: NONE
107
+ input_format: SAME
108
+ instance: ReLU
109
+ output_format: SAME
110
+ model.decoder.layers.1.dropout:
111
+ approximation_function: NONE
112
+ input_format: SAME
113
+ instance: Dropout
114
+ output_format: SAME
115
+ model.decoder.layers.1.fc1:
116
+ accum_format: SAME
117
+ approximation_function: NONE
118
+ bias_format: SAME
119
+ input_format: SAME
120
+ instance: Linear
121
+ output_format: SAME
122
+ weight_format: SAME
123
+ weight_sparseness: DENSE
124
+ model.decoder.layers.1.fc2:
125
+ accum_format: SAME
126
+ approximation_function: NONE
127
+ bias_format: SAME
128
+ input_format: SAME
129
+ instance: Linear
130
+ output_format: SAME
131
+ weight_format: SAME
132
+ weight_sparseness: DENSE
133
+ model.decoder.layers.1.final_layer_norm:
134
+ approximation_function: NONE
135
+ bias_format: SAME
136
+ input_format: SAME
137
+ instance: LayerNorm
138
+ output_format: SAME
139
+ weight_format: SAME
140
+ model.decoder.layers.1.self_attn.dropout:
141
+ approximation_function: NONE
142
+ input_format: SAME
143
+ instance: Dropout
144
+ output_format: SAME
145
+ model.decoder.layers.1.self_attn.k_proj:
146
+ accum_format: SAME
147
+ approximation_function: NONE
148
+ bias_format: SAME
149
+ input_format: SAME
150
+ instance: Linear
151
+ output_format: SAME
152
+ weight_format: SAME
153
+ weight_sparseness: DENSE
154
+ model.decoder.layers.1.self_attn.out_proj:
155
+ accum_format: SAME
156
+ approximation_function: NONE
157
+ bias_format: SAME
158
+ input_format: SAME
159
+ instance: Linear
160
+ output_format: SAME
161
+ weight_format: SAME
162
+ weight_sparseness: DENSE
163
+ model.decoder.layers.1.self_attn.q_proj:
164
+ accum_format: SAME
165
+ approximation_function: NONE
166
+ bias_format: SAME
167
+ input_format: SAME
168
+ instance: Linear
169
+ output_format: SAME
170
+ weight_format: SAME
171
+ weight_sparseness: DENSE
172
+ model.decoder.layers.1.self_attn.softmax:
173
+ approximation_function: NONE
174
+ input_format: SAME
175
+ instance: Softmax
176
+ output_format: SAME
177
+ model.decoder.layers.1.self_attn.v_proj:
178
+ accum_format: SAME
179
+ approximation_function: NONE
180
+ bias_format: SAME
181
+ input_format: SAME
182
+ instance: Linear
183
+ output_format: SAME
184
+ weight_format: SAME
185
+ weight_sparseness: DENSE
186
+ model.decoder.layers.1.self_attn_layer_norm:
187
+ approximation_function: NONE
188
+ bias_format: SAME
189
+ input_format: SAME
190
+ instance: LayerNorm
191
+ output_format: SAME
192
+ weight_format: SAME
193
+ model.decoder.layers.10.activation_fn:
194
+ approximation_function: NONE
195
+ input_format: SAME
196
+ instance: ReLU
197
+ output_format: SAME
198
+ model.decoder.layers.10.dropout:
199
+ approximation_function: NONE
200
+ input_format: SAME
201
+ instance: Dropout
202
+ output_format: SAME
203
+ model.decoder.layers.10.fc1:
204
+ accum_format: SAME
205
+ approximation_function: NONE
206
+ bias_format: SAME
207
+ input_format: SAME
208
+ instance: Linear
209
+ output_format: SAME
210
+ weight_format: SAME
211
+ weight_sparseness: DENSE
212
+ model.decoder.layers.10.fc2:
213
+ accum_format: SAME
214
+ approximation_function: NONE
215
+ bias_format: SAME
216
+ input_format: SAME
217
+ instance: Linear
218
+ output_format: SAME
219
+ weight_format: SAME
220
+ weight_sparseness: DENSE
221
+ model.decoder.layers.10.final_layer_norm:
222
+ approximation_function: NONE
223
+ bias_format: SAME
224
+ input_format: SAME
225
+ instance: LayerNorm
226
+ output_format: SAME
227
+ weight_format: SAME
228
+ model.decoder.layers.10.self_attn.dropout:
229
+ approximation_function: NONE
230
+ input_format: SAME
231
+ instance: Dropout
232
+ output_format: SAME
233
+ model.decoder.layers.10.self_attn.k_proj:
234
+ accum_format: SAME
235
+ approximation_function: NONE
236
+ bias_format: SAME
237
+ input_format: SAME
238
+ instance: Linear
239
+ output_format: SAME
240
+ weight_format: SAME
241
+ weight_sparseness: DENSE
242
+ model.decoder.layers.10.self_attn.out_proj:
243
+ accum_format: SAME
244
+ approximation_function: NONE
245
+ bias_format: SAME
246
+ input_format: SAME
247
+ instance: Linear
248
+ output_format: SAME
249
+ weight_format: SAME
250
+ weight_sparseness: DENSE
251
+ model.decoder.layers.10.self_attn.q_proj:
252
+ accum_format: SAME
253
+ approximation_function: NONE
254
+ bias_format: SAME
255
+ input_format: SAME
256
+ instance: Linear
257
+ output_format: SAME
258
+ weight_format: SAME
259
+ weight_sparseness: DENSE
260
+ model.decoder.layers.10.self_attn.softmax:
261
+ approximation_function: NONE
262
+ input_format: SAME
263
+ instance: Softmax
264
+ output_format: SAME
265
+ model.decoder.layers.10.self_attn.v_proj:
266
+ accum_format: SAME
267
+ approximation_function: NONE
268
+ bias_format: SAME
269
+ input_format: SAME
270
+ instance: Linear
271
+ output_format: SAME
272
+ weight_format: SAME
273
+ weight_sparseness: DENSE
274
+ model.decoder.layers.10.self_attn_layer_norm:
275
+ approximation_function: NONE
276
+ bias_format: SAME
277
+ input_format: SAME
278
+ instance: LayerNorm
279
+ output_format: SAME
280
+ weight_format: SAME
281
+ model.decoder.layers.11.activation_fn:
282
+ approximation_function: NONE
283
+ input_format: SAME
284
+ instance: ReLU
285
+ output_format: SAME
286
+ model.decoder.layers.11.dropout:
287
+ approximation_function: NONE
288
+ input_format: SAME
289
+ instance: Dropout
290
+ output_format: SAME
291
+ model.decoder.layers.11.fc1:
292
+ accum_format: SAME
293
+ approximation_function: NONE
294
+ bias_format: SAME
295
+ input_format: SAME
296
+ instance: Linear
297
+ output_format: SAME
298
+ weight_format: SAME
299
+ weight_sparseness: DENSE
300
+ model.decoder.layers.11.fc2:
301
+ accum_format: SAME
302
+ approximation_function: NONE
303
+ bias_format: SAME
304
+ input_format: SAME
305
+ instance: Linear
306
+ output_format: SAME
307
+ weight_format: SAME
308
+ weight_sparseness: DENSE
309
+ model.decoder.layers.11.final_layer_norm:
310
+ approximation_function: NONE
311
+ bias_format: SAME
312
+ input_format: SAME
313
+ instance: LayerNorm
314
+ output_format: SAME
315
+ weight_format: SAME
316
+ model.decoder.layers.11.self_attn.dropout:
317
+ approximation_function: NONE
318
+ input_format: SAME
319
+ instance: Dropout
320
+ output_format: SAME
321
+ model.decoder.layers.11.self_attn.k_proj:
322
+ accum_format: SAME
323
+ approximation_function: NONE
324
+ bias_format: SAME
325
+ input_format: SAME
326
+ instance: Linear
327
+ output_format: SAME
328
+ weight_format: SAME
329
+ weight_sparseness: DENSE
330
+ model.decoder.layers.11.self_attn.out_proj:
331
+ accum_format: SAME
332
+ approximation_function: NONE
333
+ bias_format: SAME
334
+ input_format: SAME
335
+ instance: Linear
336
+ output_format: SAME
337
+ weight_format: SAME
338
+ weight_sparseness: DENSE
339
+ model.decoder.layers.11.self_attn.q_proj:
340
+ accum_format: SAME
341
+ approximation_function: NONE
342
+ bias_format: SAME
343
+ input_format: SAME
344
+ instance: Linear
345
+ output_format: SAME
346
+ weight_format: SAME
347
+ weight_sparseness: DENSE
348
+ model.decoder.layers.11.self_attn.softmax:
349
+ approximation_function: NONE
350
+ input_format: SAME
351
+ instance: Softmax
352
+ output_format: SAME
353
+ model.decoder.layers.11.self_attn.v_proj:
354
+ accum_format: SAME
355
+ approximation_function: NONE
356
+ bias_format: SAME
357
+ input_format: SAME
358
+ instance: Linear
359
+ output_format: SAME
360
+ weight_format: SAME
361
+ weight_sparseness: DENSE
362
+ model.decoder.layers.11.self_attn_layer_norm:
363
+ approximation_function: NONE
364
+ bias_format: SAME
365
+ input_format: SAME
366
+ instance: LayerNorm
367
+ output_format: SAME
368
+ weight_format: SAME
369
+ model.decoder.layers.12.activation_fn:
370
+ approximation_function: NONE
371
+ input_format: SAME
372
+ instance: ReLU
373
+ output_format: SAME
374
+ model.decoder.layers.12.dropout:
375
+ approximation_function: NONE
376
+ input_format: SAME
377
+ instance: Dropout
378
+ output_format: SAME
379
+ model.decoder.layers.12.fc1:
380
+ accum_format: SAME
381
+ approximation_function: NONE
382
+ bias_format: SAME
383
+ input_format: SAME
384
+ instance: Linear
385
+ output_format: SAME
386
+ weight_format: SAME
387
+ weight_sparseness: DENSE
388
+ model.decoder.layers.12.fc2:
389
+ accum_format: SAME
390
+ approximation_function: NONE
391
+ bias_format: SAME
392
+ input_format: SAME
393
+ instance: Linear
394
+ output_format: SAME
395
+ weight_format: SAME
396
+ weight_sparseness: DENSE
397
+ model.decoder.layers.12.final_layer_norm:
398
+ approximation_function: NONE
399
+ bias_format: SAME
400
+ input_format: SAME
401
+ instance: LayerNorm
402
+ output_format: SAME
403
+ weight_format: SAME
404
+ model.decoder.layers.12.self_attn.dropout:
405
+ approximation_function: NONE
406
+ input_format: SAME
407
+ instance: Dropout
408
+ output_format: SAME
409
+ model.decoder.layers.12.self_attn.k_proj:
410
+ accum_format: SAME
411
+ approximation_function: NONE
412
+ bias_format: SAME
413
+ input_format: SAME
414
+ instance: Linear
415
+ output_format: SAME
416
+ weight_format: SAME
417
+ weight_sparseness: DENSE
418
+ model.decoder.layers.12.self_attn.out_proj:
419
+ accum_format: SAME
420
+ approximation_function: NONE
421
+ bias_format: SAME
422
+ input_format: SAME
423
+ instance: Linear
424
+ output_format: SAME
425
+ weight_format: SAME
426
+ weight_sparseness: DENSE
427
+ model.decoder.layers.12.self_attn.q_proj:
428
+ accum_format: SAME
429
+ approximation_function: NONE
430
+ bias_format: SAME
431
+ input_format: SAME
432
+ instance: Linear
433
+ output_format: SAME
434
+ weight_format: SAME
435
+ weight_sparseness: DENSE
436
+ model.decoder.layers.12.self_attn.softmax:
437
+ approximation_function: NONE
438
+ input_format: SAME
439
+ instance: Softmax
440
+ output_format: SAME
441
+ model.decoder.layers.12.self_attn.v_proj:
442
+ accum_format: SAME
443
+ approximation_function: NONE
444
+ bias_format: SAME
445
+ input_format: SAME
446
+ instance: Linear
447
+ output_format: SAME
448
+ weight_format: SAME
449
+ weight_sparseness: DENSE
450
+ model.decoder.layers.12.self_attn_layer_norm:
451
+ approximation_function: NONE
452
+ bias_format: SAME
453
+ input_format: SAME
454
+ instance: LayerNorm
455
+ output_format: SAME
456
+ weight_format: SAME
457
+ model.decoder.layers.13.activation_fn:
458
+ approximation_function: NONE
459
+ input_format: SAME
460
+ instance: ReLU
461
+ output_format: SAME
462
+ model.decoder.layers.13.dropout:
463
+ approximation_function: NONE
464
+ input_format: SAME
465
+ instance: Dropout
466
+ output_format: SAME
467
+ model.decoder.layers.13.fc1:
468
+ accum_format: SAME
469
+ approximation_function: NONE
470
+ bias_format: SAME
471
+ input_format: SAME
472
+ instance: Linear
473
+ output_format: SAME
474
+ weight_format: SAME
475
+ weight_sparseness: DENSE
476
+ model.decoder.layers.13.fc2:
477
+ accum_format: SAME
478
+ approximation_function: NONE
479
+ bias_format: SAME
480
+ input_format: SAME
481
+ instance: Linear
482
+ output_format: SAME
483
+ weight_format: SAME
484
+ weight_sparseness: DENSE
485
+ model.decoder.layers.13.final_layer_norm:
486
+ approximation_function: NONE
487
+ bias_format: SAME
488
+ input_format: SAME
489
+ instance: LayerNorm
490
+ output_format: SAME
491
+ weight_format: SAME
492
+ model.decoder.layers.13.self_attn.dropout:
493
+ approximation_function: NONE
494
+ input_format: SAME
495
+ instance: Dropout
496
+ output_format: SAME
497
+ model.decoder.layers.13.self_attn.k_proj:
498
+ accum_format: SAME
499
+ approximation_function: NONE
500
+ bias_format: SAME
501
+ input_format: SAME
502
+ instance: Linear
503
+ output_format: SAME
504
+ weight_format: SAME
505
+ weight_sparseness: DENSE
506
+ model.decoder.layers.13.self_attn.out_proj:
507
+ accum_format: SAME
508
+ approximation_function: NONE
509
+ bias_format: SAME
510
+ input_format: SAME
511
+ instance: Linear
512
+ output_format: SAME
513
+ weight_format: SAME
514
+ weight_sparseness: DENSE
515
+ model.decoder.layers.13.self_attn.q_proj:
516
+ accum_format: SAME
517
+ approximation_function: NONE
518
+ bias_format: SAME
519
+ input_format: SAME
520
+ instance: Linear
521
+ output_format: SAME
522
+ weight_format: SAME
523
+ weight_sparseness: DENSE
524
+ model.decoder.layers.13.self_attn.softmax:
525
+ approximation_function: NONE
526
+ input_format: SAME
527
+ instance: Softmax
528
+ output_format: SAME
529
+ model.decoder.layers.13.self_attn.v_proj:
530
+ accum_format: SAME
531
+ approximation_function: NONE
532
+ bias_format: SAME
533
+ input_format: SAME
534
+ instance: Linear
535
+ output_format: SAME
536
+ weight_format: SAME
537
+ weight_sparseness: DENSE
538
+ model.decoder.layers.13.self_attn_layer_norm:
539
+ approximation_function: NONE
540
+ bias_format: SAME
541
+ input_format: SAME
542
+ instance: LayerNorm
543
+ output_format: SAME
544
+ weight_format: SAME
545
+ model.decoder.layers.14.activation_fn:
546
+ approximation_function: NONE
547
+ input_format: SAME
548
+ instance: ReLU
549
+ output_format: SAME
550
+ model.decoder.layers.14.dropout:
551
+ approximation_function: NONE
552
+ input_format: SAME
553
+ instance: Dropout
554
+ output_format: SAME
555
+ model.decoder.layers.14.fc1:
556
+ accum_format: SAME
557
+ approximation_function: NONE
558
+ bias_format: SAME
559
+ input_format: SAME
560
+ instance: Linear
561
+ output_format: SAME
562
+ weight_format: SAME
563
+ weight_sparseness: DENSE
564
+ model.decoder.layers.14.fc2:
565
+ accum_format: SAME
566
+ approximation_function: NONE
567
+ bias_format: SAME
568
+ input_format: SAME
569
+ instance: Linear
570
+ output_format: SAME
571
+ weight_format: SAME
572
+ weight_sparseness: DENSE
573
+ model.decoder.layers.14.final_layer_norm:
574
+ approximation_function: NONE
575
+ bias_format: SAME
576
+ input_format: SAME
577
+ instance: LayerNorm
578
+ output_format: SAME
579
+ weight_format: SAME
580
+ model.decoder.layers.14.self_attn.dropout:
581
+ approximation_function: NONE
582
+ input_format: SAME
583
+ instance: Dropout
584
+ output_format: SAME
585
+ model.decoder.layers.14.self_attn.k_proj:
586
+ accum_format: SAME
587
+ approximation_function: NONE
588
+ bias_format: SAME
589
+ input_format: SAME
590
+ instance: Linear
591
+ output_format: SAME
592
+ weight_format: SAME
593
+ weight_sparseness: DENSE
594
+ model.decoder.layers.14.self_attn.out_proj:
595
+ accum_format: SAME
596
+ approximation_function: NONE
597
+ bias_format: SAME
598
+ input_format: SAME
599
+ instance: Linear
600
+ output_format: SAME
601
+ weight_format: SAME
602
+ weight_sparseness: DENSE
603
+ model.decoder.layers.14.self_attn.q_proj:
604
+ accum_format: SAME
605
+ approximation_function: NONE
606
+ bias_format: SAME
607
+ input_format: SAME
608
+ instance: Linear
609
+ output_format: SAME
610
+ weight_format: SAME
611
+ weight_sparseness: DENSE
612
+ model.decoder.layers.14.self_attn.softmax:
613
+ approximation_function: NONE
614
+ input_format: SAME
615
+ instance: Softmax
616
+ output_format: SAME
617
+ model.decoder.layers.14.self_attn.v_proj:
618
+ accum_format: SAME
619
+ approximation_function: NONE
620
+ bias_format: SAME
621
+ input_format: SAME
622
+ instance: Linear
623
+ output_format: SAME
624
+ weight_format: SAME
625
+ weight_sparseness: DENSE
626
+ model.decoder.layers.14.self_attn_layer_norm:
627
+ approximation_function: NONE
628
+ bias_format: SAME
629
+ input_format: SAME
630
+ instance: LayerNorm
631
+ output_format: SAME
632
+ weight_format: SAME
633
+ model.decoder.layers.15.activation_fn:
634
+ approximation_function: NONE
635
+ input_format: SAME
636
+ instance: ReLU
637
+ output_format: SAME
638
+ model.decoder.layers.15.dropout:
639
+ approximation_function: NONE
640
+ input_format: SAME
641
+ instance: Dropout
642
+ output_format: SAME
643
+ model.decoder.layers.15.fc1:
644
+ accum_format: SAME
645
+ approximation_function: NONE
646
+ bias_format: SAME
647
+ input_format: SAME
648
+ instance: Linear
649
+ output_format: SAME
650
+ weight_format: SAME
651
+ weight_sparseness: DENSE
652
+ model.decoder.layers.15.fc2:
653
+ accum_format: SAME
654
+ approximation_function: NONE
655
+ bias_format: SAME
656
+ input_format: SAME
657
+ instance: Linear
658
+ output_format: SAME
659
+ weight_format: SAME
660
+ weight_sparseness: DENSE
661
+ model.decoder.layers.15.final_layer_norm:
662
+ approximation_function: NONE
663
+ bias_format: SAME
664
+ input_format: SAME
665
+ instance: LayerNorm
666
+ output_format: SAME
667
+ weight_format: SAME
668
+ model.decoder.layers.15.self_attn.dropout:
669
+ approximation_function: NONE
670
+ input_format: SAME
671
+ instance: Dropout
672
+ output_format: SAME
673
+ model.decoder.layers.15.self_attn.k_proj:
674
+ accum_format: SAME
675
+ approximation_function: NONE
676
+ bias_format: SAME
677
+ input_format: SAME
678
+ instance: Linear
679
+ output_format: SAME
680
+ weight_format: SAME
681
+ weight_sparseness: DENSE
682
+ model.decoder.layers.15.self_attn.out_proj:
683
+ accum_format: SAME
684
+ approximation_function: NONE
685
+ bias_format: SAME
686
+ input_format: SAME
687
+ instance: Linear
688
+ output_format: SAME
689
+ weight_format: SAME
690
+ weight_sparseness: DENSE
691
+ model.decoder.layers.15.self_attn.q_proj:
692
+ accum_format: SAME
693
+ approximation_function: NONE
694
+ bias_format: SAME
695
+ input_format: SAME
696
+ instance: Linear
697
+ output_format: SAME
698
+ weight_format: SAME
699
+ weight_sparseness: DENSE
700
+ model.decoder.layers.15.self_attn.softmax:
701
+ approximation_function: NONE
702
+ input_format: SAME
703
+ instance: Softmax
704
+ output_format: SAME
705
+ model.decoder.layers.15.self_attn.v_proj:
706
+ accum_format: SAME
707
+ approximation_function: NONE
708
+ bias_format: SAME
709
+ input_format: SAME
710
+ instance: Linear
711
+ output_format: SAME
712
+ weight_format: SAME
713
+ weight_sparseness: DENSE
714
+ model.decoder.layers.15.self_attn_layer_norm:
715
+ approximation_function: NONE
716
+ bias_format: SAME
717
+ input_format: SAME
718
+ instance: LayerNorm
719
+ output_format: SAME
720
+ weight_format: SAME
721
+ model.decoder.layers.16.activation_fn:
722
+ approximation_function: NONE
723
+ input_format: SAME
724
+ instance: ReLU
725
+ output_format: SAME
726
+ model.decoder.layers.16.dropout:
727
+ approximation_function: NONE
728
+ input_format: SAME
729
+ instance: Dropout
730
+ output_format: SAME
731
+ model.decoder.layers.16.fc1:
732
+ accum_format: SAME
733
+ approximation_function: NONE
734
+ bias_format: SAME
735
+ input_format: SAME
736
+ instance: Linear
737
+ output_format: SAME
738
+ weight_format: SAME
739
+ weight_sparseness: DENSE
740
+ model.decoder.layers.16.fc2:
741
+ accum_format: SAME
742
+ approximation_function: NONE
743
+ bias_format: SAME
744
+ input_format: SAME
745
+ instance: Linear
746
+ output_format: SAME
747
+ weight_format: SAME
748
+ weight_sparseness: DENSE
749
+ model.decoder.layers.16.final_layer_norm:
750
+ approximation_function: NONE
751
+ bias_format: SAME
752
+ input_format: SAME
753
+ instance: LayerNorm
754
+ output_format: SAME
755
+ weight_format: SAME
756
+ model.decoder.layers.16.self_attn.dropout:
757
+ approximation_function: NONE
758
+ input_format: SAME
759
+ instance: Dropout
760
+ output_format: SAME
761
+ model.decoder.layers.16.self_attn.k_proj:
762
+ accum_format: SAME
763
+ approximation_function: NONE
764
+ bias_format: SAME
765
+ input_format: SAME
766
+ instance: Linear
767
+ output_format: SAME
768
+ weight_format: SAME
769
+ weight_sparseness: DENSE
770
+ model.decoder.layers.16.self_attn.out_proj:
771
+ accum_format: SAME
772
+ approximation_function: NONE
773
+ bias_format: SAME
774
+ input_format: SAME
775
+ instance: Linear
776
+ output_format: SAME
777
+ weight_format: SAME
778
+ weight_sparseness: DENSE
779
+ model.decoder.layers.16.self_attn.q_proj:
780
+ accum_format: SAME
781
+ approximation_function: NONE
782
+ bias_format: SAME
783
+ input_format: SAME
784
+ instance: Linear
785
+ output_format: SAME
786
+ weight_format: SAME
787
+ weight_sparseness: DENSE
788
+ model.decoder.layers.16.self_attn.softmax:
789
+ approximation_function: NONE
790
+ input_format: SAME
791
+ instance: Softmax
792
+ output_format: SAME
793
+ model.decoder.layers.16.self_attn.v_proj:
794
+ accum_format: SAME
795
+ approximation_function: NONE
796
+ bias_format: SAME
797
+ input_format: SAME
798
+ instance: Linear
799
+ output_format: SAME
800
+ weight_format: SAME
801
+ weight_sparseness: DENSE
802
+ model.decoder.layers.16.self_attn_layer_norm:
803
+ approximation_function: NONE
804
+ bias_format: SAME
805
+ input_format: SAME
806
+ instance: LayerNorm
807
+ output_format: SAME
808
+ weight_format: SAME
809
+ model.decoder.layers.17.activation_fn:
810
+ approximation_function: NONE
811
+ input_format: SAME
812
+ instance: ReLU
813
+ output_format: SAME
814
+ model.decoder.layers.17.dropout:
815
+ approximation_function: NONE
816
+ input_format: SAME
817
+ instance: Dropout
818
+ output_format: SAME
819
+ model.decoder.layers.17.fc1:
820
+ accum_format: SAME
821
+ approximation_function: NONE
822
+ bias_format: SAME
823
+ input_format: SAME
824
+ instance: Linear
825
+ output_format: SAME
826
+ weight_format: SAME
827
+ weight_sparseness: DENSE
828
+ model.decoder.layers.17.fc2:
829
+ accum_format: SAME
830
+ approximation_function: NONE
831
+ bias_format: SAME
832
+ input_format: SAME
833
+ instance: Linear
834
+ output_format: SAME
835
+ weight_format: SAME
836
+ weight_sparseness: DENSE
837
+ model.decoder.layers.17.final_layer_norm:
838
+ approximation_function: NONE
839
+ bias_format: SAME
840
+ input_format: SAME
841
+ instance: LayerNorm
842
+ output_format: SAME
843
+ weight_format: SAME
844
+ model.decoder.layers.17.self_attn.dropout:
845
+ approximation_function: NONE
846
+ input_format: SAME
847
+ instance: Dropout
848
+ output_format: SAME
849
+ model.decoder.layers.17.self_attn.k_proj:
850
+ accum_format: SAME
851
+ approximation_function: NONE
852
+ bias_format: SAME
853
+ input_format: SAME
854
+ instance: Linear
855
+ output_format: SAME
856
+ weight_format: SAME
857
+ weight_sparseness: DENSE
858
+ model.decoder.layers.17.self_attn.out_proj:
859
+ accum_format: SAME
860
+ approximation_function: NONE
861
+ bias_format: SAME
862
+ input_format: SAME
863
+ instance: Linear
864
+ output_format: SAME
865
+ weight_format: SAME
866
+ weight_sparseness: DENSE
867
+ model.decoder.layers.17.self_attn.q_proj:
868
+ accum_format: SAME
869
+ approximation_function: NONE
870
+ bias_format: SAME
871
+ input_format: SAME
872
+ instance: Linear
873
+ output_format: SAME
874
+ weight_format: SAME
875
+ weight_sparseness: DENSE
876
+ model.decoder.layers.17.self_attn.softmax:
877
+ approximation_function: NONE
878
+ input_format: SAME
879
+ instance: Softmax
880
+ output_format: SAME
881
+ model.decoder.layers.17.self_attn.v_proj:
882
+ accum_format: SAME
883
+ approximation_function: NONE
884
+ bias_format: SAME
885
+ input_format: SAME
886
+ instance: Linear
887
+ output_format: SAME
888
+ weight_format: SAME
889
+ weight_sparseness: DENSE
890
+ model.decoder.layers.17.self_attn_layer_norm:
891
+ approximation_function: NONE
892
+ bias_format: SAME
893
+ input_format: SAME
894
+ instance: LayerNorm
895
+ output_format: SAME
896
+ weight_format: SAME
897
+ model.decoder.layers.18.activation_fn:
898
+ approximation_function: NONE
899
+ input_format: SAME
900
+ instance: ReLU
901
+ output_format: SAME
902
+ model.decoder.layers.18.dropout:
903
+ approximation_function: NONE
904
+ input_format: SAME
905
+ instance: Dropout
906
+ output_format: SAME
907
+ model.decoder.layers.18.fc1:
908
+ accum_format: SAME
909
+ approximation_function: NONE
910
+ bias_format: SAME
911
+ input_format: SAME
912
+ instance: Linear
913
+ output_format: SAME
914
+ weight_format: SAME
915
+ weight_sparseness: DENSE
916
+ model.decoder.layers.18.fc2:
917
+ accum_format: SAME
918
+ approximation_function: NONE
919
+ bias_format: SAME
920
+ input_format: SAME
921
+ instance: Linear
922
+ output_format: SAME
923
+ weight_format: SAME
924
+ weight_sparseness: DENSE
925
+ model.decoder.layers.18.final_layer_norm:
926
+ approximation_function: NONE
927
+ bias_format: SAME
928
+ input_format: SAME
929
+ instance: LayerNorm
930
+ output_format: SAME
931
+ weight_format: SAME
932
+ model.decoder.layers.18.self_attn.dropout:
933
+ approximation_function: NONE
934
+ input_format: SAME
935
+ instance: Dropout
936
+ output_format: SAME
937
+ model.decoder.layers.18.self_attn.k_proj:
938
+ accum_format: SAME
939
+ approximation_function: NONE
940
+ bias_format: SAME
941
+ input_format: SAME
942
+ instance: Linear
943
+ output_format: SAME
944
+ weight_format: SAME
945
+ weight_sparseness: DENSE
946
+ model.decoder.layers.18.self_attn.out_proj:
947
+ accum_format: SAME
948
+ approximation_function: NONE
949
+ bias_format: SAME
950
+ input_format: SAME
951
+ instance: Linear
952
+ output_format: SAME
953
+ weight_format: SAME
954
+ weight_sparseness: DENSE
955
+ model.decoder.layers.18.self_attn.q_proj:
956
+ accum_format: SAME
957
+ approximation_function: NONE
958
+ bias_format: SAME
959
+ input_format: SAME
960
+ instance: Linear
961
+ output_format: SAME
962
+ weight_format: SAME
963
+ weight_sparseness: DENSE
964
+ model.decoder.layers.18.self_attn.softmax:
965
+ approximation_function: NONE
966
+ input_format: SAME
967
+ instance: Softmax
968
+ output_format: SAME
969
+ model.decoder.layers.18.self_attn.v_proj:
970
+ accum_format: SAME
971
+ approximation_function: NONE
972
+ bias_format: SAME
973
+ input_format: SAME
974
+ instance: Linear
975
+ output_format: SAME
976
+ weight_format: SAME
977
+ weight_sparseness: DENSE
978
+ model.decoder.layers.18.self_attn_layer_norm:
979
+ approximation_function: NONE
980
+ bias_format: SAME
981
+ input_format: SAME
982
+ instance: LayerNorm
983
+ output_format: SAME
984
+ weight_format: SAME
985
+ model.decoder.layers.19.activation_fn:
986
+ approximation_function: NONE
987
+ input_format: SAME
988
+ instance: ReLU
989
+ output_format: SAME
990
+ model.decoder.layers.19.dropout:
991
+ approximation_function: NONE
992
+ input_format: SAME
993
+ instance: Dropout
994
+ output_format: SAME
995
+ model.decoder.layers.19.fc1:
996
+ accum_format: SAME
997
+ approximation_function: NONE
998
+ bias_format: SAME
999
+ input_format: SAME
1000
+ instance: Linear
1001
+ output_format: SAME
1002
+ weight_format: SAME
1003
+ weight_sparseness: DENSE
1004
+ model.decoder.layers.19.fc2:
1005
+ accum_format: SAME
1006
+ approximation_function: NONE
1007
+ bias_format: SAME
1008
+ input_format: SAME
1009
+ instance: Linear
1010
+ output_format: SAME
1011
+ weight_format: SAME
1012
+ weight_sparseness: DENSE
1013
+ model.decoder.layers.19.final_layer_norm:
1014
+ approximation_function: NONE
1015
+ bias_format: SAME
1016
+ input_format: SAME
1017
+ instance: LayerNorm
1018
+ output_format: SAME
1019
+ weight_format: SAME
1020
+ model.decoder.layers.19.self_attn.dropout:
1021
+ approximation_function: NONE
1022
+ input_format: SAME
1023
+ instance: Dropout
1024
+ output_format: SAME
1025
+ model.decoder.layers.19.self_attn.k_proj:
1026
+ accum_format: SAME
1027
+ approximation_function: NONE
1028
+ bias_format: SAME
1029
+ input_format: SAME
1030
+ instance: Linear
1031
+ output_format: SAME
1032
+ weight_format: SAME
1033
+ weight_sparseness: DENSE
1034
+ model.decoder.layers.19.self_attn.out_proj:
1035
+ accum_format: SAME
1036
+ approximation_function: NONE
1037
+ bias_format: SAME
1038
+ input_format: SAME
1039
+ instance: Linear
1040
+ output_format: SAME
1041
+ weight_format: SAME
1042
+ weight_sparseness: DENSE
1043
+ model.decoder.layers.19.self_attn.q_proj:
1044
+ accum_format: SAME
1045
+ approximation_function: NONE
1046
+ bias_format: SAME
1047
+ input_format: SAME
1048
+ instance: Linear
1049
+ output_format: SAME
1050
+ weight_format: SAME
1051
+ weight_sparseness: DENSE
1052
+ model.decoder.layers.19.self_attn.softmax:
1053
+ approximation_function: NONE
1054
+ input_format: SAME
1055
+ instance: Softmax
1056
+ output_format: SAME
1057
+ model.decoder.layers.19.self_attn.v_proj:
1058
+ accum_format: SAME
1059
+ approximation_function: NONE
1060
+ bias_format: SAME
1061
+ input_format: SAME
1062
+ instance: Linear
1063
+ output_format: SAME
1064
+ weight_format: SAME
1065
+ weight_sparseness: DENSE
1066
+ model.decoder.layers.19.self_attn_layer_norm:
1067
+ approximation_function: NONE
1068
+ bias_format: SAME
1069
+ input_format: SAME
1070
+ instance: LayerNorm
1071
+ output_format: SAME
1072
+ weight_format: SAME
1073
+ model.decoder.layers.2.activation_fn:
1074
+ approximation_function: NONE
1075
+ input_format: SAME
1076
+ instance: ReLU
1077
+ output_format: SAME
1078
+ model.decoder.layers.2.dropout:
1079
+ approximation_function: NONE
1080
+ input_format: SAME
1081
+ instance: Dropout
1082
+ output_format: SAME
1083
+ model.decoder.layers.2.fc1:
1084
+ accum_format: SAME
1085
+ approximation_function: NONE
1086
+ bias_format: SAME
1087
+ input_format: SAME
1088
+ instance: Linear
1089
+ output_format: SAME
1090
+ weight_format: SAME
1091
+ weight_sparseness: DENSE
1092
+ model.decoder.layers.2.fc2:
1093
+ accum_format: SAME
1094
+ approximation_function: NONE
1095
+ bias_format: SAME
1096
+ input_format: SAME
1097
+ instance: Linear
1098
+ output_format: SAME
1099
+ weight_format: SAME
1100
+ weight_sparseness: DENSE
1101
+ model.decoder.layers.2.final_layer_norm:
1102
+ approximation_function: NONE
1103
+ bias_format: SAME
1104
+ input_format: SAME
1105
+ instance: LayerNorm
1106
+ output_format: SAME
1107
+ weight_format: SAME
1108
+ model.decoder.layers.2.self_attn.dropout:
1109
+ approximation_function: NONE
1110
+ input_format: SAME
1111
+ instance: Dropout
1112
+ output_format: SAME
1113
+ model.decoder.layers.2.self_attn.k_proj:
1114
+ accum_format: SAME
1115
+ approximation_function: NONE
1116
+ bias_format: SAME
1117
+ input_format: SAME
1118
+ instance: Linear
1119
+ output_format: SAME
1120
+ weight_format: SAME
1121
+ weight_sparseness: DENSE
1122
+ model.decoder.layers.2.self_attn.out_proj:
1123
+ accum_format: SAME
1124
+ approximation_function: NONE
1125
+ bias_format: SAME
1126
+ input_format: SAME
1127
+ instance: Linear
1128
+ output_format: SAME
1129
+ weight_format: SAME
1130
+ weight_sparseness: DENSE
1131
+ model.decoder.layers.2.self_attn.q_proj:
1132
+ accum_format: SAME
1133
+ approximation_function: NONE
1134
+ bias_format: SAME
1135
+ input_format: SAME
1136
+ instance: Linear
1137
+ output_format: SAME
1138
+ weight_format: SAME
1139
+ weight_sparseness: DENSE
1140
+ model.decoder.layers.2.self_attn.softmax:
1141
+ approximation_function: NONE
1142
+ input_format: SAME
1143
+ instance: Softmax
1144
+ output_format: SAME
1145
+ model.decoder.layers.2.self_attn.v_proj:
1146
+ accum_format: SAME
1147
+ approximation_function: NONE
1148
+ bias_format: SAME
1149
+ input_format: SAME
1150
+ instance: Linear
1151
+ output_format: SAME
1152
+ weight_format: SAME
1153
+ weight_sparseness: DENSE
1154
+ model.decoder.layers.2.self_attn_layer_norm:
1155
+ approximation_function: NONE
1156
+ bias_format: SAME
1157
+ input_format: SAME
1158
+ instance: LayerNorm
1159
+ output_format: SAME
1160
+ weight_format: SAME
1161
+ model.decoder.layers.20.activation_fn:
1162
+ approximation_function: NONE
1163
+ input_format: SAME
1164
+ instance: ReLU
1165
+ output_format: SAME
1166
+ model.decoder.layers.20.dropout:
1167
+ approximation_function: NONE
1168
+ input_format: SAME
1169
+ instance: Dropout
1170
+ output_format: SAME
1171
+ model.decoder.layers.20.fc1:
1172
+ accum_format: SAME
1173
+ approximation_function: NONE
1174
+ bias_format: SAME
1175
+ input_format: SAME
1176
+ instance: Linear
1177
+ output_format: SAME
1178
+ weight_format: SAME
1179
+ weight_sparseness: DENSE
1180
+ model.decoder.layers.20.fc2:
1181
+ accum_format: SAME
1182
+ approximation_function: NONE
1183
+ bias_format: SAME
1184
+ input_format: SAME
1185
+ instance: Linear
1186
+ output_format: SAME
1187
+ weight_format: SAME
1188
+ weight_sparseness: DENSE
1189
+ model.decoder.layers.20.final_layer_norm:
1190
+ approximation_function: NONE
1191
+ bias_format: SAME
1192
+ input_format: SAME
1193
+ instance: LayerNorm
1194
+ output_format: SAME
1195
+ weight_format: SAME
1196
+ model.decoder.layers.20.self_attn.dropout:
1197
+ approximation_function: NONE
1198
+ input_format: SAME
1199
+ instance: Dropout
1200
+ output_format: SAME
1201
+ model.decoder.layers.20.self_attn.k_proj:
1202
+ accum_format: SAME
1203
+ approximation_function: NONE
1204
+ bias_format: SAME
1205
+ input_format: SAME
1206
+ instance: Linear
1207
+ output_format: SAME
1208
+ weight_format: SAME
1209
+ weight_sparseness: DENSE
1210
+ model.decoder.layers.20.self_attn.out_proj:
1211
+ accum_format: SAME
1212
+ approximation_function: NONE
1213
+ bias_format: SAME
1214
+ input_format: SAME
1215
+ instance: Linear
1216
+ output_format: SAME
1217
+ weight_format: SAME
1218
+ weight_sparseness: DENSE
1219
+ model.decoder.layers.20.self_attn.q_proj:
1220
+ accum_format: SAME
1221
+ approximation_function: NONE
1222
+ bias_format: SAME
1223
+ input_format: SAME
1224
+ instance: Linear
1225
+ output_format: SAME
1226
+ weight_format: SAME
1227
+ weight_sparseness: DENSE
1228
+ model.decoder.layers.20.self_attn.softmax:
1229
+ approximation_function: NONE
1230
+ input_format: SAME
1231
+ instance: Softmax
1232
+ output_format: SAME
1233
+ model.decoder.layers.20.self_attn.v_proj:
1234
+ accum_format: SAME
1235
+ approximation_function: NONE
1236
+ bias_format: SAME
1237
+ input_format: SAME
1238
+ instance: Linear
1239
+ output_format: SAME
1240
+ weight_format: SAME
1241
+ weight_sparseness: DENSE
1242
+ model.decoder.layers.20.self_attn_layer_norm:
1243
+ approximation_function: NONE
1244
+ bias_format: SAME
1245
+ input_format: SAME
1246
+ instance: LayerNorm
1247
+ output_format: SAME
1248
+ weight_format: SAME
1249
+ model.decoder.layers.21.activation_fn:
1250
+ approximation_function: NONE
1251
+ input_format: SAME
1252
+ instance: ReLU
1253
+ output_format: SAME
1254
+ model.decoder.layers.21.dropout:
1255
+ approximation_function: NONE
1256
+ input_format: SAME
1257
+ instance: Dropout
1258
+ output_format: SAME
1259
+ model.decoder.layers.21.fc1:
1260
+ accum_format: SAME
1261
+ approximation_function: NONE
1262
+ bias_format: SAME
1263
+ input_format: SAME
1264
+ instance: Linear
1265
+ output_format: SAME
1266
+ weight_format: SAME
1267
+ weight_sparseness: DENSE
1268
+ model.decoder.layers.21.fc2:
1269
+ accum_format: SAME
1270
+ approximation_function: NONE
1271
+ bias_format: SAME
1272
+ input_format: SAME
1273
+ instance: Linear
1274
+ output_format: SAME
1275
+ weight_format: SAME
1276
+ weight_sparseness: DENSE
1277
+ model.decoder.layers.21.final_layer_norm:
1278
+ approximation_function: NONE
1279
+ bias_format: SAME
1280
+ input_format: SAME
1281
+ instance: LayerNorm
1282
+ output_format: SAME
1283
+ weight_format: SAME
1284
+ model.decoder.layers.21.self_attn.dropout:
1285
+ approximation_function: NONE
1286
+ input_format: SAME
1287
+ instance: Dropout
1288
+ output_format: SAME
1289
+ model.decoder.layers.21.self_attn.k_proj:
1290
+ accum_format: SAME
1291
+ approximation_function: NONE
1292
+ bias_format: SAME
1293
+ input_format: SAME
1294
+ instance: Linear
1295
+ output_format: SAME
1296
+ weight_format: SAME
1297
+ weight_sparseness: DENSE
1298
+ model.decoder.layers.21.self_attn.out_proj:
1299
+ accum_format: SAME
1300
+ approximation_function: NONE
1301
+ bias_format: SAME
1302
+ input_format: SAME
1303
+ instance: Linear
1304
+ output_format: SAME
1305
+ weight_format: SAME
1306
+ weight_sparseness: DENSE
1307
+ model.decoder.layers.21.self_attn.q_proj:
1308
+ accum_format: SAME
1309
+ approximation_function: NONE
1310
+ bias_format: SAME
1311
+ input_format: SAME
1312
+ instance: Linear
1313
+ output_format: SAME
1314
+ weight_format: SAME
1315
+ weight_sparseness: DENSE
1316
+ model.decoder.layers.21.self_attn.softmax:
1317
+ approximation_function: NONE
1318
+ input_format: SAME
1319
+ instance: Softmax
1320
+ output_format: SAME
1321
+ model.decoder.layers.21.self_attn.v_proj:
1322
+ accum_format: SAME
1323
+ approximation_function: NONE
1324
+ bias_format: SAME
1325
+ input_format: SAME
1326
+ instance: Linear
1327
+ output_format: SAME
1328
+ weight_format: SAME
1329
+ weight_sparseness: DENSE
1330
+ model.decoder.layers.21.self_attn_layer_norm:
1331
+ approximation_function: NONE
1332
+ bias_format: SAME
1333
+ input_format: SAME
1334
+ instance: LayerNorm
1335
+ output_format: SAME
1336
+ weight_format: SAME
1337
+ model.decoder.layers.22.activation_fn:
1338
+ approximation_function: NONE
1339
+ input_format: SAME
1340
+ instance: ReLU
1341
+ output_format: SAME
1342
+ model.decoder.layers.22.dropout:
1343
+ approximation_function: NONE
1344
+ input_format: SAME
1345
+ instance: Dropout
1346
+ output_format: SAME
1347
+ model.decoder.layers.22.fc1:
1348
+ accum_format: SAME
1349
+ approximation_function: NONE
1350
+ bias_format: SAME
1351
+ input_format: SAME
1352
+ instance: Linear
1353
+ output_format: SAME
1354
+ weight_format: SAME
1355
+ weight_sparseness: DENSE
1356
+ model.decoder.layers.22.fc2:
1357
+ accum_format: SAME
1358
+ approximation_function: NONE
1359
+ bias_format: SAME
1360
+ input_format: SAME
1361
+ instance: Linear
1362
+ output_format: SAME
1363
+ weight_format: SAME
1364
+ weight_sparseness: DENSE
1365
+ model.decoder.layers.22.final_layer_norm:
1366
+ approximation_function: NONE
1367
+ bias_format: SAME
1368
+ input_format: SAME
1369
+ instance: LayerNorm
1370
+ output_format: SAME
1371
+ weight_format: SAME
1372
+ model.decoder.layers.22.self_attn.dropout:
1373
+ approximation_function: NONE
1374
+ input_format: SAME
1375
+ instance: Dropout
1376
+ output_format: SAME
1377
+ model.decoder.layers.22.self_attn.k_proj:
1378
+ accum_format: SAME
1379
+ approximation_function: NONE
1380
+ bias_format: SAME
1381
+ input_format: SAME
1382
+ instance: Linear
1383
+ output_format: SAME
1384
+ weight_format: SAME
1385
+ weight_sparseness: DENSE
1386
+ model.decoder.layers.22.self_attn.out_proj:
1387
+ accum_format: SAME
1388
+ approximation_function: NONE
1389
+ bias_format: SAME
1390
+ input_format: SAME
1391
+ instance: Linear
1392
+ output_format: SAME
1393
+ weight_format: SAME
1394
+ weight_sparseness: DENSE
1395
+ model.decoder.layers.22.self_attn.q_proj:
1396
+ accum_format: SAME
1397
+ approximation_function: NONE
1398
+ bias_format: SAME
1399
+ input_format: SAME
1400
+ instance: Linear
1401
+ output_format: SAME
1402
+ weight_format: SAME
1403
+ weight_sparseness: DENSE
1404
+ model.decoder.layers.22.self_attn.softmax:
1405
+ approximation_function: NONE
1406
+ input_format: SAME
1407
+ instance: Softmax
1408
+ output_format: SAME
1409
+ model.decoder.layers.22.self_attn.v_proj:
1410
+ accum_format: SAME
1411
+ approximation_function: NONE
1412
+ bias_format: SAME
1413
+ input_format: SAME
1414
+ instance: Linear
1415
+ output_format: SAME
1416
+ weight_format: SAME
1417
+ weight_sparseness: DENSE
1418
+ model.decoder.layers.22.self_attn_layer_norm:
1419
+ approximation_function: NONE
1420
+ bias_format: SAME
1421
+ input_format: SAME
1422
+ instance: LayerNorm
1423
+ output_format: SAME
1424
+ weight_format: SAME
1425
+ model.decoder.layers.23.activation_fn:
1426
+ approximation_function: NONE
1427
+ input_format: SAME
1428
+ instance: ReLU
1429
+ output_format: SAME
1430
+ model.decoder.layers.23.dropout:
1431
+ approximation_function: NONE
1432
+ input_format: SAME
1433
+ instance: Dropout
1434
+ output_format: SAME
1435
+ model.decoder.layers.23.fc1:
1436
+ accum_format: SAME
1437
+ approximation_function: NONE
1438
+ bias_format: SAME
1439
+ input_format: SAME
1440
+ instance: Linear
1441
+ output_format: SAME
1442
+ weight_format: SAME
1443
+ weight_sparseness: DENSE
1444
+ model.decoder.layers.23.fc2:
1445
+ accum_format: SAME
1446
+ approximation_function: NONE
1447
+ bias_format: SAME
1448
+ input_format: SAME
1449
+ instance: Linear
1450
+ output_format: SAME
1451
+ weight_format: SAME
1452
+ weight_sparseness: DENSE
1453
+ model.decoder.layers.23.final_layer_norm:
1454
+ approximation_function: NONE
1455
+ bias_format: SAME
1456
+ input_format: SAME
1457
+ instance: LayerNorm
1458
+ output_format: SAME
1459
+ weight_format: SAME
1460
+ model.decoder.layers.23.self_attn.dropout:
1461
+ approximation_function: NONE
1462
+ input_format: SAME
1463
+ instance: Dropout
1464
+ output_format: SAME
1465
+ model.decoder.layers.23.self_attn.k_proj:
1466
+ accum_format: SAME
1467
+ approximation_function: NONE
1468
+ bias_format: SAME
1469
+ input_format: SAME
1470
+ instance: Linear
1471
+ output_format: SAME
1472
+ weight_format: SAME
1473
+ weight_sparseness: DENSE
1474
+ model.decoder.layers.23.self_attn.out_proj:
1475
+ accum_format: SAME
1476
+ approximation_function: NONE
1477
+ bias_format: SAME
1478
+ input_format: SAME
1479
+ instance: Linear
1480
+ output_format: SAME
1481
+ weight_format: SAME
1482
+ weight_sparseness: DENSE
1483
+ model.decoder.layers.23.self_attn.q_proj:
1484
+ accum_format: SAME
1485
+ approximation_function: NONE
1486
+ bias_format: SAME
1487
+ input_format: SAME
1488
+ instance: Linear
1489
+ output_format: SAME
1490
+ weight_format: SAME
1491
+ weight_sparseness: DENSE
1492
+ model.decoder.layers.23.self_attn.softmax:
1493
+ approximation_function: NONE
1494
+ input_format: SAME
1495
+ instance: Softmax
1496
+ output_format: SAME
1497
+ model.decoder.layers.23.self_attn.v_proj:
1498
+ accum_format: SAME
1499
+ approximation_function: NONE
1500
+ bias_format: SAME
1501
+ input_format: SAME
1502
+ instance: Linear
1503
+ output_format: SAME
1504
+ weight_format: SAME
1505
+ weight_sparseness: DENSE
1506
+ model.decoder.layers.23.self_attn_layer_norm:
1507
+ approximation_function: NONE
1508
+ bias_format: SAME
1509
+ input_format: SAME
1510
+ instance: LayerNorm
1511
+ output_format: SAME
1512
+ weight_format: SAME
1513
+ model.decoder.layers.24.activation_fn:
1514
+ approximation_function: NONE
1515
+ input_format: SAME
1516
+ instance: ReLU
1517
+ output_format: SAME
1518
+ model.decoder.layers.24.dropout:
1519
+ approximation_function: NONE
1520
+ input_format: SAME
1521
+ instance: Dropout
1522
+ output_format: SAME
1523
+ model.decoder.layers.24.fc1:
1524
+ accum_format: SAME
1525
+ approximation_function: NONE
1526
+ bias_format: SAME
1527
+ input_format: SAME
1528
+ instance: Linear
1529
+ output_format: SAME
1530
+ weight_format: SAME
1531
+ weight_sparseness: DENSE
1532
+ model.decoder.layers.24.fc2:
1533
+ accum_format: SAME
1534
+ approximation_function: NONE
1535
+ bias_format: SAME
1536
+ input_format: SAME
1537
+ instance: Linear
1538
+ output_format: SAME
1539
+ weight_format: SAME
1540
+ weight_sparseness: DENSE
1541
+ model.decoder.layers.24.final_layer_norm:
1542
+ approximation_function: NONE
1543
+ bias_format: SAME
1544
+ input_format: SAME
1545
+ instance: LayerNorm
1546
+ output_format: SAME
1547
+ weight_format: SAME
1548
+ model.decoder.layers.24.self_attn.dropout:
1549
+ approximation_function: NONE
1550
+ input_format: SAME
1551
+ instance: Dropout
1552
+ output_format: SAME
1553
+ model.decoder.layers.24.self_attn.k_proj:
1554
+ accum_format: SAME
1555
+ approximation_function: NONE
1556
+ bias_format: SAME
1557
+ input_format: SAME
1558
+ instance: Linear
1559
+ output_format: SAME
1560
+ weight_format: SAME
1561
+ weight_sparseness: DENSE
1562
+ model.decoder.layers.24.self_attn.out_proj:
1563
+ accum_format: SAME
1564
+ approximation_function: NONE
1565
+ bias_format: SAME
1566
+ input_format: SAME
1567
+ instance: Linear
1568
+ output_format: SAME
1569
+ weight_format: SAME
1570
+ weight_sparseness: DENSE
1571
+ model.decoder.layers.24.self_attn.q_proj:
1572
+ accum_format: SAME
1573
+ approximation_function: NONE
1574
+ bias_format: SAME
1575
+ input_format: SAME
1576
+ instance: Linear
1577
+ output_format: SAME
1578
+ weight_format: SAME
1579
+ weight_sparseness: DENSE
1580
+ model.decoder.layers.24.self_attn.softmax:
1581
+ approximation_function: NONE
1582
+ input_format: SAME
1583
+ instance: Softmax
1584
+ output_format: SAME
1585
+ model.decoder.layers.24.self_attn.v_proj:
1586
+ accum_format: SAME
1587
+ approximation_function: NONE
1588
+ bias_format: SAME
1589
+ input_format: SAME
1590
+ instance: Linear
1591
+ output_format: SAME
1592
+ weight_format: SAME
1593
+ weight_sparseness: DENSE
1594
+ model.decoder.layers.24.self_attn_layer_norm:
1595
+ approximation_function: NONE
1596
+ bias_format: SAME
1597
+ input_format: SAME
1598
+ instance: LayerNorm
1599
+ output_format: SAME
1600
+ weight_format: SAME
1601
+ model.decoder.layers.25.activation_fn:
1602
+ approximation_function: NONE
1603
+ input_format: SAME
1604
+ instance: ReLU
1605
+ output_format: SAME
1606
+ model.decoder.layers.25.dropout:
1607
+ approximation_function: NONE
1608
+ input_format: SAME
1609
+ instance: Dropout
1610
+ output_format: SAME
1611
+ model.decoder.layers.25.fc1:
1612
+ accum_format: SAME
1613
+ approximation_function: NONE
1614
+ bias_format: SAME
1615
+ input_format: SAME
1616
+ instance: Linear
1617
+ output_format: SAME
1618
+ weight_format: SAME
1619
+ weight_sparseness: DENSE
1620
+ model.decoder.layers.25.fc2:
1621
+ accum_format: SAME
1622
+ approximation_function: NONE
1623
+ bias_format: SAME
1624
+ input_format: SAME
1625
+ instance: Linear
1626
+ output_format: SAME
1627
+ weight_format: SAME
1628
+ weight_sparseness: DENSE
1629
+ model.decoder.layers.25.final_layer_norm:
1630
+ approximation_function: NONE
1631
+ bias_format: SAME
1632
+ input_format: SAME
1633
+ instance: LayerNorm
1634
+ output_format: SAME
1635
+ weight_format: SAME
1636
+ model.decoder.layers.25.self_attn.dropout:
1637
+ approximation_function: NONE
1638
+ input_format: SAME
1639
+ instance: Dropout
1640
+ output_format: SAME
1641
+ model.decoder.layers.25.self_attn.k_proj:
1642
+ accum_format: SAME
1643
+ approximation_function: NONE
1644
+ bias_format: SAME
1645
+ input_format: SAME
1646
+ instance: Linear
1647
+ output_format: SAME
1648
+ weight_format: SAME
1649
+ weight_sparseness: DENSE
1650
+ model.decoder.layers.25.self_attn.out_proj:
1651
+ accum_format: SAME
1652
+ approximation_function: NONE
1653
+ bias_format: SAME
1654
+ input_format: SAME
1655
+ instance: Linear
1656
+ output_format: SAME
1657
+ weight_format: SAME
1658
+ weight_sparseness: DENSE
1659
+ model.decoder.layers.25.self_attn.q_proj:
1660
+ accum_format: SAME
1661
+ approximation_function: NONE
1662
+ bias_format: SAME
1663
+ input_format: SAME
1664
+ instance: Linear
1665
+ output_format: SAME
1666
+ weight_format: SAME
1667
+ weight_sparseness: DENSE
1668
+ model.decoder.layers.25.self_attn.softmax:
1669
+ approximation_function: NONE
1670
+ input_format: SAME
1671
+ instance: Softmax
1672
+ output_format: SAME
1673
+ model.decoder.layers.25.self_attn.v_proj:
1674
+ accum_format: SAME
1675
+ approximation_function: NONE
1676
+ bias_format: SAME
1677
+ input_format: SAME
1678
+ instance: Linear
1679
+ output_format: SAME
1680
+ weight_format: SAME
1681
+ weight_sparseness: DENSE
1682
+ model.decoder.layers.25.self_attn_layer_norm:
1683
+ approximation_function: NONE
1684
+ bias_format: SAME
1685
+ input_format: SAME
1686
+ instance: LayerNorm
1687
+ output_format: SAME
1688
+ weight_format: SAME
1689
+ model.decoder.layers.26.activation_fn:
1690
+ approximation_function: NONE
1691
+ input_format: SAME
1692
+ instance: ReLU
1693
+ output_format: SAME
1694
+ model.decoder.layers.26.dropout:
1695
+ approximation_function: NONE
1696
+ input_format: SAME
1697
+ instance: Dropout
1698
+ output_format: SAME
1699
+ model.decoder.layers.26.fc1:
1700
+ accum_format: SAME
1701
+ approximation_function: NONE
1702
+ bias_format: SAME
1703
+ input_format: SAME
1704
+ instance: Linear
1705
+ output_format: SAME
1706
+ weight_format: SAME
1707
+ weight_sparseness: DENSE
1708
+ model.decoder.layers.26.fc2:
1709
+ accum_format: SAME
1710
+ approximation_function: NONE
1711
+ bias_format: SAME
1712
+ input_format: SAME
1713
+ instance: Linear
1714
+ output_format: SAME
1715
+ weight_format: SAME
1716
+ weight_sparseness: DENSE
1717
+ model.decoder.layers.26.final_layer_norm:
1718
+ approximation_function: NONE
1719
+ bias_format: SAME
1720
+ input_format: SAME
1721
+ instance: LayerNorm
1722
+ output_format: SAME
1723
+ weight_format: SAME
1724
+ model.decoder.layers.26.self_attn.dropout:
1725
+ approximation_function: NONE
1726
+ input_format: SAME
1727
+ instance: Dropout
1728
+ output_format: SAME
1729
+ model.decoder.layers.26.self_attn.k_proj:
1730
+ accum_format: SAME
1731
+ approximation_function: NONE
1732
+ bias_format: SAME
1733
+ input_format: SAME
1734
+ instance: Linear
1735
+ output_format: SAME
1736
+ weight_format: SAME
1737
+ weight_sparseness: DENSE
1738
+ model.decoder.layers.26.self_attn.out_proj:
1739
+ accum_format: SAME
1740
+ approximation_function: NONE
1741
+ bias_format: SAME
1742
+ input_format: SAME
1743
+ instance: Linear
1744
+ output_format: SAME
1745
+ weight_format: SAME
1746
+ weight_sparseness: DENSE
1747
+ model.decoder.layers.26.self_attn.q_proj:
1748
+ accum_format: SAME
1749
+ approximation_function: NONE
1750
+ bias_format: SAME
1751
+ input_format: SAME
1752
+ instance: Linear
1753
+ output_format: SAME
1754
+ weight_format: SAME
1755
+ weight_sparseness: DENSE
1756
+ model.decoder.layers.26.self_attn.softmax:
1757
+ approximation_function: NONE
1758
+ input_format: SAME
1759
+ instance: Softmax
1760
+ output_format: SAME
1761
+ model.decoder.layers.26.self_attn.v_proj:
1762
+ accum_format: SAME
1763
+ approximation_function: NONE
1764
+ bias_format: SAME
1765
+ input_format: SAME
1766
+ instance: Linear
1767
+ output_format: SAME
1768
+ weight_format: SAME
1769
+ weight_sparseness: DENSE
1770
+ model.decoder.layers.26.self_attn_layer_norm:
1771
+ approximation_function: NONE
1772
+ bias_format: SAME
1773
+ input_format: SAME
1774
+ instance: LayerNorm
1775
+ output_format: SAME
1776
+ weight_format: SAME
1777
+ model.decoder.layers.27.activation_fn:
1778
+ approximation_function: NONE
1779
+ input_format: SAME
1780
+ instance: ReLU
1781
+ output_format: SAME
1782
+ model.decoder.layers.27.dropout:
1783
+ approximation_function: NONE
1784
+ input_format: SAME
1785
+ instance: Dropout
1786
+ output_format: SAME
1787
+ model.decoder.layers.27.fc1:
1788
+ accum_format: SAME
1789
+ approximation_function: NONE
1790
+ bias_format: SAME
1791
+ input_format: SAME
1792
+ instance: Linear
1793
+ output_format: SAME
1794
+ weight_format: SAME
1795
+ weight_sparseness: DENSE
1796
+ model.decoder.layers.27.fc2:
1797
+ accum_format: SAME
1798
+ approximation_function: NONE
1799
+ bias_format: SAME
1800
+ input_format: SAME
1801
+ instance: Linear
1802
+ output_format: SAME
1803
+ weight_format: SAME
1804
+ weight_sparseness: DENSE
1805
+ model.decoder.layers.27.final_layer_norm:
1806
+ approximation_function: NONE
1807
+ bias_format: SAME
1808
+ input_format: SAME
1809
+ instance: LayerNorm
1810
+ output_format: SAME
1811
+ weight_format: SAME
1812
+ model.decoder.layers.27.self_attn.dropout:
1813
+ approximation_function: NONE
1814
+ input_format: SAME
1815
+ instance: Dropout
1816
+ output_format: SAME
1817
+ model.decoder.layers.27.self_attn.k_proj:
1818
+ accum_format: SAME
1819
+ approximation_function: NONE
1820
+ bias_format: SAME
1821
+ input_format: SAME
1822
+ instance: Linear
1823
+ output_format: SAME
1824
+ weight_format: SAME
1825
+ weight_sparseness: DENSE
1826
+ model.decoder.layers.27.self_attn.out_proj:
1827
+ accum_format: SAME
1828
+ approximation_function: NONE
1829
+ bias_format: SAME
1830
+ input_format: SAME
1831
+ instance: Linear
1832
+ output_format: SAME
1833
+ weight_format: SAME
1834
+ weight_sparseness: DENSE
1835
+ model.decoder.layers.27.self_attn.q_proj:
1836
+ accum_format: SAME
1837
+ approximation_function: NONE
1838
+ bias_format: SAME
1839
+ input_format: SAME
1840
+ instance: Linear
1841
+ output_format: SAME
1842
+ weight_format: SAME
1843
+ weight_sparseness: DENSE
1844
+ model.decoder.layers.27.self_attn.softmax:
1845
+ approximation_function: NONE
1846
+ input_format: SAME
1847
+ instance: Softmax
1848
+ output_format: SAME
1849
+ model.decoder.layers.27.self_attn.v_proj:
1850
+ accum_format: SAME
1851
+ approximation_function: NONE
1852
+ bias_format: SAME
1853
+ input_format: SAME
1854
+ instance: Linear
1855
+ output_format: SAME
1856
+ weight_format: SAME
1857
+ weight_sparseness: DENSE
1858
+ model.decoder.layers.27.self_attn_layer_norm:
1859
+ approximation_function: NONE
1860
+ bias_format: SAME
1861
+ input_format: SAME
1862
+ instance: LayerNorm
1863
+ output_format: SAME
1864
+ weight_format: SAME
1865
+ model.decoder.layers.28.activation_fn:
1866
+ approximation_function: NONE
1867
+ input_format: SAME
1868
+ instance: ReLU
1869
+ output_format: SAME
1870
+ model.decoder.layers.28.dropout:
1871
+ approximation_function: NONE
1872
+ input_format: SAME
1873
+ instance: Dropout
1874
+ output_format: SAME
1875
+ model.decoder.layers.28.fc1:
1876
+ accum_format: SAME
1877
+ approximation_function: NONE
1878
+ bias_format: SAME
1879
+ input_format: SAME
1880
+ instance: Linear
1881
+ output_format: SAME
1882
+ weight_format: SAME
1883
+ weight_sparseness: DENSE
1884
+ model.decoder.layers.28.fc2:
1885
+ accum_format: SAME
1886
+ approximation_function: NONE
1887
+ bias_format: SAME
1888
+ input_format: SAME
1889
+ instance: Linear
1890
+ output_format: SAME
1891
+ weight_format: SAME
1892
+ weight_sparseness: DENSE
1893
+ model.decoder.layers.28.final_layer_norm:
1894
+ approximation_function: NONE
1895
+ bias_format: SAME
1896
+ input_format: SAME
1897
+ instance: LayerNorm
1898
+ output_format: SAME
1899
+ weight_format: SAME
1900
+ model.decoder.layers.28.self_attn.dropout:
1901
+ approximation_function: NONE
1902
+ input_format: SAME
1903
+ instance: Dropout
1904
+ output_format: SAME
1905
+ model.decoder.layers.28.self_attn.k_proj:
1906
+ accum_format: SAME
1907
+ approximation_function: NONE
1908
+ bias_format: SAME
1909
+ input_format: SAME
1910
+ instance: Linear
1911
+ output_format: SAME
1912
+ weight_format: SAME
1913
+ weight_sparseness: DENSE
1914
+ model.decoder.layers.28.self_attn.out_proj:
1915
+ accum_format: SAME
1916
+ approximation_function: NONE
1917
+ bias_format: SAME
1918
+ input_format: SAME
1919
+ instance: Linear
1920
+ output_format: SAME
1921
+ weight_format: SAME
1922
+ weight_sparseness: DENSE
1923
+ model.decoder.layers.28.self_attn.q_proj:
1924
+ accum_format: SAME
1925
+ approximation_function: NONE
1926
+ bias_format: SAME
1927
+ input_format: SAME
1928
+ instance: Linear
1929
+ output_format: SAME
1930
+ weight_format: SAME
1931
+ weight_sparseness: DENSE
1932
+ model.decoder.layers.28.self_attn.softmax:
1933
+ approximation_function: NONE
1934
+ input_format: SAME
1935
+ instance: Softmax
1936
+ output_format: SAME
1937
+ model.decoder.layers.28.self_attn.v_proj:
1938
+ accum_format: SAME
1939
+ approximation_function: NONE
1940
+ bias_format: SAME
1941
+ input_format: SAME
1942
+ instance: Linear
1943
+ output_format: SAME
1944
+ weight_format: SAME
1945
+ weight_sparseness: DENSE
1946
+ model.decoder.layers.28.self_attn_layer_norm:
1947
+ approximation_function: NONE
1948
+ bias_format: SAME
1949
+ input_format: SAME
1950
+ instance: LayerNorm
1951
+ output_format: SAME
1952
+ weight_format: SAME
1953
+ model.decoder.layers.29.activation_fn:
1954
+ approximation_function: NONE
1955
+ input_format: SAME
1956
+ instance: ReLU
1957
+ output_format: SAME
1958
+ model.decoder.layers.29.dropout:
1959
+ approximation_function: NONE
1960
+ input_format: SAME
1961
+ instance: Dropout
1962
+ output_format: SAME
1963
+ model.decoder.layers.29.fc1:
1964
+ accum_format: SAME
1965
+ approximation_function: NONE
1966
+ bias_format: SAME
1967
+ input_format: SAME
1968
+ instance: Linear
1969
+ output_format: SAME
1970
+ weight_format: SAME
1971
+ weight_sparseness: DENSE
1972
+ model.decoder.layers.29.fc2:
1973
+ accum_format: SAME
1974
+ approximation_function: NONE
1975
+ bias_format: SAME
1976
+ input_format: SAME
1977
+ instance: Linear
1978
+ output_format: SAME
1979
+ weight_format: SAME
1980
+ weight_sparseness: DENSE
1981
+ model.decoder.layers.29.final_layer_norm:
1982
+ approximation_function: NONE
1983
+ bias_format: SAME
1984
+ input_format: SAME
1985
+ instance: LayerNorm
1986
+ output_format: SAME
1987
+ weight_format: SAME
1988
+ model.decoder.layers.29.self_attn.dropout:
1989
+ approximation_function: NONE
1990
+ input_format: SAME
1991
+ instance: Dropout
1992
+ output_format: SAME
1993
+ model.decoder.layers.29.self_attn.k_proj:
1994
+ accum_format: SAME
1995
+ approximation_function: NONE
1996
+ bias_format: SAME
1997
+ input_format: SAME
1998
+ instance: Linear
1999
+ output_format: SAME
2000
+ weight_format: SAME
2001
+ weight_sparseness: DENSE
2002
+ model.decoder.layers.29.self_attn.out_proj:
2003
+ accum_format: SAME
2004
+ approximation_function: NONE
2005
+ bias_format: SAME
2006
+ input_format: SAME
2007
+ instance: Linear
2008
+ output_format: SAME
2009
+ weight_format: SAME
2010
+ weight_sparseness: DENSE
2011
+ model.decoder.layers.29.self_attn.q_proj:
2012
+ accum_format: SAME
2013
+ approximation_function: NONE
2014
+ bias_format: SAME
2015
+ input_format: SAME
2016
+ instance: Linear
2017
+ output_format: SAME
2018
+ weight_format: SAME
2019
+ weight_sparseness: DENSE
2020
+ model.decoder.layers.29.self_attn.softmax:
2021
+ approximation_function: NONE
2022
+ input_format: SAME
2023
+ instance: Softmax
2024
+ output_format: SAME
2025
+ model.decoder.layers.29.self_attn.v_proj:
2026
+ accum_format: SAME
2027
+ approximation_function: NONE
2028
+ bias_format: SAME
2029
+ input_format: SAME
2030
+ instance: Linear
2031
+ output_format: SAME
2032
+ weight_format: SAME
2033
+ weight_sparseness: DENSE
2034
+ model.decoder.layers.29.self_attn_layer_norm:
2035
+ approximation_function: NONE
2036
+ bias_format: SAME
2037
+ input_format: SAME
2038
+ instance: LayerNorm
2039
+ output_format: SAME
2040
+ weight_format: SAME
2041
+ model.decoder.layers.3.activation_fn:
2042
+ approximation_function: NONE
2043
+ input_format: SAME
2044
+ instance: ReLU
2045
+ output_format: SAME
2046
+ model.decoder.layers.3.dropout:
2047
+ approximation_function: NONE
2048
+ input_format: SAME
2049
+ instance: Dropout
2050
+ output_format: SAME
2051
+ model.decoder.layers.3.fc1:
2052
+ accum_format: SAME
2053
+ approximation_function: NONE
2054
+ bias_format: SAME
2055
+ input_format: SAME
2056
+ instance: Linear
2057
+ output_format: SAME
2058
+ weight_format: SAME
2059
+ weight_sparseness: DENSE
2060
+ model.decoder.layers.3.fc2:
2061
+ accum_format: SAME
2062
+ approximation_function: NONE
2063
+ bias_format: SAME
2064
+ input_format: SAME
2065
+ instance: Linear
2066
+ output_format: SAME
2067
+ weight_format: SAME
2068
+ weight_sparseness: DENSE
2069
+ model.decoder.layers.3.final_layer_norm:
2070
+ approximation_function: NONE
2071
+ bias_format: SAME
2072
+ input_format: SAME
2073
+ instance: LayerNorm
2074
+ output_format: SAME
2075
+ weight_format: SAME
2076
+ model.decoder.layers.3.self_attn.dropout:
2077
+ approximation_function: NONE
2078
+ input_format: SAME
2079
+ instance: Dropout
2080
+ output_format: SAME
2081
+ model.decoder.layers.3.self_attn.k_proj:
2082
+ accum_format: SAME
2083
+ approximation_function: NONE
2084
+ bias_format: SAME
2085
+ input_format: SAME
2086
+ instance: Linear
2087
+ output_format: SAME
2088
+ weight_format: SAME
2089
+ weight_sparseness: DENSE
2090
+ model.decoder.layers.3.self_attn.out_proj:
2091
+ accum_format: SAME
2092
+ approximation_function: NONE
2093
+ bias_format: SAME
2094
+ input_format: SAME
2095
+ instance: Linear
2096
+ output_format: SAME
2097
+ weight_format: SAME
2098
+ weight_sparseness: DENSE
2099
+ model.decoder.layers.3.self_attn.q_proj:
2100
+ accum_format: SAME
2101
+ approximation_function: NONE
2102
+ bias_format: SAME
2103
+ input_format: SAME
2104
+ instance: Linear
2105
+ output_format: SAME
2106
+ weight_format: SAME
2107
+ weight_sparseness: DENSE
2108
+ model.decoder.layers.3.self_attn.softmax:
2109
+ approximation_function: NONE
2110
+ input_format: SAME
2111
+ instance: Softmax
2112
+ output_format: SAME
2113
+ model.decoder.layers.3.self_attn.v_proj:
2114
+ accum_format: SAME
2115
+ approximation_function: NONE
2116
+ bias_format: SAME
2117
+ input_format: SAME
2118
+ instance: Linear
2119
+ output_format: SAME
2120
+ weight_format: SAME
2121
+ weight_sparseness: DENSE
2122
+ model.decoder.layers.3.self_attn_layer_norm:
2123
+ approximation_function: NONE
2124
+ bias_format: SAME
2125
+ input_format: SAME
2126
+ instance: LayerNorm
2127
+ output_format: SAME
2128
+ weight_format: SAME
2129
+ model.decoder.layers.30.activation_fn:
2130
+ approximation_function: NONE
2131
+ input_format: SAME
2132
+ instance: ReLU
2133
+ output_format: SAME
2134
+ model.decoder.layers.30.dropout:
2135
+ approximation_function: NONE
2136
+ input_format: SAME
2137
+ instance: Dropout
2138
+ output_format: SAME
2139
+ model.decoder.layers.30.fc1:
2140
+ accum_format: SAME
2141
+ approximation_function: NONE
2142
+ bias_format: SAME
2143
+ input_format: SAME
2144
+ instance: Linear
2145
+ output_format: SAME
2146
+ weight_format: SAME
2147
+ weight_sparseness: DENSE
2148
+ model.decoder.layers.30.fc2:
2149
+ accum_format: SAME
2150
+ approximation_function: NONE
2151
+ bias_format: SAME
2152
+ input_format: SAME
2153
+ instance: Linear
2154
+ output_format: SAME
2155
+ weight_format: SAME
2156
+ weight_sparseness: DENSE
2157
+ model.decoder.layers.30.final_layer_norm:
2158
+ approximation_function: NONE
2159
+ bias_format: SAME
2160
+ input_format: SAME
2161
+ instance: LayerNorm
2162
+ output_format: SAME
2163
+ weight_format: SAME
2164
+ model.decoder.layers.30.self_attn.dropout:
2165
+ approximation_function: NONE
2166
+ input_format: SAME
2167
+ instance: Dropout
2168
+ output_format: SAME
2169
+ model.decoder.layers.30.self_attn.k_proj:
2170
+ accum_format: SAME
2171
+ approximation_function: NONE
2172
+ bias_format: SAME
2173
+ input_format: SAME
2174
+ instance: Linear
2175
+ output_format: SAME
2176
+ weight_format: SAME
2177
+ weight_sparseness: DENSE
2178
+ model.decoder.layers.30.self_attn.out_proj:
2179
+ accum_format: SAME
2180
+ approximation_function: NONE
2181
+ bias_format: SAME
2182
+ input_format: SAME
2183
+ instance: Linear
2184
+ output_format: SAME
2185
+ weight_format: SAME
2186
+ weight_sparseness: DENSE
2187
+ model.decoder.layers.30.self_attn.q_proj:
2188
+ accum_format: SAME
2189
+ approximation_function: NONE
2190
+ bias_format: SAME
2191
+ input_format: SAME
2192
+ instance: Linear
2193
+ output_format: SAME
2194
+ weight_format: SAME
2195
+ weight_sparseness: DENSE
2196
+ model.decoder.layers.30.self_attn.softmax:
2197
+ approximation_function: NONE
2198
+ input_format: SAME
2199
+ instance: Softmax
2200
+ output_format: SAME
2201
+ model.decoder.layers.30.self_attn.v_proj:
2202
+ accum_format: SAME
2203
+ approximation_function: NONE
2204
+ bias_format: SAME
2205
+ input_format: SAME
2206
+ instance: Linear
2207
+ output_format: SAME
2208
+ weight_format: SAME
2209
+ weight_sparseness: DENSE
2210
+ model.decoder.layers.30.self_attn_layer_norm:
2211
+ approximation_function: NONE
2212
+ bias_format: SAME
2213
+ input_format: SAME
2214
+ instance: LayerNorm
2215
+ output_format: SAME
2216
+ weight_format: SAME
2217
+ model.decoder.layers.31.activation_fn:
2218
+ approximation_function: NONE
2219
+ input_format: SAME
2220
+ instance: ReLU
2221
+ output_format: SAME
2222
+ model.decoder.layers.31.dropout:
2223
+ approximation_function: NONE
2224
+ input_format: SAME
2225
+ instance: Dropout
2226
+ output_format: SAME
2227
+ model.decoder.layers.31.fc1:
2228
+ accum_format: SAME
2229
+ approximation_function: NONE
2230
+ bias_format: SAME
2231
+ input_format: SAME
2232
+ instance: Linear
2233
+ output_format: SAME
2234
+ weight_format: SAME
2235
+ weight_sparseness: DENSE
2236
+ model.decoder.layers.31.fc2:
2237
+ accum_format: SAME
2238
+ approximation_function: NONE
2239
+ bias_format: SAME
2240
+ input_format: SAME
2241
+ instance: Linear
2242
+ output_format: SAME
2243
+ weight_format: SAME
2244
+ weight_sparseness: DENSE
2245
+ model.decoder.layers.31.final_layer_norm:
2246
+ approximation_function: NONE
2247
+ bias_format: SAME
2248
+ input_format: SAME
2249
+ instance: LayerNorm
2250
+ output_format: SAME
2251
+ weight_format: SAME
2252
+ model.decoder.layers.31.self_attn.dropout:
2253
+ approximation_function: NONE
2254
+ input_format: SAME
2255
+ instance: Dropout
2256
+ output_format: SAME
2257
+ model.decoder.layers.31.self_attn.k_proj:
2258
+ accum_format: SAME
2259
+ approximation_function: NONE
2260
+ bias_format: SAME
2261
+ input_format: SAME
2262
+ instance: Linear
2263
+ output_format: SAME
2264
+ weight_format: SAME
2265
+ weight_sparseness: DENSE
2266
+ model.decoder.layers.31.self_attn.out_proj:
2267
+ accum_format: SAME
2268
+ approximation_function: NONE
2269
+ bias_format: SAME
2270
+ input_format: SAME
2271
+ instance: Linear
2272
+ output_format: SAME
2273
+ weight_format: SAME
2274
+ weight_sparseness: DENSE
2275
+ model.decoder.layers.31.self_attn.q_proj:
2276
+ accum_format: SAME
2277
+ approximation_function: NONE
2278
+ bias_format: SAME
2279
+ input_format: SAME
2280
+ instance: Linear
2281
+ output_format: SAME
2282
+ weight_format: SAME
2283
+ weight_sparseness: DENSE
2284
+ model.decoder.layers.31.self_attn.softmax:
2285
+ approximation_function: NONE
2286
+ input_format: SAME
2287
+ instance: Softmax
2288
+ output_format: SAME
2289
+ model.decoder.layers.31.self_attn.v_proj:
2290
+ accum_format: SAME
2291
+ approximation_function: NONE
2292
+ bias_format: SAME
2293
+ input_format: SAME
2294
+ instance: Linear
2295
+ output_format: SAME
2296
+ weight_format: SAME
2297
+ weight_sparseness: DENSE
2298
+ model.decoder.layers.31.self_attn_layer_norm:
2299
+ approximation_function: NONE
2300
+ bias_format: SAME
2301
+ input_format: SAME
2302
+ instance: LayerNorm
2303
+ output_format: SAME
2304
+ weight_format: SAME
2305
+ model.decoder.layers.4.activation_fn:
2306
+ approximation_function: NONE
2307
+ input_format: SAME
2308
+ instance: ReLU
2309
+ output_format: SAME
2310
+ model.decoder.layers.4.dropout:
2311
+ approximation_function: NONE
2312
+ input_format: SAME
2313
+ instance: Dropout
2314
+ output_format: SAME
2315
+ model.decoder.layers.4.fc1:
2316
+ accum_format: SAME
2317
+ approximation_function: NONE
2318
+ bias_format: SAME
2319
+ input_format: SAME
2320
+ instance: Linear
2321
+ output_format: SAME
2322
+ weight_format: SAME
2323
+ weight_sparseness: DENSE
2324
+ model.decoder.layers.4.fc2:
2325
+ accum_format: SAME
2326
+ approximation_function: NONE
2327
+ bias_format: SAME
2328
+ input_format: SAME
2329
+ instance: Linear
2330
+ output_format: SAME
2331
+ weight_format: SAME
2332
+ weight_sparseness: DENSE
2333
+ model.decoder.layers.4.final_layer_norm:
2334
+ approximation_function: NONE
2335
+ bias_format: SAME
2336
+ input_format: SAME
2337
+ instance: LayerNorm
2338
+ output_format: SAME
2339
+ weight_format: SAME
2340
+ model.decoder.layers.4.self_attn.dropout:
2341
+ approximation_function: NONE
2342
+ input_format: SAME
2343
+ instance: Dropout
2344
+ output_format: SAME
2345
+ model.decoder.layers.4.self_attn.k_proj:
2346
+ accum_format: SAME
2347
+ approximation_function: NONE
2348
+ bias_format: SAME
2349
+ input_format: SAME
2350
+ instance: Linear
2351
+ output_format: SAME
2352
+ weight_format: SAME
2353
+ weight_sparseness: DENSE
2354
+ model.decoder.layers.4.self_attn.out_proj:
2355
+ accum_format: SAME
2356
+ approximation_function: NONE
2357
+ bias_format: SAME
2358
+ input_format: SAME
2359
+ instance: Linear
2360
+ output_format: SAME
2361
+ weight_format: SAME
2362
+ weight_sparseness: DENSE
2363
+ model.decoder.layers.4.self_attn.q_proj:
2364
+ accum_format: SAME
2365
+ approximation_function: NONE
2366
+ bias_format: SAME
2367
+ input_format: SAME
2368
+ instance: Linear
2369
+ output_format: SAME
2370
+ weight_format: SAME
2371
+ weight_sparseness: DENSE
2372
+ model.decoder.layers.4.self_attn.softmax:
2373
+ approximation_function: NONE
2374
+ input_format: SAME
2375
+ instance: Softmax
2376
+ output_format: SAME
2377
+ model.decoder.layers.4.self_attn.v_proj:
2378
+ accum_format: SAME
2379
+ approximation_function: NONE
2380
+ bias_format: SAME
2381
+ input_format: SAME
2382
+ instance: Linear
2383
+ output_format: SAME
2384
+ weight_format: SAME
2385
+ weight_sparseness: DENSE
2386
+ model.decoder.layers.4.self_attn_layer_norm:
2387
+ approximation_function: NONE
2388
+ bias_format: SAME
2389
+ input_format: SAME
2390
+ instance: LayerNorm
2391
+ output_format: SAME
2392
+ weight_format: SAME
2393
+ model.decoder.layers.5.activation_fn:
2394
+ approximation_function: NONE
2395
+ input_format: SAME
2396
+ instance: ReLU
2397
+ output_format: SAME
2398
+ model.decoder.layers.5.dropout:
2399
+ approximation_function: NONE
2400
+ input_format: SAME
2401
+ instance: Dropout
2402
+ output_format: SAME
2403
+ model.decoder.layers.5.fc1:
2404
+ accum_format: SAME
2405
+ approximation_function: NONE
2406
+ bias_format: SAME
2407
+ input_format: SAME
2408
+ instance: Linear
2409
+ output_format: SAME
2410
+ weight_format: SAME
2411
+ weight_sparseness: DENSE
2412
+ model.decoder.layers.5.fc2:
2413
+ accum_format: SAME
2414
+ approximation_function: NONE
2415
+ bias_format: SAME
2416
+ input_format: SAME
2417
+ instance: Linear
2418
+ output_format: SAME
2419
+ weight_format: SAME
2420
+ weight_sparseness: DENSE
2421
+ model.decoder.layers.5.final_layer_norm:
2422
+ approximation_function: NONE
2423
+ bias_format: SAME
2424
+ input_format: SAME
2425
+ instance: LayerNorm
2426
+ output_format: SAME
2427
+ weight_format: SAME
2428
+ model.decoder.layers.5.self_attn.dropout:
2429
+ approximation_function: NONE
2430
+ input_format: SAME
2431
+ instance: Dropout
2432
+ output_format: SAME
2433
+ model.decoder.layers.5.self_attn.k_proj:
2434
+ accum_format: SAME
2435
+ approximation_function: NONE
2436
+ bias_format: SAME
2437
+ input_format: SAME
2438
+ instance: Linear
2439
+ output_format: SAME
2440
+ weight_format: SAME
2441
+ weight_sparseness: DENSE
2442
+ model.decoder.layers.5.self_attn.out_proj:
2443
+ accum_format: SAME
2444
+ approximation_function: NONE
2445
+ bias_format: SAME
2446
+ input_format: SAME
2447
+ instance: Linear
2448
+ output_format: SAME
2449
+ weight_format: SAME
2450
+ weight_sparseness: DENSE
2451
+ model.decoder.layers.5.self_attn.q_proj:
2452
+ accum_format: SAME
2453
+ approximation_function: NONE
2454
+ bias_format: SAME
2455
+ input_format: SAME
2456
+ instance: Linear
2457
+ output_format: SAME
2458
+ weight_format: SAME
2459
+ weight_sparseness: DENSE
2460
+ model.decoder.layers.5.self_attn.softmax:
2461
+ approximation_function: NONE
2462
+ input_format: SAME
2463
+ instance: Softmax
2464
+ output_format: SAME
2465
+ model.decoder.layers.5.self_attn.v_proj:
2466
+ accum_format: SAME
2467
+ approximation_function: NONE
2468
+ bias_format: SAME
2469
+ input_format: SAME
2470
+ instance: Linear
2471
+ output_format: SAME
2472
+ weight_format: SAME
2473
+ weight_sparseness: DENSE
2474
+ model.decoder.layers.5.self_attn_layer_norm:
2475
+ approximation_function: NONE
2476
+ bias_format: SAME
2477
+ input_format: SAME
2478
+ instance: LayerNorm
2479
+ output_format: SAME
2480
+ weight_format: SAME
2481
+ model.decoder.layers.6.activation_fn:
2482
+ approximation_function: NONE
2483
+ input_format: SAME
2484
+ instance: ReLU
2485
+ output_format: SAME
2486
+ model.decoder.layers.6.dropout:
2487
+ approximation_function: NONE
2488
+ input_format: SAME
2489
+ instance: Dropout
2490
+ output_format: SAME
2491
+ model.decoder.layers.6.fc1:
2492
+ accum_format: SAME
2493
+ approximation_function: NONE
2494
+ bias_format: SAME
2495
+ input_format: SAME
2496
+ instance: Linear
2497
+ output_format: SAME
2498
+ weight_format: SAME
2499
+ weight_sparseness: DENSE
2500
+ model.decoder.layers.6.fc2:
2501
+ accum_format: SAME
2502
+ approximation_function: NONE
2503
+ bias_format: SAME
2504
+ input_format: SAME
2505
+ instance: Linear
2506
+ output_format: SAME
2507
+ weight_format: SAME
2508
+ weight_sparseness: DENSE
2509
+ model.decoder.layers.6.final_layer_norm:
2510
+ approximation_function: NONE
2511
+ bias_format: SAME
2512
+ input_format: SAME
2513
+ instance: LayerNorm
2514
+ output_format: SAME
2515
+ weight_format: SAME
2516
+ model.decoder.layers.6.self_attn.dropout:
2517
+ approximation_function: NONE
2518
+ input_format: SAME
2519
+ instance: Dropout
2520
+ output_format: SAME
2521
+ model.decoder.layers.6.self_attn.k_proj:
2522
+ accum_format: SAME
2523
+ approximation_function: NONE
2524
+ bias_format: SAME
2525
+ input_format: SAME
2526
+ instance: Linear
2527
+ output_format: SAME
2528
+ weight_format: SAME
2529
+ weight_sparseness: DENSE
2530
+ model.decoder.layers.6.self_attn.out_proj:
2531
+ accum_format: SAME
2532
+ approximation_function: NONE
2533
+ bias_format: SAME
2534
+ input_format: SAME
2535
+ instance: Linear
2536
+ output_format: SAME
2537
+ weight_format: SAME
2538
+ weight_sparseness: DENSE
2539
+ model.decoder.layers.6.self_attn.q_proj:
2540
+ accum_format: SAME
2541
+ approximation_function: NONE
2542
+ bias_format: SAME
2543
+ input_format: SAME
2544
+ instance: Linear
2545
+ output_format: SAME
2546
+ weight_format: SAME
2547
+ weight_sparseness: DENSE
2548
+ model.decoder.layers.6.self_attn.softmax:
2549
+ approximation_function: NONE
2550
+ input_format: SAME
2551
+ instance: Softmax
2552
+ output_format: SAME
2553
+ model.decoder.layers.6.self_attn.v_proj:
2554
+ accum_format: SAME
2555
+ approximation_function: NONE
2556
+ bias_format: SAME
2557
+ input_format: SAME
2558
+ instance: Linear
2559
+ output_format: SAME
2560
+ weight_format: SAME
2561
+ weight_sparseness: DENSE
2562
+ model.decoder.layers.6.self_attn_layer_norm:
2563
+ approximation_function: NONE
2564
+ bias_format: SAME
2565
+ input_format: SAME
2566
+ instance: LayerNorm
2567
+ output_format: SAME
2568
+ weight_format: SAME
2569
+ model.decoder.layers.7.activation_fn:
2570
+ approximation_function: NONE
2571
+ input_format: SAME
2572
+ instance: ReLU
2573
+ output_format: SAME
2574
+ model.decoder.layers.7.dropout:
2575
+ approximation_function: NONE
2576
+ input_format: SAME
2577
+ instance: Dropout
2578
+ output_format: SAME
2579
+ model.decoder.layers.7.fc1:
2580
+ accum_format: SAME
2581
+ approximation_function: NONE
2582
+ bias_format: SAME
2583
+ input_format: SAME
2584
+ instance: Linear
2585
+ output_format: SAME
2586
+ weight_format: SAME
2587
+ weight_sparseness: DENSE
2588
+ model.decoder.layers.7.fc2:
2589
+ accum_format: SAME
2590
+ approximation_function: NONE
2591
+ bias_format: SAME
2592
+ input_format: SAME
2593
+ instance: Linear
2594
+ output_format: SAME
2595
+ weight_format: SAME
2596
+ weight_sparseness: DENSE
2597
+ model.decoder.layers.7.final_layer_norm:
2598
+ approximation_function: NONE
2599
+ bias_format: SAME
2600
+ input_format: SAME
2601
+ instance: LayerNorm
2602
+ output_format: SAME
2603
+ weight_format: SAME
2604
+ model.decoder.layers.7.self_attn.dropout:
2605
+ approximation_function: NONE
2606
+ input_format: SAME
2607
+ instance: Dropout
2608
+ output_format: SAME
2609
+ model.decoder.layers.7.self_attn.k_proj:
2610
+ accum_format: SAME
2611
+ approximation_function: NONE
2612
+ bias_format: SAME
2613
+ input_format: SAME
2614
+ instance: Linear
2615
+ output_format: SAME
2616
+ weight_format: SAME
2617
+ weight_sparseness: DENSE
2618
+ model.decoder.layers.7.self_attn.out_proj:
2619
+ accum_format: SAME
2620
+ approximation_function: NONE
2621
+ bias_format: SAME
2622
+ input_format: SAME
2623
+ instance: Linear
2624
+ output_format: SAME
2625
+ weight_format: SAME
2626
+ weight_sparseness: DENSE
2627
+ model.decoder.layers.7.self_attn.q_proj:
2628
+ accum_format: SAME
2629
+ approximation_function: NONE
2630
+ bias_format: SAME
2631
+ input_format: SAME
2632
+ instance: Linear
2633
+ output_format: SAME
2634
+ weight_format: SAME
2635
+ weight_sparseness: DENSE
2636
+ model.decoder.layers.7.self_attn.softmax:
2637
+ approximation_function: NONE
2638
+ input_format: SAME
2639
+ instance: Softmax
2640
+ output_format: SAME
2641
+ model.decoder.layers.7.self_attn.v_proj:
2642
+ accum_format: SAME
2643
+ approximation_function: NONE
2644
+ bias_format: SAME
2645
+ input_format: SAME
2646
+ instance: Linear
2647
+ output_format: SAME
2648
+ weight_format: SAME
2649
+ weight_sparseness: DENSE
2650
+ model.decoder.layers.7.self_attn_layer_norm:
2651
+ approximation_function: NONE
2652
+ bias_format: SAME
2653
+ input_format: SAME
2654
+ instance: LayerNorm
2655
+ output_format: SAME
2656
+ weight_format: SAME
2657
+ model.decoder.layers.8.activation_fn:
2658
+ approximation_function: NONE
2659
+ input_format: SAME
2660
+ instance: ReLU
2661
+ output_format: SAME
2662
+ model.decoder.layers.8.dropout:
2663
+ approximation_function: NONE
2664
+ input_format: SAME
2665
+ instance: Dropout
2666
+ output_format: SAME
2667
+ model.decoder.layers.8.fc1:
2668
+ accum_format: SAME
2669
+ approximation_function: NONE
2670
+ bias_format: SAME
2671
+ input_format: SAME
2672
+ instance: Linear
2673
+ output_format: SAME
2674
+ weight_format: SAME
2675
+ weight_sparseness: DENSE
2676
+ model.decoder.layers.8.fc2:
2677
+ accum_format: SAME
2678
+ approximation_function: NONE
2679
+ bias_format: SAME
2680
+ input_format: SAME
2681
+ instance: Linear
2682
+ output_format: SAME
2683
+ weight_format: SAME
2684
+ weight_sparseness: DENSE
2685
+ model.decoder.layers.8.final_layer_norm:
2686
+ approximation_function: NONE
2687
+ bias_format: SAME
2688
+ input_format: SAME
2689
+ instance: LayerNorm
2690
+ output_format: SAME
2691
+ weight_format: SAME
2692
+ model.decoder.layers.8.self_attn.dropout:
2693
+ approximation_function: NONE
2694
+ input_format: SAME
2695
+ instance: Dropout
2696
+ output_format: SAME
2697
+ model.decoder.layers.8.self_attn.k_proj:
2698
+ accum_format: SAME
2699
+ approximation_function: NONE
2700
+ bias_format: SAME
2701
+ input_format: SAME
2702
+ instance: Linear
2703
+ output_format: SAME
2704
+ weight_format: SAME
2705
+ weight_sparseness: DENSE
2706
+ model.decoder.layers.8.self_attn.out_proj:
2707
+ accum_format: SAME
2708
+ approximation_function: NONE
2709
+ bias_format: SAME
2710
+ input_format: SAME
2711
+ instance: Linear
2712
+ output_format: SAME
2713
+ weight_format: SAME
2714
+ weight_sparseness: DENSE
2715
+ model.decoder.layers.8.self_attn.q_proj:
2716
+ accum_format: SAME
2717
+ approximation_function: NONE
2718
+ bias_format: SAME
2719
+ input_format: SAME
2720
+ instance: Linear
2721
+ output_format: SAME
2722
+ weight_format: SAME
2723
+ weight_sparseness: DENSE
2724
+ model.decoder.layers.8.self_attn.softmax:
2725
+ approximation_function: NONE
2726
+ input_format: SAME
2727
+ instance: Softmax
2728
+ output_format: SAME
2729
+ model.decoder.layers.8.self_attn.v_proj:
2730
+ accum_format: SAME
2731
+ approximation_function: NONE
2732
+ bias_format: SAME
2733
+ input_format: SAME
2734
+ instance: Linear
2735
+ output_format: SAME
2736
+ weight_format: SAME
2737
+ weight_sparseness: DENSE
2738
+ model.decoder.layers.8.self_attn_layer_norm:
2739
+ approximation_function: NONE
2740
+ bias_format: SAME
2741
+ input_format: SAME
2742
+ instance: LayerNorm
2743
+ output_format: SAME
2744
+ weight_format: SAME
2745
+ model.decoder.layers.9.activation_fn:
2746
+ approximation_function: NONE
2747
+ input_format: SAME
2748
+ instance: ReLU
2749
+ output_format: SAME
2750
+ model.decoder.layers.9.dropout:
2751
+ approximation_function: NONE
2752
+ input_format: SAME
2753
+ instance: Dropout
2754
+ output_format: SAME
2755
+ model.decoder.layers.9.fc1:
2756
+ accum_format: SAME
2757
+ approximation_function: NONE
2758
+ bias_format: SAME
2759
+ input_format: SAME
2760
+ instance: Linear
2761
+ output_format: SAME
2762
+ weight_format: SAME
2763
+ weight_sparseness: DENSE
2764
+ model.decoder.layers.9.fc2:
2765
+ accum_format: SAME
2766
+ approximation_function: NONE
2767
+ bias_format: SAME
2768
+ input_format: SAME
2769
+ instance: Linear
2770
+ output_format: SAME
2771
+ weight_format: SAME
2772
+ weight_sparseness: DENSE
2773
+ model.decoder.layers.9.final_layer_norm:
2774
+ approximation_function: NONE
2775
+ bias_format: SAME
2776
+ input_format: SAME
2777
+ instance: LayerNorm
2778
+ output_format: SAME
2779
+ weight_format: SAME
2780
+ model.decoder.layers.9.self_attn.dropout:
2781
+ approximation_function: NONE
2782
+ input_format: SAME
2783
+ instance: Dropout
2784
+ output_format: SAME
2785
+ model.decoder.layers.9.self_attn.k_proj:
2786
+ accum_format: SAME
2787
+ approximation_function: NONE
2788
+ bias_format: SAME
2789
+ input_format: SAME
2790
+ instance: Linear
2791
+ output_format: SAME
2792
+ weight_format: SAME
2793
+ weight_sparseness: DENSE
2794
+ model.decoder.layers.9.self_attn.out_proj:
2795
+ accum_format: SAME
2796
+ approximation_function: NONE
2797
+ bias_format: SAME
2798
+ input_format: SAME
2799
+ instance: Linear
2800
+ output_format: SAME
2801
+ weight_format: SAME
2802
+ weight_sparseness: DENSE
2803
+ model.decoder.layers.9.self_attn.q_proj:
2804
+ accum_format: SAME
2805
+ approximation_function: NONE
2806
+ bias_format: SAME
2807
+ input_format: SAME
2808
+ instance: Linear
2809
+ output_format: SAME
2810
+ weight_format: SAME
2811
+ weight_sparseness: DENSE
2812
+ model.decoder.layers.9.self_attn.softmax:
2813
+ approximation_function: NONE
2814
+ input_format: SAME
2815
+ instance: Softmax
2816
+ output_format: SAME
2817
+ model.decoder.layers.9.self_attn.v_proj:
2818
+ accum_format: SAME
2819
+ approximation_function: NONE
2820
+ bias_format: SAME
2821
+ input_format: SAME
2822
+ instance: Linear
2823
+ output_format: SAME
2824
+ weight_format: SAME
2825
+ weight_sparseness: DENSE
2826
+ model.decoder.layers.9.self_attn_layer_norm:
2827
+ approximation_function: NONE
2828
+ bias_format: SAME
2829
+ input_format: SAME
2830
+ instance: LayerNorm
2831
+ output_format: SAME
2832
+ weight_format: SAME
configs/BASIC.yaml ADDED
@@ -0,0 +1,2832 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ lm_head:
3
+ accum_format: SAME
4
+ approximation_function: NONE
5
+ input_format: SAME
6
+ instance: Linear
7
+ output_format: SAME
8
+ weight_format: SAME
9
+ weight_sparseness: DENSE
10
+ model.decoder.final_layer_norm:
11
+ approximation_function: LAYERNORM(fallback,4,float16)
12
+ bias_format: SAME
13
+ input_format: SAME
14
+ instance: LayerNorm
15
+ output_format: SAME
16
+ weight_format: SAME
17
+ model.decoder.layers.0.activation_fn:
18
+ approximation_function: NONE
19
+ input_format: SAME
20
+ instance: ReLU
21
+ output_format: SAME
22
+ model.decoder.layers.0.dropout:
23
+ approximation_function: NONE
24
+ input_format: SAME
25
+ instance: Dropout
26
+ output_format: SAME
27
+ model.decoder.layers.0.fc1:
28
+ accum_format: SAME
29
+ approximation_function: NONE
30
+ bias_format: SAME
31
+ input_format: BFP[8|8]{64,-1}(SN)
32
+ instance: Linear
33
+ output_format: SAME
34
+ weight_format: BFP[8|8]{64,-1}(SN)
35
+ weight_sparseness: DENSE
36
+ model.decoder.layers.0.fc2:
37
+ accum_format: SAME
38
+ approximation_function: NONE
39
+ bias_format: SAME
40
+ input_format: BFP[8|8]{64,-1}(SN)
41
+ instance: Linear
42
+ output_format: SAME
43
+ weight_format: BFP[8|8]{64,-1}(SN)
44
+ weight_sparseness: DENSE
45
+ model.decoder.layers.0.final_layer_norm:
46
+ approximation_function: LAYERNORM(fallback,4,float16)
47
+ bias_format: SAME
48
+ input_format: SAME
49
+ instance: LayerNorm
50
+ output_format: SAME
51
+ weight_format: SAME
52
+ model.decoder.layers.0.self_attn.dropout:
53
+ approximation_function: NONE
54
+ input_format: SAME
55
+ instance: Dropout
56
+ output_format: BFP[8|8]{64,-1}(SN)
57
+ model.decoder.layers.0.self_attn.k_proj:
58
+ accum_format: SAME
59
+ approximation_function: NONE
60
+ bias_format: SAME
61
+ input_format: BFP[8|8]{64,-1}(SN)
62
+ instance: Linear
63
+ output_format: BFP[8|8]{64,-1}(SN)
64
+ weight_format: BFP[8|8]{64,-1}(SN)
65
+ weight_sparseness: DENSE
66
+ model.decoder.layers.0.self_attn.out_proj:
67
+ accum_format: SAME
68
+ approximation_function: NONE
69
+ bias_format: SAME
70
+ input_format: BFP[8|8]{64,-1}(SN)
71
+ instance: Linear
72
+ output_format: SAME
73
+ weight_format: BFP[8|8]{64,-1}(SN)
74
+ weight_sparseness: DENSE
75
+ model.decoder.layers.0.self_attn.q_proj:
76
+ accum_format: SAME
77
+ approximation_function: NONE
78
+ bias_format: SAME
79
+ input_format: BFP[8|8]{64,-1}(SN)
80
+ instance: Linear
81
+ output_format: BFP[8|8]{64,-1}(SN)
82
+ weight_format: BFP[8|8]{64,-1}(SN)
83
+ weight_sparseness: DENSE
84
+ model.decoder.layers.0.self_attn.softmax:
85
+ approximation_function: SOFTMAX(base2,float16)
86
+ input_format: SAME
87
+ instance: Softmax
88
+ output_format: SAME
89
+ model.decoder.layers.0.self_attn.v_proj:
90
+ accum_format: SAME
91
+ approximation_function: NONE
92
+ bias_format: SAME
93
+ input_format: BFP[8|8]{64,-1}(SN)
94
+ instance: Linear
95
+ output_format: BFP[8|8]{64,-1}(SN)
96
+ weight_format: BFP[8|8]{64,-1}(SN)
97
+ weight_sparseness: DENSE
98
+ model.decoder.layers.0.self_attn_layer_norm:
99
+ approximation_function: LAYERNORM(fallback,4,float16)
100
+ bias_format: SAME
101
+ input_format: SAME
102
+ instance: LayerNorm
103
+ output_format: SAME
104
+ weight_format: SAME
105
+ model.decoder.layers.1.activation_fn:
106
+ approximation_function: NONE
107
+ input_format: SAME
108
+ instance: ReLU
109
+ output_format: SAME
110
+ model.decoder.layers.1.dropout:
111
+ approximation_function: NONE
112
+ input_format: SAME
113
+ instance: Dropout
114
+ output_format: SAME
115
+ model.decoder.layers.1.fc1:
116
+ accum_format: SAME
117
+ approximation_function: NONE
118
+ bias_format: SAME
119
+ input_format: BFP[8|8]{64,-1}(SN)
120
+ instance: Linear
121
+ output_format: SAME
122
+ weight_format: BFP[8|8]{64,-1}(SN)
123
+ weight_sparseness: DENSE
124
+ model.decoder.layers.1.fc2:
125
+ accum_format: SAME
126
+ approximation_function: NONE
127
+ bias_format: SAME
128
+ input_format: BFP[8|8]{64,-1}(SN)
129
+ instance: Linear
130
+ output_format: SAME
131
+ weight_format: BFP[8|8]{64,-1}(SN)
132
+ weight_sparseness: DENSE
133
+ model.decoder.layers.1.final_layer_norm:
134
+ approximation_function: LAYERNORM(fallback,4,float16)
135
+ bias_format: SAME
136
+ input_format: SAME
137
+ instance: LayerNorm
138
+ output_format: SAME
139
+ weight_format: SAME
140
+ model.decoder.layers.1.self_attn.dropout:
141
+ approximation_function: NONE
142
+ input_format: SAME
143
+ instance: Dropout
144
+ output_format: BFP[8|8]{64,-1}(SN)
145
+ model.decoder.layers.1.self_attn.k_proj:
146
+ accum_format: SAME
147
+ approximation_function: NONE
148
+ bias_format: SAME
149
+ input_format: BFP[8|8]{64,-1}(SN)
150
+ instance: Linear
151
+ output_format: BFP[8|8]{64,-1}(SN)
152
+ weight_format: BFP[8|8]{64,-1}(SN)
153
+ weight_sparseness: DENSE
154
+ model.decoder.layers.1.self_attn.out_proj:
155
+ accum_format: SAME
156
+ approximation_function: NONE
157
+ bias_format: SAME
158
+ input_format: BFP[8|8]{64,-1}(SN)
159
+ instance: Linear
160
+ output_format: SAME
161
+ weight_format: BFP[8|8]{64,-1}(SN)
162
+ weight_sparseness: DENSE
163
+ model.decoder.layers.1.self_attn.q_proj:
164
+ accum_format: SAME
165
+ approximation_function: NONE
166
+ bias_format: SAME
167
+ input_format: BFP[8|8]{64,-1}(SN)
168
+ instance: Linear
169
+ output_format: BFP[8|8]{64,-1}(SN)
170
+ weight_format: BFP[8|8]{64,-1}(SN)
171
+ weight_sparseness: DENSE
172
+ model.decoder.layers.1.self_attn.softmax:
173
+ approximation_function: SOFTMAX(base2,float16)
174
+ input_format: SAME
175
+ instance: Softmax
176
+ output_format: SAME
177
+ model.decoder.layers.1.self_attn.v_proj:
178
+ accum_format: SAME
179
+ approximation_function: NONE
180
+ bias_format: SAME
181
+ input_format: BFP[8|8]{64,-1}(SN)
182
+ instance: Linear
183
+ output_format: BFP[8|8]{64,-1}(SN)
184
+ weight_format: BFP[8|8]{64,-1}(SN)
185
+ weight_sparseness: DENSE
186
+ model.decoder.layers.1.self_attn_layer_norm:
187
+ approximation_function: LAYERNORM(fallback,4,float16)
188
+ bias_format: SAME
189
+ input_format: SAME
190
+ instance: LayerNorm
191
+ output_format: SAME
192
+ weight_format: SAME
193
+ model.decoder.layers.10.activation_fn:
194
+ approximation_function: NONE
195
+ input_format: SAME
196
+ instance: ReLU
197
+ output_format: SAME
198
+ model.decoder.layers.10.dropout:
199
+ approximation_function: NONE
200
+ input_format: SAME
201
+ instance: Dropout
202
+ output_format: SAME
203
+ model.decoder.layers.10.fc1:
204
+ accum_format: SAME
205
+ approximation_function: NONE
206
+ bias_format: SAME
207
+ input_format: BFP[8|8]{64,-1}(SN)
208
+ instance: Linear
209
+ output_format: SAME
210
+ weight_format: BFP[8|8]{64,-1}(SN)
211
+ weight_sparseness: DENSE
212
+ model.decoder.layers.10.fc2:
213
+ accum_format: SAME
214
+ approximation_function: NONE
215
+ bias_format: SAME
216
+ input_format: BFP[8|8]{64,-1}(SN)
217
+ instance: Linear
218
+ output_format: SAME
219
+ weight_format: BFP[8|8]{64,-1}(SN)
220
+ weight_sparseness: DENSE
221
+ model.decoder.layers.10.final_layer_norm:
222
+ approximation_function: LAYERNORM(fallback,4,float16)
223
+ bias_format: SAME
224
+ input_format: SAME
225
+ instance: LayerNorm
226
+ output_format: SAME
227
+ weight_format: SAME
228
+ model.decoder.layers.10.self_attn.dropout:
229
+ approximation_function: NONE
230
+ input_format: SAME
231
+ instance: Dropout
232
+ output_format: BFP[8|8]{64,-1}(SN)
233
+ model.decoder.layers.10.self_attn.k_proj:
234
+ accum_format: SAME
235
+ approximation_function: NONE
236
+ bias_format: SAME
237
+ input_format: BFP[8|8]{64,-1}(SN)
238
+ instance: Linear
239
+ output_format: BFP[8|8]{64,-1}(SN)
240
+ weight_format: BFP[8|8]{64,-1}(SN)
241
+ weight_sparseness: DENSE
242
+ model.decoder.layers.10.self_attn.out_proj:
243
+ accum_format: SAME
244
+ approximation_function: NONE
245
+ bias_format: SAME
246
+ input_format: BFP[8|8]{64,-1}(SN)
247
+ instance: Linear
248
+ output_format: SAME
249
+ weight_format: BFP[8|8]{64,-1}(SN)
250
+ weight_sparseness: DENSE
251
+ model.decoder.layers.10.self_attn.q_proj:
252
+ accum_format: SAME
253
+ approximation_function: NONE
254
+ bias_format: SAME
255
+ input_format: BFP[8|8]{64,-1}(SN)
256
+ instance: Linear
257
+ output_format: BFP[8|8]{64,-1}(SN)
258
+ weight_format: BFP[8|8]{64,-1}(SN)
259
+ weight_sparseness: DENSE
260
+ model.decoder.layers.10.self_attn.softmax:
261
+ approximation_function: SOFTMAX(base2,float16)
262
+ input_format: SAME
263
+ instance: Softmax
264
+ output_format: SAME
265
+ model.decoder.layers.10.self_attn.v_proj:
266
+ accum_format: SAME
267
+ approximation_function: NONE
268
+ bias_format: SAME
269
+ input_format: BFP[8|8]{64,-1}(SN)
270
+ instance: Linear
271
+ output_format: BFP[8|8]{64,-1}(SN)
272
+ weight_format: BFP[8|8]{64,-1}(SN)
273
+ weight_sparseness: DENSE
274
+ model.decoder.layers.10.self_attn_layer_norm:
275
+ approximation_function: LAYERNORM(fallback,4,float16)
276
+ bias_format: SAME
277
+ input_format: SAME
278
+ instance: LayerNorm
279
+ output_format: SAME
280
+ weight_format: SAME
281
+ model.decoder.layers.11.activation_fn:
282
+ approximation_function: NONE
283
+ input_format: SAME
284
+ instance: ReLU
285
+ output_format: SAME
286
+ model.decoder.layers.11.dropout:
287
+ approximation_function: NONE
288
+ input_format: SAME
289
+ instance: Dropout
290
+ output_format: SAME
291
+ model.decoder.layers.11.fc1:
292
+ accum_format: SAME
293
+ approximation_function: NONE
294
+ bias_format: SAME
295
+ input_format: BFP[8|8]{64,-1}(SN)
296
+ instance: Linear
297
+ output_format: SAME
298
+ weight_format: BFP[8|8]{64,-1}(SN)
299
+ weight_sparseness: DENSE
300
+ model.decoder.layers.11.fc2:
301
+ accum_format: SAME
302
+ approximation_function: NONE
303
+ bias_format: SAME
304
+ input_format: BFP[8|8]{64,-1}(SN)
305
+ instance: Linear
306
+ output_format: SAME
307
+ weight_format: BFP[8|8]{64,-1}(SN)
308
+ weight_sparseness: DENSE
309
+ model.decoder.layers.11.final_layer_norm:
310
+ approximation_function: LAYERNORM(fallback,4,float16)
311
+ bias_format: SAME
312
+ input_format: SAME
313
+ instance: LayerNorm
314
+ output_format: SAME
315
+ weight_format: SAME
316
+ model.decoder.layers.11.self_attn.dropout:
317
+ approximation_function: NONE
318
+ input_format: SAME
319
+ instance: Dropout
320
+ output_format: BFP[8|8]{64,-1}(SN)
321
+ model.decoder.layers.11.self_attn.k_proj:
322
+ accum_format: SAME
323
+ approximation_function: NONE
324
+ bias_format: SAME
325
+ input_format: BFP[8|8]{64,-1}(SN)
326
+ instance: Linear
327
+ output_format: BFP[8|8]{64,-1}(SN)
328
+ weight_format: BFP[8|8]{64,-1}(SN)
329
+ weight_sparseness: DENSE
330
+ model.decoder.layers.11.self_attn.out_proj:
331
+ accum_format: SAME
332
+ approximation_function: NONE
333
+ bias_format: SAME
334
+ input_format: BFP[8|8]{64,-1}(SN)
335
+ instance: Linear
336
+ output_format: SAME
337
+ weight_format: BFP[8|8]{64,-1}(SN)
338
+ weight_sparseness: DENSE
339
+ model.decoder.layers.11.self_attn.q_proj:
340
+ accum_format: SAME
341
+ approximation_function: NONE
342
+ bias_format: SAME
343
+ input_format: BFP[8|8]{64,-1}(SN)
344
+ instance: Linear
345
+ output_format: BFP[8|8]{64,-1}(SN)
346
+ weight_format: BFP[8|8]{64,-1}(SN)
347
+ weight_sparseness: DENSE
348
+ model.decoder.layers.11.self_attn.softmax:
349
+ approximation_function: SOFTMAX(base2,float16)
350
+ input_format: SAME
351
+ instance: Softmax
352
+ output_format: SAME
353
+ model.decoder.layers.11.self_attn.v_proj:
354
+ accum_format: SAME
355
+ approximation_function: NONE
356
+ bias_format: SAME
357
+ input_format: BFP[8|8]{64,-1}(SN)
358
+ instance: Linear
359
+ output_format: BFP[8|8]{64,-1}(SN)
360
+ weight_format: BFP[8|8]{64,-1}(SN)
361
+ weight_sparseness: DENSE
362
+ model.decoder.layers.11.self_attn_layer_norm:
363
+ approximation_function: LAYERNORM(fallback,4,float16)
364
+ bias_format: SAME
365
+ input_format: SAME
366
+ instance: LayerNorm
367
+ output_format: SAME
368
+ weight_format: SAME
369
+ model.decoder.layers.12.activation_fn:
370
+ approximation_function: NONE
371
+ input_format: SAME
372
+ instance: ReLU
373
+ output_format: SAME
374
+ model.decoder.layers.12.dropout:
375
+ approximation_function: NONE
376
+ input_format: SAME
377
+ instance: Dropout
378
+ output_format: SAME
379
+ model.decoder.layers.12.fc1:
380
+ accum_format: SAME
381
+ approximation_function: NONE
382
+ bias_format: SAME
383
+ input_format: BFP[8|8]{64,-1}(SN)
384
+ instance: Linear
385
+ output_format: SAME
386
+ weight_format: BFP[8|8]{64,-1}(SN)
387
+ weight_sparseness: DENSE
388
+ model.decoder.layers.12.fc2:
389
+ accum_format: SAME
390
+ approximation_function: NONE
391
+ bias_format: SAME
392
+ input_format: BFP[8|8]{64,-1}(SN)
393
+ instance: Linear
394
+ output_format: SAME
395
+ weight_format: BFP[8|8]{64,-1}(SN)
396
+ weight_sparseness: DENSE
397
+ model.decoder.layers.12.final_layer_norm:
398
+ approximation_function: LAYERNORM(fallback,4,float16)
399
+ bias_format: SAME
400
+ input_format: SAME
401
+ instance: LayerNorm
402
+ output_format: SAME
403
+ weight_format: SAME
404
+ model.decoder.layers.12.self_attn.dropout:
405
+ approximation_function: NONE
406
+ input_format: SAME
407
+ instance: Dropout
408
+ output_format: BFP[8|8]{64,-1}(SN)
409
+ model.decoder.layers.12.self_attn.k_proj:
410
+ accum_format: SAME
411
+ approximation_function: NONE
412
+ bias_format: SAME
413
+ input_format: BFP[8|8]{64,-1}(SN)
414
+ instance: Linear
415
+ output_format: BFP[8|8]{64,-1}(SN)
416
+ weight_format: BFP[8|8]{64,-1}(SN)
417
+ weight_sparseness: DENSE
418
+ model.decoder.layers.12.self_attn.out_proj:
419
+ accum_format: SAME
420
+ approximation_function: NONE
421
+ bias_format: SAME
422
+ input_format: BFP[8|8]{64,-1}(SN)
423
+ instance: Linear
424
+ output_format: SAME
425
+ weight_format: BFP[8|8]{64,-1}(SN)
426
+ weight_sparseness: DENSE
427
+ model.decoder.layers.12.self_attn.q_proj:
428
+ accum_format: SAME
429
+ approximation_function: NONE
430
+ bias_format: SAME
431
+ input_format: BFP[8|8]{64,-1}(SN)
432
+ instance: Linear
433
+ output_format: BFP[8|8]{64,-1}(SN)
434
+ weight_format: BFP[8|8]{64,-1}(SN)
435
+ weight_sparseness: DENSE
436
+ model.decoder.layers.12.self_attn.softmax:
437
+ approximation_function: SOFTMAX(base2,float16)
438
+ input_format: SAME
439
+ instance: Softmax
440
+ output_format: SAME
441
+ model.decoder.layers.12.self_attn.v_proj:
442
+ accum_format: SAME
443
+ approximation_function: NONE
444
+ bias_format: SAME
445
+ input_format: BFP[8|8]{64,-1}(SN)
446
+ instance: Linear
447
+ output_format: BFP[8|8]{64,-1}(SN)
448
+ weight_format: BFP[8|8]{64,-1}(SN)
449
+ weight_sparseness: DENSE
450
+ model.decoder.layers.12.self_attn_layer_norm:
451
+ approximation_function: LAYERNORM(fallback,4,float16)
452
+ bias_format: SAME
453
+ input_format: SAME
454
+ instance: LayerNorm
455
+ output_format: SAME
456
+ weight_format: SAME
457
+ model.decoder.layers.13.activation_fn:
458
+ approximation_function: NONE
459
+ input_format: SAME
460
+ instance: ReLU
461
+ output_format: SAME
462
+ model.decoder.layers.13.dropout:
463
+ approximation_function: NONE
464
+ input_format: SAME
465
+ instance: Dropout
466
+ output_format: SAME
467
+ model.decoder.layers.13.fc1:
468
+ accum_format: SAME
469
+ approximation_function: NONE
470
+ bias_format: SAME
471
+ input_format: BFP[8|8]{64,-1}(SN)
472
+ instance: Linear
473
+ output_format: SAME
474
+ weight_format: BFP[8|8]{64,-1}(SN)
475
+ weight_sparseness: DENSE
476
+ model.decoder.layers.13.fc2:
477
+ accum_format: SAME
478
+ approximation_function: NONE
479
+ bias_format: SAME
480
+ input_format: BFP[8|8]{64,-1}(SN)
481
+ instance: Linear
482
+ output_format: SAME
483
+ weight_format: BFP[8|8]{64,-1}(SN)
484
+ weight_sparseness: DENSE
485
+ model.decoder.layers.13.final_layer_norm:
486
+ approximation_function: LAYERNORM(fallback,4,float16)
487
+ bias_format: SAME
488
+ input_format: SAME
489
+ instance: LayerNorm
490
+ output_format: SAME
491
+ weight_format: SAME
492
+ model.decoder.layers.13.self_attn.dropout:
493
+ approximation_function: NONE
494
+ input_format: SAME
495
+ instance: Dropout
496
+ output_format: BFP[8|8]{64,-1}(SN)
497
+ model.decoder.layers.13.self_attn.k_proj:
498
+ accum_format: SAME
499
+ approximation_function: NONE
500
+ bias_format: SAME
501
+ input_format: BFP[8|8]{64,-1}(SN)
502
+ instance: Linear
503
+ output_format: BFP[8|8]{64,-1}(SN)
504
+ weight_format: BFP[8|8]{64,-1}(SN)
505
+ weight_sparseness: DENSE
506
+ model.decoder.layers.13.self_attn.out_proj:
507
+ accum_format: SAME
508
+ approximation_function: NONE
509
+ bias_format: SAME
510
+ input_format: BFP[8|8]{64,-1}(SN)
511
+ instance: Linear
512
+ output_format: SAME
513
+ weight_format: BFP[8|8]{64,-1}(SN)
514
+ weight_sparseness: DENSE
515
+ model.decoder.layers.13.self_attn.q_proj:
516
+ accum_format: SAME
517
+ approximation_function: NONE
518
+ bias_format: SAME
519
+ input_format: BFP[8|8]{64,-1}(SN)
520
+ instance: Linear
521
+ output_format: BFP[8|8]{64,-1}(SN)
522
+ weight_format: BFP[8|8]{64,-1}(SN)
523
+ weight_sparseness: DENSE
524
+ model.decoder.layers.13.self_attn.softmax:
525
+ approximation_function: SOFTMAX(base2,float16)
526
+ input_format: SAME
527
+ instance: Softmax
528
+ output_format: SAME
529
+ model.decoder.layers.13.self_attn.v_proj:
530
+ accum_format: SAME
531
+ approximation_function: NONE
532
+ bias_format: SAME
533
+ input_format: BFP[8|8]{64,-1}(SN)
534
+ instance: Linear
535
+ output_format: BFP[8|8]{64,-1}(SN)
536
+ weight_format: BFP[8|8]{64,-1}(SN)
537
+ weight_sparseness: DENSE
538
+ model.decoder.layers.13.self_attn_layer_norm:
539
+ approximation_function: LAYERNORM(fallback,4,float16)
540
+ bias_format: SAME
541
+ input_format: SAME
542
+ instance: LayerNorm
543
+ output_format: SAME
544
+ weight_format: SAME
545
+ model.decoder.layers.14.activation_fn:
546
+ approximation_function: NONE
547
+ input_format: SAME
548
+ instance: ReLU
549
+ output_format: SAME
550
+ model.decoder.layers.14.dropout:
551
+ approximation_function: NONE
552
+ input_format: SAME
553
+ instance: Dropout
554
+ output_format: SAME
555
+ model.decoder.layers.14.fc1:
556
+ accum_format: SAME
557
+ approximation_function: NONE
558
+ bias_format: SAME
559
+ input_format: BFP[8|8]{64,-1}(SN)
560
+ instance: Linear
561
+ output_format: SAME
562
+ weight_format: BFP[8|8]{64,-1}(SN)
563
+ weight_sparseness: DENSE
564
+ model.decoder.layers.14.fc2:
565
+ accum_format: SAME
566
+ approximation_function: NONE
567
+ bias_format: SAME
568
+ input_format: BFP[8|8]{64,-1}(SN)
569
+ instance: Linear
570
+ output_format: SAME
571
+ weight_format: BFP[8|8]{64,-1}(SN)
572
+ weight_sparseness: DENSE
573
+ model.decoder.layers.14.final_layer_norm:
574
+ approximation_function: LAYERNORM(fallback,4,float16)
575
+ bias_format: SAME
576
+ input_format: SAME
577
+ instance: LayerNorm
578
+ output_format: SAME
579
+ weight_format: SAME
580
+ model.decoder.layers.14.self_attn.dropout:
581
+ approximation_function: NONE
582
+ input_format: SAME
583
+ instance: Dropout
584
+ output_format: BFP[8|8]{64,-1}(SN)
585
+ model.decoder.layers.14.self_attn.k_proj:
586
+ accum_format: SAME
587
+ approximation_function: NONE
588
+ bias_format: SAME
589
+ input_format: BFP[8|8]{64,-1}(SN)
590
+ instance: Linear
591
+ output_format: BFP[8|8]{64,-1}(SN)
592
+ weight_format: BFP[8|8]{64,-1}(SN)
593
+ weight_sparseness: DENSE
594
+ model.decoder.layers.14.self_attn.out_proj:
595
+ accum_format: SAME
596
+ approximation_function: NONE
597
+ bias_format: SAME
598
+ input_format: BFP[8|8]{64,-1}(SN)
599
+ instance: Linear
600
+ output_format: SAME
601
+ weight_format: BFP[8|8]{64,-1}(SN)
602
+ weight_sparseness: DENSE
603
+ model.decoder.layers.14.self_attn.q_proj:
604
+ accum_format: SAME
605
+ approximation_function: NONE
606
+ bias_format: SAME
607
+ input_format: BFP[8|8]{64,-1}(SN)
608
+ instance: Linear
609
+ output_format: BFP[8|8]{64,-1}(SN)
610
+ weight_format: BFP[8|8]{64,-1}(SN)
611
+ weight_sparseness: DENSE
612
+ model.decoder.layers.14.self_attn.softmax:
613
+ approximation_function: SOFTMAX(base2,float16)
614
+ input_format: SAME
615
+ instance: Softmax
616
+ output_format: SAME
617
+ model.decoder.layers.14.self_attn.v_proj:
618
+ accum_format: SAME
619
+ approximation_function: NONE
620
+ bias_format: SAME
621
+ input_format: BFP[8|8]{64,-1}(SN)
622
+ instance: Linear
623
+ output_format: BFP[8|8]{64,-1}(SN)
624
+ weight_format: BFP[8|8]{64,-1}(SN)
625
+ weight_sparseness: DENSE
626
+ model.decoder.layers.14.self_attn_layer_norm:
627
+ approximation_function: LAYERNORM(fallback,4,float16)
628
+ bias_format: SAME
629
+ input_format: SAME
630
+ instance: LayerNorm
631
+ output_format: SAME
632
+ weight_format: SAME
633
+ model.decoder.layers.15.activation_fn:
634
+ approximation_function: NONE
635
+ input_format: SAME
636
+ instance: ReLU
637
+ output_format: SAME
638
+ model.decoder.layers.15.dropout:
639
+ approximation_function: NONE
640
+ input_format: SAME
641
+ instance: Dropout
642
+ output_format: SAME
643
+ model.decoder.layers.15.fc1:
644
+ accum_format: SAME
645
+ approximation_function: NONE
646
+ bias_format: SAME
647
+ input_format: BFP[8|8]{64,-1}(SN)
648
+ instance: Linear
649
+ output_format: SAME
650
+ weight_format: BFP[8|8]{64,-1}(SN)
651
+ weight_sparseness: DENSE
652
+ model.decoder.layers.15.fc2:
653
+ accum_format: SAME
654
+ approximation_function: NONE
655
+ bias_format: SAME
656
+ input_format: BFP[8|8]{64,-1}(SN)
657
+ instance: Linear
658
+ output_format: SAME
659
+ weight_format: BFP[8|8]{64,-1}(SN)
660
+ weight_sparseness: DENSE
661
+ model.decoder.layers.15.final_layer_norm:
662
+ approximation_function: LAYERNORM(fallback,4,float16)
663
+ bias_format: SAME
664
+ input_format: SAME
665
+ instance: LayerNorm
666
+ output_format: SAME
667
+ weight_format: SAME
668
+ model.decoder.layers.15.self_attn.dropout:
669
+ approximation_function: NONE
670
+ input_format: SAME
671
+ instance: Dropout
672
+ output_format: BFP[8|8]{64,-1}(SN)
673
+ model.decoder.layers.15.self_attn.k_proj:
674
+ accum_format: SAME
675
+ approximation_function: NONE
676
+ bias_format: SAME
677
+ input_format: BFP[8|8]{64,-1}(SN)
678
+ instance: Linear
679
+ output_format: BFP[8|8]{64,-1}(SN)
680
+ weight_format: BFP[8|8]{64,-1}(SN)
681
+ weight_sparseness: DENSE
682
+ model.decoder.layers.15.self_attn.out_proj:
683
+ accum_format: SAME
684
+ approximation_function: NONE
685
+ bias_format: SAME
686
+ input_format: BFP[8|8]{64,-1}(SN)
687
+ instance: Linear
688
+ output_format: SAME
689
+ weight_format: BFP[8|8]{64,-1}(SN)
690
+ weight_sparseness: DENSE
691
+ model.decoder.layers.15.self_attn.q_proj:
692
+ accum_format: SAME
693
+ approximation_function: NONE
694
+ bias_format: SAME
695
+ input_format: BFP[8|8]{64,-1}(SN)
696
+ instance: Linear
697
+ output_format: BFP[8|8]{64,-1}(SN)
698
+ weight_format: BFP[8|8]{64,-1}(SN)
699
+ weight_sparseness: DENSE
700
+ model.decoder.layers.15.self_attn.softmax:
701
+ approximation_function: SOFTMAX(base2,float16)
702
+ input_format: SAME
703
+ instance: Softmax
704
+ output_format: SAME
705
+ model.decoder.layers.15.self_attn.v_proj:
706
+ accum_format: SAME
707
+ approximation_function: NONE
708
+ bias_format: SAME
709
+ input_format: BFP[8|8]{64,-1}(SN)
710
+ instance: Linear
711
+ output_format: BFP[8|8]{64,-1}(SN)
712
+ weight_format: BFP[8|8]{64,-1}(SN)
713
+ weight_sparseness: DENSE
714
+ model.decoder.layers.15.self_attn_layer_norm:
715
+ approximation_function: LAYERNORM(fallback,4,float16)
716
+ bias_format: SAME
717
+ input_format: SAME
718
+ instance: LayerNorm
719
+ output_format: SAME
720
+ weight_format: SAME
721
+ model.decoder.layers.16.activation_fn:
722
+ approximation_function: NONE
723
+ input_format: SAME
724
+ instance: ReLU
725
+ output_format: SAME
726
+ model.decoder.layers.16.dropout:
727
+ approximation_function: NONE
728
+ input_format: SAME
729
+ instance: Dropout
730
+ output_format: SAME
731
+ model.decoder.layers.16.fc1:
732
+ accum_format: SAME
733
+ approximation_function: NONE
734
+ bias_format: SAME
735
+ input_format: BFP[8|8]{64,-1}(SN)
736
+ instance: Linear
737
+ output_format: SAME
738
+ weight_format: BFP[8|8]{64,-1}(SN)
739
+ weight_sparseness: DENSE
740
+ model.decoder.layers.16.fc2:
741
+ accum_format: SAME
742
+ approximation_function: NONE
743
+ bias_format: SAME
744
+ input_format: BFP[8|8]{64,-1}(SN)
745
+ instance: Linear
746
+ output_format: SAME
747
+ weight_format: BFP[8|8]{64,-1}(SN)
748
+ weight_sparseness: DENSE
749
+ model.decoder.layers.16.final_layer_norm:
750
+ approximation_function: LAYERNORM(fallback,4,float16)
751
+ bias_format: SAME
752
+ input_format: SAME
753
+ instance: LayerNorm
754
+ output_format: SAME
755
+ weight_format: SAME
756
+ model.decoder.layers.16.self_attn.dropout:
757
+ approximation_function: NONE
758
+ input_format: SAME
759
+ instance: Dropout
760
+ output_format: BFP[8|8]{64,-1}(SN)
761
+ model.decoder.layers.16.self_attn.k_proj:
762
+ accum_format: SAME
763
+ approximation_function: NONE
764
+ bias_format: SAME
765
+ input_format: BFP[8|8]{64,-1}(SN)
766
+ instance: Linear
767
+ output_format: BFP[8|8]{64,-1}(SN)
768
+ weight_format: BFP[8|8]{64,-1}(SN)
769
+ weight_sparseness: DENSE
770
+ model.decoder.layers.16.self_attn.out_proj:
771
+ accum_format: SAME
772
+ approximation_function: NONE
773
+ bias_format: SAME
774
+ input_format: BFP[8|8]{64,-1}(SN)
775
+ instance: Linear
776
+ output_format: SAME
777
+ weight_format: BFP[8|8]{64,-1}(SN)
778
+ weight_sparseness: DENSE
779
+ model.decoder.layers.16.self_attn.q_proj:
780
+ accum_format: SAME
781
+ approximation_function: NONE
782
+ bias_format: SAME
783
+ input_format: BFP[8|8]{64,-1}(SN)
784
+ instance: Linear
785
+ output_format: BFP[8|8]{64,-1}(SN)
786
+ weight_format: BFP[8|8]{64,-1}(SN)
787
+ weight_sparseness: DENSE
788
+ model.decoder.layers.16.self_attn.softmax:
789
+ approximation_function: SOFTMAX(base2,float16)
790
+ input_format: SAME
791
+ instance: Softmax
792
+ output_format: SAME
793
+ model.decoder.layers.16.self_attn.v_proj:
794
+ accum_format: SAME
795
+ approximation_function: NONE
796
+ bias_format: SAME
797
+ input_format: BFP[8|8]{64,-1}(SN)
798
+ instance: Linear
799
+ output_format: BFP[8|8]{64,-1}(SN)
800
+ weight_format: BFP[8|8]{64,-1}(SN)
801
+ weight_sparseness: DENSE
802
+ model.decoder.layers.16.self_attn_layer_norm:
803
+ approximation_function: LAYERNORM(fallback,4,float16)
804
+ bias_format: SAME
805
+ input_format: SAME
806
+ instance: LayerNorm
807
+ output_format: SAME
808
+ weight_format: SAME
809
+ model.decoder.layers.17.activation_fn:
810
+ approximation_function: NONE
811
+ input_format: SAME
812
+ instance: ReLU
813
+ output_format: SAME
814
+ model.decoder.layers.17.dropout:
815
+ approximation_function: NONE
816
+ input_format: SAME
817
+ instance: Dropout
818
+ output_format: SAME
819
+ model.decoder.layers.17.fc1:
820
+ accum_format: SAME
821
+ approximation_function: NONE
822
+ bias_format: SAME
823
+ input_format: BFP[8|8]{64,-1}(SN)
824
+ instance: Linear
825
+ output_format: SAME
826
+ weight_format: BFP[8|8]{64,-1}(SN)
827
+ weight_sparseness: DENSE
828
+ model.decoder.layers.17.fc2:
829
+ accum_format: SAME
830
+ approximation_function: NONE
831
+ bias_format: SAME
832
+ input_format: BFP[8|8]{64,-1}(SN)
833
+ instance: Linear
834
+ output_format: SAME
835
+ weight_format: BFP[8|8]{64,-1}(SN)
836
+ weight_sparseness: DENSE
837
+ model.decoder.layers.17.final_layer_norm:
838
+ approximation_function: LAYERNORM(fallback,4,float16)
839
+ bias_format: SAME
840
+ input_format: SAME
841
+ instance: LayerNorm
842
+ output_format: SAME
843
+ weight_format: SAME
844
+ model.decoder.layers.17.self_attn.dropout:
845
+ approximation_function: NONE
846
+ input_format: SAME
847
+ instance: Dropout
848
+ output_format: BFP[8|8]{64,-1}(SN)
849
+ model.decoder.layers.17.self_attn.k_proj:
850
+ accum_format: SAME
851
+ approximation_function: NONE
852
+ bias_format: SAME
853
+ input_format: BFP[8|8]{64,-1}(SN)
854
+ instance: Linear
855
+ output_format: BFP[8|8]{64,-1}(SN)
856
+ weight_format: BFP[8|8]{64,-1}(SN)
857
+ weight_sparseness: DENSE
858
+ model.decoder.layers.17.self_attn.out_proj:
859
+ accum_format: SAME
860
+ approximation_function: NONE
861
+ bias_format: SAME
862
+ input_format: BFP[8|8]{64,-1}(SN)
863
+ instance: Linear
864
+ output_format: SAME
865
+ weight_format: BFP[8|8]{64,-1}(SN)
866
+ weight_sparseness: DENSE
867
+ model.decoder.layers.17.self_attn.q_proj:
868
+ accum_format: SAME
869
+ approximation_function: NONE
870
+ bias_format: SAME
871
+ input_format: BFP[8|8]{64,-1}(SN)
872
+ instance: Linear
873
+ output_format: BFP[8|8]{64,-1}(SN)
874
+ weight_format: BFP[8|8]{64,-1}(SN)
875
+ weight_sparseness: DENSE
876
+ model.decoder.layers.17.self_attn.softmax:
877
+ approximation_function: SOFTMAX(base2,float16)
878
+ input_format: SAME
879
+ instance: Softmax
880
+ output_format: SAME
881
+ model.decoder.layers.17.self_attn.v_proj:
882
+ accum_format: SAME
883
+ approximation_function: NONE
884
+ bias_format: SAME
885
+ input_format: BFP[8|8]{64,-1}(SN)
886
+ instance: Linear
887
+ output_format: BFP[8|8]{64,-1}(SN)
888
+ weight_format: BFP[8|8]{64,-1}(SN)
889
+ weight_sparseness: DENSE
890
+ model.decoder.layers.17.self_attn_layer_norm:
891
+ approximation_function: LAYERNORM(fallback,4,float16)
892
+ bias_format: SAME
893
+ input_format: SAME
894
+ instance: LayerNorm
895
+ output_format: SAME
896
+ weight_format: SAME
897
+ model.decoder.layers.18.activation_fn:
898
+ approximation_function: NONE
899
+ input_format: SAME
900
+ instance: ReLU
901
+ output_format: SAME
902
+ model.decoder.layers.18.dropout:
903
+ approximation_function: NONE
904
+ input_format: SAME
905
+ instance: Dropout
906
+ output_format: SAME
907
+ model.decoder.layers.18.fc1:
908
+ accum_format: SAME
909
+ approximation_function: NONE
910
+ bias_format: SAME
911
+ input_format: BFP[8|8]{64,-1}(SN)
912
+ instance: Linear
913
+ output_format: SAME
914
+ weight_format: BFP[8|8]{64,-1}(SN)
915
+ weight_sparseness: DENSE
916
+ model.decoder.layers.18.fc2:
917
+ accum_format: SAME
918
+ approximation_function: NONE
919
+ bias_format: SAME
920
+ input_format: BFP[8|8]{64,-1}(SN)
921
+ instance: Linear
922
+ output_format: SAME
923
+ weight_format: BFP[8|8]{64,-1}(SN)
924
+ weight_sparseness: DENSE
925
+ model.decoder.layers.18.final_layer_norm:
926
+ approximation_function: LAYERNORM(fallback,4,float16)
927
+ bias_format: SAME
928
+ input_format: SAME
929
+ instance: LayerNorm
930
+ output_format: SAME
931
+ weight_format: SAME
932
+ model.decoder.layers.18.self_attn.dropout:
933
+ approximation_function: NONE
934
+ input_format: SAME
935
+ instance: Dropout
936
+ output_format: BFP[8|8]{64,-1}(SN)
937
+ model.decoder.layers.18.self_attn.k_proj:
938
+ accum_format: SAME
939
+ approximation_function: NONE
940
+ bias_format: SAME
941
+ input_format: BFP[8|8]{64,-1}(SN)
942
+ instance: Linear
943
+ output_format: BFP[8|8]{64,-1}(SN)
944
+ weight_format: BFP[8|8]{64,-1}(SN)
945
+ weight_sparseness: DENSE
946
+ model.decoder.layers.18.self_attn.out_proj:
947
+ accum_format: SAME
948
+ approximation_function: NONE
949
+ bias_format: SAME
950
+ input_format: BFP[8|8]{64,-1}(SN)
951
+ instance: Linear
952
+ output_format: SAME
953
+ weight_format: BFP[8|8]{64,-1}(SN)
954
+ weight_sparseness: DENSE
955
+ model.decoder.layers.18.self_attn.q_proj:
956
+ accum_format: SAME
957
+ approximation_function: NONE
958
+ bias_format: SAME
959
+ input_format: BFP[8|8]{64,-1}(SN)
960
+ instance: Linear
961
+ output_format: BFP[8|8]{64,-1}(SN)
962
+ weight_format: BFP[8|8]{64,-1}(SN)
963
+ weight_sparseness: DENSE
964
+ model.decoder.layers.18.self_attn.softmax:
965
+ approximation_function: SOFTMAX(base2,float16)
966
+ input_format: SAME
967
+ instance: Softmax
968
+ output_format: SAME
969
+ model.decoder.layers.18.self_attn.v_proj:
970
+ accum_format: SAME
971
+ approximation_function: NONE
972
+ bias_format: SAME
973
+ input_format: BFP[8|8]{64,-1}(SN)
974
+ instance: Linear
975
+ output_format: BFP[8|8]{64,-1}(SN)
976
+ weight_format: BFP[8|8]{64,-1}(SN)
977
+ weight_sparseness: DENSE
978
+ model.decoder.layers.18.self_attn_layer_norm:
979
+ approximation_function: LAYERNORM(fallback,4,float16)
980
+ bias_format: SAME
981
+ input_format: SAME
982
+ instance: LayerNorm
983
+ output_format: SAME
984
+ weight_format: SAME
985
+ model.decoder.layers.19.activation_fn:
986
+ approximation_function: NONE
987
+ input_format: SAME
988
+ instance: ReLU
989
+ output_format: SAME
990
+ model.decoder.layers.19.dropout:
991
+ approximation_function: NONE
992
+ input_format: SAME
993
+ instance: Dropout
994
+ output_format: SAME
995
+ model.decoder.layers.19.fc1:
996
+ accum_format: SAME
997
+ approximation_function: NONE
998
+ bias_format: SAME
999
+ input_format: BFP[8|8]{64,-1}(SN)
1000
+ instance: Linear
1001
+ output_format: SAME
1002
+ weight_format: BFP[8|8]{64,-1}(SN)
1003
+ weight_sparseness: DENSE
1004
+ model.decoder.layers.19.fc2:
1005
+ accum_format: SAME
1006
+ approximation_function: NONE
1007
+ bias_format: SAME
1008
+ input_format: BFP[8|8]{64,-1}(SN)
1009
+ instance: Linear
1010
+ output_format: SAME
1011
+ weight_format: BFP[8|8]{64,-1}(SN)
1012
+ weight_sparseness: DENSE
1013
+ model.decoder.layers.19.final_layer_norm:
1014
+ approximation_function: LAYERNORM(fallback,4,float16)
1015
+ bias_format: SAME
1016
+ input_format: SAME
1017
+ instance: LayerNorm
1018
+ output_format: SAME
1019
+ weight_format: SAME
1020
+ model.decoder.layers.19.self_attn.dropout:
1021
+ approximation_function: NONE
1022
+ input_format: SAME
1023
+ instance: Dropout
1024
+ output_format: BFP[8|8]{64,-1}(SN)
1025
+ model.decoder.layers.19.self_attn.k_proj:
1026
+ accum_format: SAME
1027
+ approximation_function: NONE
1028
+ bias_format: SAME
1029
+ input_format: BFP[8|8]{64,-1}(SN)
1030
+ instance: Linear
1031
+ output_format: BFP[8|8]{64,-1}(SN)
1032
+ weight_format: BFP[8|8]{64,-1}(SN)
1033
+ weight_sparseness: DENSE
1034
+ model.decoder.layers.19.self_attn.out_proj:
1035
+ accum_format: SAME
1036
+ approximation_function: NONE
1037
+ bias_format: SAME
1038
+ input_format: BFP[8|8]{64,-1}(SN)
1039
+ instance: Linear
1040
+ output_format: SAME
1041
+ weight_format: BFP[8|8]{64,-1}(SN)
1042
+ weight_sparseness: DENSE
1043
+ model.decoder.layers.19.self_attn.q_proj:
1044
+ accum_format: SAME
1045
+ approximation_function: NONE
1046
+ bias_format: SAME
1047
+ input_format: BFP[8|8]{64,-1}(SN)
1048
+ instance: Linear
1049
+ output_format: BFP[8|8]{64,-1}(SN)
1050
+ weight_format: BFP[8|8]{64,-1}(SN)
1051
+ weight_sparseness: DENSE
1052
+ model.decoder.layers.19.self_attn.softmax:
1053
+ approximation_function: SOFTMAX(base2,float16)
1054
+ input_format: SAME
1055
+ instance: Softmax
1056
+ output_format: SAME
1057
+ model.decoder.layers.19.self_attn.v_proj:
1058
+ accum_format: SAME
1059
+ approximation_function: NONE
1060
+ bias_format: SAME
1061
+ input_format: BFP[8|8]{64,-1}(SN)
1062
+ instance: Linear
1063
+ output_format: BFP[8|8]{64,-1}(SN)
1064
+ weight_format: BFP[8|8]{64,-1}(SN)
1065
+ weight_sparseness: DENSE
1066
+ model.decoder.layers.19.self_attn_layer_norm:
1067
+ approximation_function: LAYERNORM(fallback,4,float16)
1068
+ bias_format: SAME
1069
+ input_format: SAME
1070
+ instance: LayerNorm
1071
+ output_format: SAME
1072
+ weight_format: SAME
1073
+ model.decoder.layers.2.activation_fn:
1074
+ approximation_function: NONE
1075
+ input_format: SAME
1076
+ instance: ReLU
1077
+ output_format: SAME
1078
+ model.decoder.layers.2.dropout:
1079
+ approximation_function: NONE
1080
+ input_format: SAME
1081
+ instance: Dropout
1082
+ output_format: SAME
1083
+ model.decoder.layers.2.fc1:
1084
+ accum_format: SAME
1085
+ approximation_function: NONE
1086
+ bias_format: SAME
1087
+ input_format: BFP[8|8]{64,-1}(SN)
1088
+ instance: Linear
1089
+ output_format: SAME
1090
+ weight_format: BFP[8|8]{64,-1}(SN)
1091
+ weight_sparseness: DENSE
1092
+ model.decoder.layers.2.fc2:
1093
+ accum_format: SAME
1094
+ approximation_function: NONE
1095
+ bias_format: SAME
1096
+ input_format: BFP[8|8]{64,-1}(SN)
1097
+ instance: Linear
1098
+ output_format: SAME
1099
+ weight_format: BFP[8|8]{64,-1}(SN)
1100
+ weight_sparseness: DENSE
1101
+ model.decoder.layers.2.final_layer_norm:
1102
+ approximation_function: LAYERNORM(fallback,4,float16)
1103
+ bias_format: SAME
1104
+ input_format: SAME
1105
+ instance: LayerNorm
1106
+ output_format: SAME
1107
+ weight_format: SAME
1108
+ model.decoder.layers.2.self_attn.dropout:
1109
+ approximation_function: NONE
1110
+ input_format: SAME
1111
+ instance: Dropout
1112
+ output_format: BFP[8|8]{64,-1}(SN)
1113
+ model.decoder.layers.2.self_attn.k_proj:
1114
+ accum_format: SAME
1115
+ approximation_function: NONE
1116
+ bias_format: SAME
1117
+ input_format: BFP[8|8]{64,-1}(SN)
1118
+ instance: Linear
1119
+ output_format: BFP[8|8]{64,-1}(SN)
1120
+ weight_format: BFP[8|8]{64,-1}(SN)
1121
+ weight_sparseness: DENSE
1122
+ model.decoder.layers.2.self_attn.out_proj:
1123
+ accum_format: SAME
1124
+ approximation_function: NONE
1125
+ bias_format: SAME
1126
+ input_format: BFP[8|8]{64,-1}(SN)
1127
+ instance: Linear
1128
+ output_format: SAME
1129
+ weight_format: BFP[8|8]{64,-1}(SN)
1130
+ weight_sparseness: DENSE
1131
+ model.decoder.layers.2.self_attn.q_proj:
1132
+ accum_format: SAME
1133
+ approximation_function: NONE
1134
+ bias_format: SAME
1135
+ input_format: BFP[8|8]{64,-1}(SN)
1136
+ instance: Linear
1137
+ output_format: BFP[8|8]{64,-1}(SN)
1138
+ weight_format: BFP[8|8]{64,-1}(SN)
1139
+ weight_sparseness: DENSE
1140
+ model.decoder.layers.2.self_attn.softmax:
1141
+ approximation_function: SOFTMAX(base2,float16)
1142
+ input_format: SAME
1143
+ instance: Softmax
1144
+ output_format: SAME
1145
+ model.decoder.layers.2.self_attn.v_proj:
1146
+ accum_format: SAME
1147
+ approximation_function: NONE
1148
+ bias_format: SAME
1149
+ input_format: BFP[8|8]{64,-1}(SN)
1150
+ instance: Linear
1151
+ output_format: BFP[8|8]{64,-1}(SN)
1152
+ weight_format: BFP[8|8]{64,-1}(SN)
1153
+ weight_sparseness: DENSE
1154
+ model.decoder.layers.2.self_attn_layer_norm:
1155
+ approximation_function: LAYERNORM(fallback,4,float16)
1156
+ bias_format: SAME
1157
+ input_format: SAME
1158
+ instance: LayerNorm
1159
+ output_format: SAME
1160
+ weight_format: SAME
1161
+ model.decoder.layers.20.activation_fn:
1162
+ approximation_function: NONE
1163
+ input_format: SAME
1164
+ instance: ReLU
1165
+ output_format: SAME
1166
+ model.decoder.layers.20.dropout:
1167
+ approximation_function: NONE
1168
+ input_format: SAME
1169
+ instance: Dropout
1170
+ output_format: SAME
1171
+ model.decoder.layers.20.fc1:
1172
+ accum_format: SAME
1173
+ approximation_function: NONE
1174
+ bias_format: SAME
1175
+ input_format: BFP[8|8]{64,-1}(SN)
1176
+ instance: Linear
1177
+ output_format: SAME
1178
+ weight_format: BFP[8|8]{64,-1}(SN)
1179
+ weight_sparseness: DENSE
1180
+ model.decoder.layers.20.fc2:
1181
+ accum_format: SAME
1182
+ approximation_function: NONE
1183
+ bias_format: SAME
1184
+ input_format: BFP[8|8]{64,-1}(SN)
1185
+ instance: Linear
1186
+ output_format: SAME
1187
+ weight_format: BFP[8|8]{64,-1}(SN)
1188
+ weight_sparseness: DENSE
1189
+ model.decoder.layers.20.final_layer_norm:
1190
+ approximation_function: LAYERNORM(fallback,4,float16)
1191
+ bias_format: SAME
1192
+ input_format: SAME
1193
+ instance: LayerNorm
1194
+ output_format: SAME
1195
+ weight_format: SAME
1196
+ model.decoder.layers.20.self_attn.dropout:
1197
+ approximation_function: NONE
1198
+ input_format: SAME
1199
+ instance: Dropout
1200
+ output_format: BFP[8|8]{64,-1}(SN)
1201
+ model.decoder.layers.20.self_attn.k_proj:
1202
+ accum_format: SAME
1203
+ approximation_function: NONE
1204
+ bias_format: SAME
1205
+ input_format: BFP[8|8]{64,-1}(SN)
1206
+ instance: Linear
1207
+ output_format: BFP[8|8]{64,-1}(SN)
1208
+ weight_format: BFP[8|8]{64,-1}(SN)
1209
+ weight_sparseness: DENSE
1210
+ model.decoder.layers.20.self_attn.out_proj:
1211
+ accum_format: SAME
1212
+ approximation_function: NONE
1213
+ bias_format: SAME
1214
+ input_format: BFP[8|8]{64,-1}(SN)
1215
+ instance: Linear
1216
+ output_format: SAME
1217
+ weight_format: BFP[8|8]{64,-1}(SN)
1218
+ weight_sparseness: DENSE
1219
+ model.decoder.layers.20.self_attn.q_proj:
1220
+ accum_format: SAME
1221
+ approximation_function: NONE
1222
+ bias_format: SAME
1223
+ input_format: BFP[8|8]{64,-1}(SN)
1224
+ instance: Linear
1225
+ output_format: BFP[8|8]{64,-1}(SN)
1226
+ weight_format: BFP[8|8]{64,-1}(SN)
1227
+ weight_sparseness: DENSE
1228
+ model.decoder.layers.20.self_attn.softmax:
1229
+ approximation_function: SOFTMAX(base2,float16)
1230
+ input_format: SAME
1231
+ instance: Softmax
1232
+ output_format: SAME
1233
+ model.decoder.layers.20.self_attn.v_proj:
1234
+ accum_format: SAME
1235
+ approximation_function: NONE
1236
+ bias_format: SAME
1237
+ input_format: BFP[8|8]{64,-1}(SN)
1238
+ instance: Linear
1239
+ output_format: BFP[8|8]{64,-1}(SN)
1240
+ weight_format: BFP[8|8]{64,-1}(SN)
1241
+ weight_sparseness: DENSE
1242
+ model.decoder.layers.20.self_attn_layer_norm:
1243
+ approximation_function: LAYERNORM(fallback,4,float16)
1244
+ bias_format: SAME
1245
+ input_format: SAME
1246
+ instance: LayerNorm
1247
+ output_format: SAME
1248
+ weight_format: SAME
1249
+ model.decoder.layers.21.activation_fn:
1250
+ approximation_function: NONE
1251
+ input_format: SAME
1252
+ instance: ReLU
1253
+ output_format: SAME
1254
+ model.decoder.layers.21.dropout:
1255
+ approximation_function: NONE
1256
+ input_format: SAME
1257
+ instance: Dropout
1258
+ output_format: SAME
1259
+ model.decoder.layers.21.fc1:
1260
+ accum_format: SAME
1261
+ approximation_function: NONE
1262
+ bias_format: SAME
1263
+ input_format: BFP[8|8]{64,-1}(SN)
1264
+ instance: Linear
1265
+ output_format: SAME
1266
+ weight_format: BFP[8|8]{64,-1}(SN)
1267
+ weight_sparseness: DENSE
1268
+ model.decoder.layers.21.fc2:
1269
+ accum_format: SAME
1270
+ approximation_function: NONE
1271
+ bias_format: SAME
1272
+ input_format: BFP[8|8]{64,-1}(SN)
1273
+ instance: Linear
1274
+ output_format: SAME
1275
+ weight_format: BFP[8|8]{64,-1}(SN)
1276
+ weight_sparseness: DENSE
1277
+ model.decoder.layers.21.final_layer_norm:
1278
+ approximation_function: LAYERNORM(fallback,4,float16)
1279
+ bias_format: SAME
1280
+ input_format: SAME
1281
+ instance: LayerNorm
1282
+ output_format: SAME
1283
+ weight_format: SAME
1284
+ model.decoder.layers.21.self_attn.dropout:
1285
+ approximation_function: NONE
1286
+ input_format: SAME
1287
+ instance: Dropout
1288
+ output_format: BFP[8|8]{64,-1}(SN)
1289
+ model.decoder.layers.21.self_attn.k_proj:
1290
+ accum_format: SAME
1291
+ approximation_function: NONE
1292
+ bias_format: SAME
1293
+ input_format: BFP[8|8]{64,-1}(SN)
1294
+ instance: Linear
1295
+ output_format: BFP[8|8]{64,-1}(SN)
1296
+ weight_format: BFP[8|8]{64,-1}(SN)
1297
+ weight_sparseness: DENSE
1298
+ model.decoder.layers.21.self_attn.out_proj:
1299
+ accum_format: SAME
1300
+ approximation_function: NONE
1301
+ bias_format: SAME
1302
+ input_format: BFP[8|8]{64,-1}(SN)
1303
+ instance: Linear
1304
+ output_format: SAME
1305
+ weight_format: BFP[8|8]{64,-1}(SN)
1306
+ weight_sparseness: DENSE
1307
+ model.decoder.layers.21.self_attn.q_proj:
1308
+ accum_format: SAME
1309
+ approximation_function: NONE
1310
+ bias_format: SAME
1311
+ input_format: BFP[8|8]{64,-1}(SN)
1312
+ instance: Linear
1313
+ output_format: BFP[8|8]{64,-1}(SN)
1314
+ weight_format: BFP[8|8]{64,-1}(SN)
1315
+ weight_sparseness: DENSE
1316
+ model.decoder.layers.21.self_attn.softmax:
1317
+ approximation_function: SOFTMAX(base2,float16)
1318
+ input_format: SAME
1319
+ instance: Softmax
1320
+ output_format: SAME
1321
+ model.decoder.layers.21.self_attn.v_proj:
1322
+ accum_format: SAME
1323
+ approximation_function: NONE
1324
+ bias_format: SAME
1325
+ input_format: BFP[8|8]{64,-1}(SN)
1326
+ instance: Linear
1327
+ output_format: BFP[8|8]{64,-1}(SN)
1328
+ weight_format: BFP[8|8]{64,-1}(SN)
1329
+ weight_sparseness: DENSE
1330
+ model.decoder.layers.21.self_attn_layer_norm:
1331
+ approximation_function: LAYERNORM(fallback,4,float16)
1332
+ bias_format: SAME
1333
+ input_format: SAME
1334
+ instance: LayerNorm
1335
+ output_format: SAME
1336
+ weight_format: SAME
1337
+ model.decoder.layers.22.activation_fn:
1338
+ approximation_function: NONE
1339
+ input_format: SAME
1340
+ instance: ReLU
1341
+ output_format: SAME
1342
+ model.decoder.layers.22.dropout:
1343
+ approximation_function: NONE
1344
+ input_format: SAME
1345
+ instance: Dropout
1346
+ output_format: SAME
1347
+ model.decoder.layers.22.fc1:
1348
+ accum_format: SAME
1349
+ approximation_function: NONE
1350
+ bias_format: SAME
1351
+ input_format: BFP[8|8]{64,-1}(SN)
1352
+ instance: Linear
1353
+ output_format: SAME
1354
+ weight_format: BFP[8|8]{64,-1}(SN)
1355
+ weight_sparseness: DENSE
1356
+ model.decoder.layers.22.fc2:
1357
+ accum_format: SAME
1358
+ approximation_function: NONE
1359
+ bias_format: SAME
1360
+ input_format: BFP[8|8]{64,-1}(SN)
1361
+ instance: Linear
1362
+ output_format: SAME
1363
+ weight_format: BFP[8|8]{64,-1}(SN)
1364
+ weight_sparseness: DENSE
1365
+ model.decoder.layers.22.final_layer_norm:
1366
+ approximation_function: LAYERNORM(fallback,4,float16)
1367
+ bias_format: SAME
1368
+ input_format: SAME
1369
+ instance: LayerNorm
1370
+ output_format: SAME
1371
+ weight_format: SAME
1372
+ model.decoder.layers.22.self_attn.dropout:
1373
+ approximation_function: NONE
1374
+ input_format: SAME
1375
+ instance: Dropout
1376
+ output_format: BFP[8|8]{64,-1}(SN)
1377
+ model.decoder.layers.22.self_attn.k_proj:
1378
+ accum_format: SAME
1379
+ approximation_function: NONE
1380
+ bias_format: SAME
1381
+ input_format: BFP[8|8]{64,-1}(SN)
1382
+ instance: Linear
1383
+ output_format: BFP[8|8]{64,-1}(SN)
1384
+ weight_format: BFP[8|8]{64,-1}(SN)
1385
+ weight_sparseness: DENSE
1386
+ model.decoder.layers.22.self_attn.out_proj:
1387
+ accum_format: SAME
1388
+ approximation_function: NONE
1389
+ bias_format: SAME
1390
+ input_format: BFP[8|8]{64,-1}(SN)
1391
+ instance: Linear
1392
+ output_format: SAME
1393
+ weight_format: BFP[8|8]{64,-1}(SN)
1394
+ weight_sparseness: DENSE
1395
+ model.decoder.layers.22.self_attn.q_proj:
1396
+ accum_format: SAME
1397
+ approximation_function: NONE
1398
+ bias_format: SAME
1399
+ input_format: BFP[8|8]{64,-1}(SN)
1400
+ instance: Linear
1401
+ output_format: BFP[8|8]{64,-1}(SN)
1402
+ weight_format: BFP[8|8]{64,-1}(SN)
1403
+ weight_sparseness: DENSE
1404
+ model.decoder.layers.22.self_attn.softmax:
1405
+ approximation_function: SOFTMAX(base2,float16)
1406
+ input_format: SAME
1407
+ instance: Softmax
1408
+ output_format: SAME
1409
+ model.decoder.layers.22.self_attn.v_proj:
1410
+ accum_format: SAME
1411
+ approximation_function: NONE
1412
+ bias_format: SAME
1413
+ input_format: BFP[8|8]{64,-1}(SN)
1414
+ instance: Linear
1415
+ output_format: BFP[8|8]{64,-1}(SN)
1416
+ weight_format: BFP[8|8]{64,-1}(SN)
1417
+ weight_sparseness: DENSE
1418
+ model.decoder.layers.22.self_attn_layer_norm:
1419
+ approximation_function: LAYERNORM(fallback,4,float16)
1420
+ bias_format: SAME
1421
+ input_format: SAME
1422
+ instance: LayerNorm
1423
+ output_format: SAME
1424
+ weight_format: SAME
1425
+ model.decoder.layers.23.activation_fn:
1426
+ approximation_function: NONE
1427
+ input_format: SAME
1428
+ instance: ReLU
1429
+ output_format: SAME
1430
+ model.decoder.layers.23.dropout:
1431
+ approximation_function: NONE
1432
+ input_format: SAME
1433
+ instance: Dropout
1434
+ output_format: SAME
1435
+ model.decoder.layers.23.fc1:
1436
+ accum_format: SAME
1437
+ approximation_function: NONE
1438
+ bias_format: SAME
1439
+ input_format: BFP[8|8]{64,-1}(SN)
1440
+ instance: Linear
1441
+ output_format: SAME
1442
+ weight_format: BFP[8|8]{64,-1}(SN)
1443
+ weight_sparseness: DENSE
1444
+ model.decoder.layers.23.fc2:
1445
+ accum_format: SAME
1446
+ approximation_function: NONE
1447
+ bias_format: SAME
1448
+ input_format: BFP[8|8]{64,-1}(SN)
1449
+ instance: Linear
1450
+ output_format: SAME
1451
+ weight_format: BFP[8|8]{64,-1}(SN)
1452
+ weight_sparseness: DENSE
1453
+ model.decoder.layers.23.final_layer_norm:
1454
+ approximation_function: LAYERNORM(fallback,4,float16)
1455
+ bias_format: SAME
1456
+ input_format: SAME
1457
+ instance: LayerNorm
1458
+ output_format: SAME
1459
+ weight_format: SAME
1460
+ model.decoder.layers.23.self_attn.dropout:
1461
+ approximation_function: NONE
1462
+ input_format: SAME
1463
+ instance: Dropout
1464
+ output_format: BFP[8|8]{64,-1}(SN)
1465
+ model.decoder.layers.23.self_attn.k_proj:
1466
+ accum_format: SAME
1467
+ approximation_function: NONE
1468
+ bias_format: SAME
1469
+ input_format: BFP[8|8]{64,-1}(SN)
1470
+ instance: Linear
1471
+ output_format: BFP[8|8]{64,-1}(SN)
1472
+ weight_format: BFP[8|8]{64,-1}(SN)
1473
+ weight_sparseness: DENSE
1474
+ model.decoder.layers.23.self_attn.out_proj:
1475
+ accum_format: SAME
1476
+ approximation_function: NONE
1477
+ bias_format: SAME
1478
+ input_format: BFP[8|8]{64,-1}(SN)
1479
+ instance: Linear
1480
+ output_format: SAME
1481
+ weight_format: BFP[8|8]{64,-1}(SN)
1482
+ weight_sparseness: DENSE
1483
+ model.decoder.layers.23.self_attn.q_proj:
1484
+ accum_format: SAME
1485
+ approximation_function: NONE
1486
+ bias_format: SAME
1487
+ input_format: BFP[8|8]{64,-1}(SN)
1488
+ instance: Linear
1489
+ output_format: BFP[8|8]{64,-1}(SN)
1490
+ weight_format: BFP[8|8]{64,-1}(SN)
1491
+ weight_sparseness: DENSE
1492
+ model.decoder.layers.23.self_attn.softmax:
1493
+ approximation_function: SOFTMAX(base2,float16)
1494
+ input_format: SAME
1495
+ instance: Softmax
1496
+ output_format: SAME
1497
+ model.decoder.layers.23.self_attn.v_proj:
1498
+ accum_format: SAME
1499
+ approximation_function: NONE
1500
+ bias_format: SAME
1501
+ input_format: BFP[8|8]{64,-1}(SN)
1502
+ instance: Linear
1503
+ output_format: BFP[8|8]{64,-1}(SN)
1504
+ weight_format: BFP[8|8]{64,-1}(SN)
1505
+ weight_sparseness: DENSE
1506
+ model.decoder.layers.23.self_attn_layer_norm:
1507
+ approximation_function: LAYERNORM(fallback,4,float16)
1508
+ bias_format: SAME
1509
+ input_format: SAME
1510
+ instance: LayerNorm
1511
+ output_format: SAME
1512
+ weight_format: SAME
1513
+ model.decoder.layers.24.activation_fn:
1514
+ approximation_function: NONE
1515
+ input_format: SAME
1516
+ instance: ReLU
1517
+ output_format: SAME
1518
+ model.decoder.layers.24.dropout:
1519
+ approximation_function: NONE
1520
+ input_format: SAME
1521
+ instance: Dropout
1522
+ output_format: SAME
1523
+ model.decoder.layers.24.fc1:
1524
+ accum_format: SAME
1525
+ approximation_function: NONE
1526
+ bias_format: SAME
1527
+ input_format: BFP[8|8]{64,-1}(SN)
1528
+ instance: Linear
1529
+ output_format: SAME
1530
+ weight_format: BFP[8|8]{64,-1}(SN)
1531
+ weight_sparseness: DENSE
1532
+ model.decoder.layers.24.fc2:
1533
+ accum_format: SAME
1534
+ approximation_function: NONE
1535
+ bias_format: SAME
1536
+ input_format: BFP[8|8]{64,-1}(SN)
1537
+ instance: Linear
1538
+ output_format: SAME
1539
+ weight_format: BFP[8|8]{64,-1}(SN)
1540
+ weight_sparseness: DENSE
1541
+ model.decoder.layers.24.final_layer_norm:
1542
+ approximation_function: LAYERNORM(fallback,4,float16)
1543
+ bias_format: SAME
1544
+ input_format: SAME
1545
+ instance: LayerNorm
1546
+ output_format: SAME
1547
+ weight_format: SAME
1548
+ model.decoder.layers.24.self_attn.dropout:
1549
+ approximation_function: NONE
1550
+ input_format: SAME
1551
+ instance: Dropout
1552
+ output_format: BFP[8|8]{64,-1}(SN)
1553
+ model.decoder.layers.24.self_attn.k_proj:
1554
+ accum_format: SAME
1555
+ approximation_function: NONE
1556
+ bias_format: SAME
1557
+ input_format: BFP[8|8]{64,-1}(SN)
1558
+ instance: Linear
1559
+ output_format: BFP[8|8]{64,-1}(SN)
1560
+ weight_format: BFP[8|8]{64,-1}(SN)
1561
+ weight_sparseness: DENSE
1562
+ model.decoder.layers.24.self_attn.out_proj:
1563
+ accum_format: SAME
1564
+ approximation_function: NONE
1565
+ bias_format: SAME
1566
+ input_format: BFP[8|8]{64,-1}(SN)
1567
+ instance: Linear
1568
+ output_format: SAME
1569
+ weight_format: BFP[8|8]{64,-1}(SN)
1570
+ weight_sparseness: DENSE
1571
+ model.decoder.layers.24.self_attn.q_proj:
1572
+ accum_format: SAME
1573
+ approximation_function: NONE
1574
+ bias_format: SAME
1575
+ input_format: BFP[8|8]{64,-1}(SN)
1576
+ instance: Linear
1577
+ output_format: BFP[8|8]{64,-1}(SN)
1578
+ weight_format: BFP[8|8]{64,-1}(SN)
1579
+ weight_sparseness: DENSE
1580
+ model.decoder.layers.24.self_attn.softmax:
1581
+ approximation_function: SOFTMAX(base2,float16)
1582
+ input_format: SAME
1583
+ instance: Softmax
1584
+ output_format: SAME
1585
+ model.decoder.layers.24.self_attn.v_proj:
1586
+ accum_format: SAME
1587
+ approximation_function: NONE
1588
+ bias_format: SAME
1589
+ input_format: BFP[8|8]{64,-1}(SN)
1590
+ instance: Linear
1591
+ output_format: BFP[8|8]{64,-1}(SN)
1592
+ weight_format: BFP[8|8]{64,-1}(SN)
1593
+ weight_sparseness: DENSE
1594
+ model.decoder.layers.24.self_attn_layer_norm:
1595
+ approximation_function: LAYERNORM(fallback,4,float16)
1596
+ bias_format: SAME
1597
+ input_format: SAME
1598
+ instance: LayerNorm
1599
+ output_format: SAME
1600
+ weight_format: SAME
1601
+ model.decoder.layers.25.activation_fn:
1602
+ approximation_function: NONE
1603
+ input_format: SAME
1604
+ instance: ReLU
1605
+ output_format: SAME
1606
+ model.decoder.layers.25.dropout:
1607
+ approximation_function: NONE
1608
+ input_format: SAME
1609
+ instance: Dropout
1610
+ output_format: SAME
1611
+ model.decoder.layers.25.fc1:
1612
+ accum_format: SAME
1613
+ approximation_function: NONE
1614
+ bias_format: SAME
1615
+ input_format: BFP[8|8]{64,-1}(SN)
1616
+ instance: Linear
1617
+ output_format: SAME
1618
+ weight_format: BFP[8|8]{64,-1}(SN)
1619
+ weight_sparseness: DENSE
1620
+ model.decoder.layers.25.fc2:
1621
+ accum_format: SAME
1622
+ approximation_function: NONE
1623
+ bias_format: SAME
1624
+ input_format: BFP[8|8]{64,-1}(SN)
1625
+ instance: Linear
1626
+ output_format: SAME
1627
+ weight_format: BFP[8|8]{64,-1}(SN)
1628
+ weight_sparseness: DENSE
1629
+ model.decoder.layers.25.final_layer_norm:
1630
+ approximation_function: LAYERNORM(fallback,4,float16)
1631
+ bias_format: SAME
1632
+ input_format: SAME
1633
+ instance: LayerNorm
1634
+ output_format: SAME
1635
+ weight_format: SAME
1636
+ model.decoder.layers.25.self_attn.dropout:
1637
+ approximation_function: NONE
1638
+ input_format: SAME
1639
+ instance: Dropout
1640
+ output_format: BFP[8|8]{64,-1}(SN)
1641
+ model.decoder.layers.25.self_attn.k_proj:
1642
+ accum_format: SAME
1643
+ approximation_function: NONE
1644
+ bias_format: SAME
1645
+ input_format: BFP[8|8]{64,-1}(SN)
1646
+ instance: Linear
1647
+ output_format: BFP[8|8]{64,-1}(SN)
1648
+ weight_format: BFP[8|8]{64,-1}(SN)
1649
+ weight_sparseness: DENSE
1650
+ model.decoder.layers.25.self_attn.out_proj:
1651
+ accum_format: SAME
1652
+ approximation_function: NONE
1653
+ bias_format: SAME
1654
+ input_format: BFP[8|8]{64,-1}(SN)
1655
+ instance: Linear
1656
+ output_format: SAME
1657
+ weight_format: BFP[8|8]{64,-1}(SN)
1658
+ weight_sparseness: DENSE
1659
+ model.decoder.layers.25.self_attn.q_proj:
1660
+ accum_format: SAME
1661
+ approximation_function: NONE
1662
+ bias_format: SAME
1663
+ input_format: BFP[8|8]{64,-1}(SN)
1664
+ instance: Linear
1665
+ output_format: BFP[8|8]{64,-1}(SN)
1666
+ weight_format: BFP[8|8]{64,-1}(SN)
1667
+ weight_sparseness: DENSE
1668
+ model.decoder.layers.25.self_attn.softmax:
1669
+ approximation_function: SOFTMAX(base2,float16)
1670
+ input_format: SAME
1671
+ instance: Softmax
1672
+ output_format: SAME
1673
+ model.decoder.layers.25.self_attn.v_proj:
1674
+ accum_format: SAME
1675
+ approximation_function: NONE
1676
+ bias_format: SAME
1677
+ input_format: BFP[8|8]{64,-1}(SN)
1678
+ instance: Linear
1679
+ output_format: BFP[8|8]{64,-1}(SN)
1680
+ weight_format: BFP[8|8]{64,-1}(SN)
1681
+ weight_sparseness: DENSE
1682
+ model.decoder.layers.25.self_attn_layer_norm:
1683
+ approximation_function: LAYERNORM(fallback,4,float16)
1684
+ bias_format: SAME
1685
+ input_format: SAME
1686
+ instance: LayerNorm
1687
+ output_format: SAME
1688
+ weight_format: SAME
1689
+ model.decoder.layers.26.activation_fn:
1690
+ approximation_function: NONE
1691
+ input_format: SAME
1692
+ instance: ReLU
1693
+ output_format: SAME
1694
+ model.decoder.layers.26.dropout:
1695
+ approximation_function: NONE
1696
+ input_format: SAME
1697
+ instance: Dropout
1698
+ output_format: SAME
1699
+ model.decoder.layers.26.fc1:
1700
+ accum_format: SAME
1701
+ approximation_function: NONE
1702
+ bias_format: SAME
1703
+ input_format: BFP[8|8]{64,-1}(SN)
1704
+ instance: Linear
1705
+ output_format: SAME
1706
+ weight_format: BFP[8|8]{64,-1}(SN)
1707
+ weight_sparseness: DENSE
1708
+ model.decoder.layers.26.fc2:
1709
+ accum_format: SAME
1710
+ approximation_function: NONE
1711
+ bias_format: SAME
1712
+ input_format: BFP[8|8]{64,-1}(SN)
1713
+ instance: Linear
1714
+ output_format: SAME
1715
+ weight_format: BFP[8|8]{64,-1}(SN)
1716
+ weight_sparseness: DENSE
1717
+ model.decoder.layers.26.final_layer_norm:
1718
+ approximation_function: LAYERNORM(fallback,4,float16)
1719
+ bias_format: SAME
1720
+ input_format: SAME
1721
+ instance: LayerNorm
1722
+ output_format: SAME
1723
+ weight_format: SAME
1724
+ model.decoder.layers.26.self_attn.dropout:
1725
+ approximation_function: NONE
1726
+ input_format: SAME
1727
+ instance: Dropout
1728
+ output_format: BFP[8|8]{64,-1}(SN)
1729
+ model.decoder.layers.26.self_attn.k_proj:
1730
+ accum_format: SAME
1731
+ approximation_function: NONE
1732
+ bias_format: SAME
1733
+ input_format: BFP[8|8]{64,-1}(SN)
1734
+ instance: Linear
1735
+ output_format: BFP[8|8]{64,-1}(SN)
1736
+ weight_format: BFP[8|8]{64,-1}(SN)
1737
+ weight_sparseness: DENSE
1738
+ model.decoder.layers.26.self_attn.out_proj:
1739
+ accum_format: SAME
1740
+ approximation_function: NONE
1741
+ bias_format: SAME
1742
+ input_format: BFP[8|8]{64,-1}(SN)
1743
+ instance: Linear
1744
+ output_format: SAME
1745
+ weight_format: BFP[8|8]{64,-1}(SN)
1746
+ weight_sparseness: DENSE
1747
+ model.decoder.layers.26.self_attn.q_proj:
1748
+ accum_format: SAME
1749
+ approximation_function: NONE
1750
+ bias_format: SAME
1751
+ input_format: BFP[8|8]{64,-1}(SN)
1752
+ instance: Linear
1753
+ output_format: BFP[8|8]{64,-1}(SN)
1754
+ weight_format: BFP[8|8]{64,-1}(SN)
1755
+ weight_sparseness: DENSE
1756
+ model.decoder.layers.26.self_attn.softmax:
1757
+ approximation_function: SOFTMAX(base2,float16)
1758
+ input_format: SAME
1759
+ instance: Softmax
1760
+ output_format: SAME
1761
+ model.decoder.layers.26.self_attn.v_proj:
1762
+ accum_format: SAME
1763
+ approximation_function: NONE
1764
+ bias_format: SAME
1765
+ input_format: BFP[8|8]{64,-1}(SN)
1766
+ instance: Linear
1767
+ output_format: BFP[8|8]{64,-1}(SN)
1768
+ weight_format: BFP[8|8]{64,-1}(SN)
1769
+ weight_sparseness: DENSE
1770
+ model.decoder.layers.26.self_attn_layer_norm:
1771
+ approximation_function: LAYERNORM(fallback,4,float16)
1772
+ bias_format: SAME
1773
+ input_format: SAME
1774
+ instance: LayerNorm
1775
+ output_format: SAME
1776
+ weight_format: SAME
1777
+ model.decoder.layers.27.activation_fn:
1778
+ approximation_function: NONE
1779
+ input_format: SAME
1780
+ instance: ReLU
1781
+ output_format: SAME
1782
+ model.decoder.layers.27.dropout:
1783
+ approximation_function: NONE
1784
+ input_format: SAME
1785
+ instance: Dropout
1786
+ output_format: SAME
1787
+ model.decoder.layers.27.fc1:
1788
+ accum_format: SAME
1789
+ approximation_function: NONE
1790
+ bias_format: SAME
1791
+ input_format: BFP[8|8]{64,-1}(SN)
1792
+ instance: Linear
1793
+ output_format: SAME
1794
+ weight_format: BFP[8|8]{64,-1}(SN)
1795
+ weight_sparseness: DENSE
1796
+ model.decoder.layers.27.fc2:
1797
+ accum_format: SAME
1798
+ approximation_function: NONE
1799
+ bias_format: SAME
1800
+ input_format: BFP[8|8]{64,-1}(SN)
1801
+ instance: Linear
1802
+ output_format: SAME
1803
+ weight_format: BFP[8|8]{64,-1}(SN)
1804
+ weight_sparseness: DENSE
1805
+ model.decoder.layers.27.final_layer_norm:
1806
+ approximation_function: LAYERNORM(fallback,4,float16)
1807
+ bias_format: SAME
1808
+ input_format: SAME
1809
+ instance: LayerNorm
1810
+ output_format: SAME
1811
+ weight_format: SAME
1812
+ model.decoder.layers.27.self_attn.dropout:
1813
+ approximation_function: NONE
1814
+ input_format: SAME
1815
+ instance: Dropout
1816
+ output_format: BFP[8|8]{64,-1}(SN)
1817
+ model.decoder.layers.27.self_attn.k_proj:
1818
+ accum_format: SAME
1819
+ approximation_function: NONE
1820
+ bias_format: SAME
1821
+ input_format: BFP[8|8]{64,-1}(SN)
1822
+ instance: Linear
1823
+ output_format: BFP[8|8]{64,-1}(SN)
1824
+ weight_format: BFP[8|8]{64,-1}(SN)
1825
+ weight_sparseness: DENSE
1826
+ model.decoder.layers.27.self_attn.out_proj:
1827
+ accum_format: SAME
1828
+ approximation_function: NONE
1829
+ bias_format: SAME
1830
+ input_format: BFP[8|8]{64,-1}(SN)
1831
+ instance: Linear
1832
+ output_format: SAME
1833
+ weight_format: BFP[8|8]{64,-1}(SN)
1834
+ weight_sparseness: DENSE
1835
+ model.decoder.layers.27.self_attn.q_proj:
1836
+ accum_format: SAME
1837
+ approximation_function: NONE
1838
+ bias_format: SAME
1839
+ input_format: BFP[8|8]{64,-1}(SN)
1840
+ instance: Linear
1841
+ output_format: BFP[8|8]{64,-1}(SN)
1842
+ weight_format: BFP[8|8]{64,-1}(SN)
1843
+ weight_sparseness: DENSE
1844
+ model.decoder.layers.27.self_attn.softmax:
1845
+ approximation_function: SOFTMAX(base2,float16)
1846
+ input_format: SAME
1847
+ instance: Softmax
1848
+ output_format: SAME
1849
+ model.decoder.layers.27.self_attn.v_proj:
1850
+ accum_format: SAME
1851
+ approximation_function: NONE
1852
+ bias_format: SAME
1853
+ input_format: BFP[8|8]{64,-1}(SN)
1854
+ instance: Linear
1855
+ output_format: BFP[8|8]{64,-1}(SN)
1856
+ weight_format: BFP[8|8]{64,-1}(SN)
1857
+ weight_sparseness: DENSE
1858
+ model.decoder.layers.27.self_attn_layer_norm:
1859
+ approximation_function: LAYERNORM(fallback,4,float16)
1860
+ bias_format: SAME
1861
+ input_format: SAME
1862
+ instance: LayerNorm
1863
+ output_format: SAME
1864
+ weight_format: SAME
1865
+ model.decoder.layers.28.activation_fn:
1866
+ approximation_function: NONE
1867
+ input_format: SAME
1868
+ instance: ReLU
1869
+ output_format: SAME
1870
+ model.decoder.layers.28.dropout:
1871
+ approximation_function: NONE
1872
+ input_format: SAME
1873
+ instance: Dropout
1874
+ output_format: SAME
1875
+ model.decoder.layers.28.fc1:
1876
+ accum_format: SAME
1877
+ approximation_function: NONE
1878
+ bias_format: SAME
1879
+ input_format: BFP[8|8]{64,-1}(SN)
1880
+ instance: Linear
1881
+ output_format: SAME
1882
+ weight_format: BFP[8|8]{64,-1}(SN)
1883
+ weight_sparseness: DENSE
1884
+ model.decoder.layers.28.fc2:
1885
+ accum_format: SAME
1886
+ approximation_function: NONE
1887
+ bias_format: SAME
1888
+ input_format: BFP[8|8]{64,-1}(SN)
1889
+ instance: Linear
1890
+ output_format: SAME
1891
+ weight_format: BFP[8|8]{64,-1}(SN)
1892
+ weight_sparseness: DENSE
1893
+ model.decoder.layers.28.final_layer_norm:
1894
+ approximation_function: LAYERNORM(fallback,4,float16)
1895
+ bias_format: SAME
1896
+ input_format: SAME
1897
+ instance: LayerNorm
1898
+ output_format: SAME
1899
+ weight_format: SAME
1900
+ model.decoder.layers.28.self_attn.dropout:
1901
+ approximation_function: NONE
1902
+ input_format: SAME
1903
+ instance: Dropout
1904
+ output_format: BFP[8|8]{64,-1}(SN)
1905
+ model.decoder.layers.28.self_attn.k_proj:
1906
+ accum_format: SAME
1907
+ approximation_function: NONE
1908
+ bias_format: SAME
1909
+ input_format: BFP[8|8]{64,-1}(SN)
1910
+ instance: Linear
1911
+ output_format: BFP[8|8]{64,-1}(SN)
1912
+ weight_format: BFP[8|8]{64,-1}(SN)
1913
+ weight_sparseness: DENSE
1914
+ model.decoder.layers.28.self_attn.out_proj:
1915
+ accum_format: SAME
1916
+ approximation_function: NONE
1917
+ bias_format: SAME
1918
+ input_format: BFP[8|8]{64,-1}(SN)
1919
+ instance: Linear
1920
+ output_format: SAME
1921
+ weight_format: BFP[8|8]{64,-1}(SN)
1922
+ weight_sparseness: DENSE
1923
+ model.decoder.layers.28.self_attn.q_proj:
1924
+ accum_format: SAME
1925
+ approximation_function: NONE
1926
+ bias_format: SAME
1927
+ input_format: BFP[8|8]{64,-1}(SN)
1928
+ instance: Linear
1929
+ output_format: BFP[8|8]{64,-1}(SN)
1930
+ weight_format: BFP[8|8]{64,-1}(SN)
1931
+ weight_sparseness: DENSE
1932
+ model.decoder.layers.28.self_attn.softmax:
1933
+ approximation_function: SOFTMAX(base2,float16)
1934
+ input_format: SAME
1935
+ instance: Softmax
1936
+ output_format: SAME
1937
+ model.decoder.layers.28.self_attn.v_proj:
1938
+ accum_format: SAME
1939
+ approximation_function: NONE
1940
+ bias_format: SAME
1941
+ input_format: BFP[8|8]{64,-1}(SN)
1942
+ instance: Linear
1943
+ output_format: BFP[8|8]{64,-1}(SN)
1944
+ weight_format: BFP[8|8]{64,-1}(SN)
1945
+ weight_sparseness: DENSE
1946
+ model.decoder.layers.28.self_attn_layer_norm:
1947
+ approximation_function: LAYERNORM(fallback,4,float16)
1948
+ bias_format: SAME
1949
+ input_format: SAME
1950
+ instance: LayerNorm
1951
+ output_format: SAME
1952
+ weight_format: SAME
1953
+ model.decoder.layers.29.activation_fn:
1954
+ approximation_function: NONE
1955
+ input_format: SAME
1956
+ instance: ReLU
1957
+ output_format: SAME
1958
+ model.decoder.layers.29.dropout:
1959
+ approximation_function: NONE
1960
+ input_format: SAME
1961
+ instance: Dropout
1962
+ output_format: SAME
1963
+ model.decoder.layers.29.fc1:
1964
+ accum_format: SAME
1965
+ approximation_function: NONE
1966
+ bias_format: SAME
1967
+ input_format: BFP[8|8]{64,-1}(SN)
1968
+ instance: Linear
1969
+ output_format: SAME
1970
+ weight_format: BFP[8|8]{64,-1}(SN)
1971
+ weight_sparseness: DENSE
1972
+ model.decoder.layers.29.fc2:
1973
+ accum_format: SAME
1974
+ approximation_function: NONE
1975
+ bias_format: SAME
1976
+ input_format: BFP[8|8]{64,-1}(SN)
1977
+ instance: Linear
1978
+ output_format: SAME
1979
+ weight_format: BFP[8|8]{64,-1}(SN)
1980
+ weight_sparseness: DENSE
1981
+ model.decoder.layers.29.final_layer_norm:
1982
+ approximation_function: LAYERNORM(fallback,4,float16)
1983
+ bias_format: SAME
1984
+ input_format: SAME
1985
+ instance: LayerNorm
1986
+ output_format: SAME
1987
+ weight_format: SAME
1988
+ model.decoder.layers.29.self_attn.dropout:
1989
+ approximation_function: NONE
1990
+ input_format: SAME
1991
+ instance: Dropout
1992
+ output_format: BFP[8|8]{64,-1}(SN)
1993
+ model.decoder.layers.29.self_attn.k_proj:
1994
+ accum_format: SAME
1995
+ approximation_function: NONE
1996
+ bias_format: SAME
1997
+ input_format: BFP[8|8]{64,-1}(SN)
1998
+ instance: Linear
1999
+ output_format: BFP[8|8]{64,-1}(SN)
2000
+ weight_format: BFP[8|8]{64,-1}(SN)
2001
+ weight_sparseness: DENSE
2002
+ model.decoder.layers.29.self_attn.out_proj:
2003
+ accum_format: SAME
2004
+ approximation_function: NONE
2005
+ bias_format: SAME
2006
+ input_format: BFP[8|8]{64,-1}(SN)
2007
+ instance: Linear
2008
+ output_format: SAME
2009
+ weight_format: BFP[8|8]{64,-1}(SN)
2010
+ weight_sparseness: DENSE
2011
+ model.decoder.layers.29.self_attn.q_proj:
2012
+ accum_format: SAME
2013
+ approximation_function: NONE
2014
+ bias_format: SAME
2015
+ input_format: BFP[8|8]{64,-1}(SN)
2016
+ instance: Linear
2017
+ output_format: BFP[8|8]{64,-1}(SN)
2018
+ weight_format: BFP[8|8]{64,-1}(SN)
2019
+ weight_sparseness: DENSE
2020
+ model.decoder.layers.29.self_attn.softmax:
2021
+ approximation_function: SOFTMAX(base2,float16)
2022
+ input_format: SAME
2023
+ instance: Softmax
2024
+ output_format: SAME
2025
+ model.decoder.layers.29.self_attn.v_proj:
2026
+ accum_format: SAME
2027
+ approximation_function: NONE
2028
+ bias_format: SAME
2029
+ input_format: BFP[8|8]{64,-1}(SN)
2030
+ instance: Linear
2031
+ output_format: BFP[8|8]{64,-1}(SN)
2032
+ weight_format: BFP[8|8]{64,-1}(SN)
2033
+ weight_sparseness: DENSE
2034
+ model.decoder.layers.29.self_attn_layer_norm:
2035
+ approximation_function: LAYERNORM(fallback,4,float16)
2036
+ bias_format: SAME
2037
+ input_format: SAME
2038
+ instance: LayerNorm
2039
+ output_format: SAME
2040
+ weight_format: SAME
2041
+ model.decoder.layers.3.activation_fn:
2042
+ approximation_function: NONE
2043
+ input_format: SAME
2044
+ instance: ReLU
2045
+ output_format: SAME
2046
+ model.decoder.layers.3.dropout:
2047
+ approximation_function: NONE
2048
+ input_format: SAME
2049
+ instance: Dropout
2050
+ output_format: SAME
2051
+ model.decoder.layers.3.fc1:
2052
+ accum_format: SAME
2053
+ approximation_function: NONE
2054
+ bias_format: SAME
2055
+ input_format: BFP[8|8]{64,-1}(SN)
2056
+ instance: Linear
2057
+ output_format: SAME
2058
+ weight_format: BFP[8|8]{64,-1}(SN)
2059
+ weight_sparseness: DENSE
2060
+ model.decoder.layers.3.fc2:
2061
+ accum_format: SAME
2062
+ approximation_function: NONE
2063
+ bias_format: SAME
2064
+ input_format: BFP[8|8]{64,-1}(SN)
2065
+ instance: Linear
2066
+ output_format: SAME
2067
+ weight_format: BFP[8|8]{64,-1}(SN)
2068
+ weight_sparseness: DENSE
2069
+ model.decoder.layers.3.final_layer_norm:
2070
+ approximation_function: LAYERNORM(fallback,4,float16)
2071
+ bias_format: SAME
2072
+ input_format: SAME
2073
+ instance: LayerNorm
2074
+ output_format: SAME
2075
+ weight_format: SAME
2076
+ model.decoder.layers.3.self_attn.dropout:
2077
+ approximation_function: NONE
2078
+ input_format: SAME
2079
+ instance: Dropout
2080
+ output_format: BFP[8|8]{64,-1}(SN)
2081
+ model.decoder.layers.3.self_attn.k_proj:
2082
+ accum_format: SAME
2083
+ approximation_function: NONE
2084
+ bias_format: SAME
2085
+ input_format: BFP[8|8]{64,-1}(SN)
2086
+ instance: Linear
2087
+ output_format: BFP[8|8]{64,-1}(SN)
2088
+ weight_format: BFP[8|8]{64,-1}(SN)
2089
+ weight_sparseness: DENSE
2090
+ model.decoder.layers.3.self_attn.out_proj:
2091
+ accum_format: SAME
2092
+ approximation_function: NONE
2093
+ bias_format: SAME
2094
+ input_format: BFP[8|8]{64,-1}(SN)
2095
+ instance: Linear
2096
+ output_format: SAME
2097
+ weight_format: BFP[8|8]{64,-1}(SN)
2098
+ weight_sparseness: DENSE
2099
+ model.decoder.layers.3.self_attn.q_proj:
2100
+ accum_format: SAME
2101
+ approximation_function: NONE
2102
+ bias_format: SAME
2103
+ input_format: BFP[8|8]{64,-1}(SN)
2104
+ instance: Linear
2105
+ output_format: BFP[8|8]{64,-1}(SN)
2106
+ weight_format: BFP[8|8]{64,-1}(SN)
2107
+ weight_sparseness: DENSE
2108
+ model.decoder.layers.3.self_attn.softmax:
2109
+ approximation_function: SOFTMAX(base2,float16)
2110
+ input_format: SAME
2111
+ instance: Softmax
2112
+ output_format: SAME
2113
+ model.decoder.layers.3.self_attn.v_proj:
2114
+ accum_format: SAME
2115
+ approximation_function: NONE
2116
+ bias_format: SAME
2117
+ input_format: BFP[8|8]{64,-1}(SN)
2118
+ instance: Linear
2119
+ output_format: BFP[8|8]{64,-1}(SN)
2120
+ weight_format: BFP[8|8]{64,-1}(SN)
2121
+ weight_sparseness: DENSE
2122
+ model.decoder.layers.3.self_attn_layer_norm:
2123
+ approximation_function: LAYERNORM(fallback,4,float16)
2124
+ bias_format: SAME
2125
+ input_format: SAME
2126
+ instance: LayerNorm
2127
+ output_format: SAME
2128
+ weight_format: SAME
2129
+ model.decoder.layers.30.activation_fn:
2130
+ approximation_function: NONE
2131
+ input_format: SAME
2132
+ instance: ReLU
2133
+ output_format: SAME
2134
+ model.decoder.layers.30.dropout:
2135
+ approximation_function: NONE
2136
+ input_format: SAME
2137
+ instance: Dropout
2138
+ output_format: SAME
2139
+ model.decoder.layers.30.fc1:
2140
+ accum_format: SAME
2141
+ approximation_function: NONE
2142
+ bias_format: SAME
2143
+ input_format: BFP[8|8]{64,-1}(SN)
2144
+ instance: Linear
2145
+ output_format: SAME
2146
+ weight_format: BFP[8|8]{64,-1}(SN)
2147
+ weight_sparseness: DENSE
2148
+ model.decoder.layers.30.fc2:
2149
+ accum_format: SAME
2150
+ approximation_function: NONE
2151
+ bias_format: SAME
2152
+ input_format: BFP[8|8]{64,-1}(SN)
2153
+ instance: Linear
2154
+ output_format: SAME
2155
+ weight_format: BFP[8|8]{64,-1}(SN)
2156
+ weight_sparseness: DENSE
2157
+ model.decoder.layers.30.final_layer_norm:
2158
+ approximation_function: LAYERNORM(fallback,4,float16)
2159
+ bias_format: SAME
2160
+ input_format: SAME
2161
+ instance: LayerNorm
2162
+ output_format: SAME
2163
+ weight_format: SAME
2164
+ model.decoder.layers.30.self_attn.dropout:
2165
+ approximation_function: NONE
2166
+ input_format: SAME
2167
+ instance: Dropout
2168
+ output_format: BFP[8|8]{64,-1}(SN)
2169
+ model.decoder.layers.30.self_attn.k_proj:
2170
+ accum_format: SAME
2171
+ approximation_function: NONE
2172
+ bias_format: SAME
2173
+ input_format: BFP[8|8]{64,-1}(SN)
2174
+ instance: Linear
2175
+ output_format: BFP[8|8]{64,-1}(SN)
2176
+ weight_format: BFP[8|8]{64,-1}(SN)
2177
+ weight_sparseness: DENSE
2178
+ model.decoder.layers.30.self_attn.out_proj:
2179
+ accum_format: SAME
2180
+ approximation_function: NONE
2181
+ bias_format: SAME
2182
+ input_format: BFP[8|8]{64,-1}(SN)
2183
+ instance: Linear
2184
+ output_format: SAME
2185
+ weight_format: BFP[8|8]{64,-1}(SN)
2186
+ weight_sparseness: DENSE
2187
+ model.decoder.layers.30.self_attn.q_proj:
2188
+ accum_format: SAME
2189
+ approximation_function: NONE
2190
+ bias_format: SAME
2191
+ input_format: BFP[8|8]{64,-1}(SN)
2192
+ instance: Linear
2193
+ output_format: BFP[8|8]{64,-1}(SN)
2194
+ weight_format: BFP[8|8]{64,-1}(SN)
2195
+ weight_sparseness: DENSE
2196
+ model.decoder.layers.30.self_attn.softmax:
2197
+ approximation_function: SOFTMAX(base2,float16)
2198
+ input_format: SAME
2199
+ instance: Softmax
2200
+ output_format: SAME
2201
+ model.decoder.layers.30.self_attn.v_proj:
2202
+ accum_format: SAME
2203
+ approximation_function: NONE
2204
+ bias_format: SAME
2205
+ input_format: BFP[8|8]{64,-1}(SN)
2206
+ instance: Linear
2207
+ output_format: BFP[8|8]{64,-1}(SN)
2208
+ weight_format: BFP[8|8]{64,-1}(SN)
2209
+ weight_sparseness: DENSE
2210
+ model.decoder.layers.30.self_attn_layer_norm:
2211
+ approximation_function: LAYERNORM(fallback,4,float16)
2212
+ bias_format: SAME
2213
+ input_format: SAME
2214
+ instance: LayerNorm
2215
+ output_format: SAME
2216
+ weight_format: SAME
2217
+ model.decoder.layers.31.activation_fn:
2218
+ approximation_function: NONE
2219
+ input_format: SAME
2220
+ instance: ReLU
2221
+ output_format: SAME
2222
+ model.decoder.layers.31.dropout:
2223
+ approximation_function: NONE
2224
+ input_format: SAME
2225
+ instance: Dropout
2226
+ output_format: SAME
2227
+ model.decoder.layers.31.fc1:
2228
+ accum_format: SAME
2229
+ approximation_function: NONE
2230
+ bias_format: SAME
2231
+ input_format: BFP[8|8]{64,-1}(SN)
2232
+ instance: Linear
2233
+ output_format: SAME
2234
+ weight_format: BFP[8|8]{64,-1}(SN)
2235
+ weight_sparseness: DENSE
2236
+ model.decoder.layers.31.fc2:
2237
+ accum_format: SAME
2238
+ approximation_function: NONE
2239
+ bias_format: SAME
2240
+ input_format: BFP[8|8]{64,-1}(SN)
2241
+ instance: Linear
2242
+ output_format: SAME
2243
+ weight_format: BFP[8|8]{64,-1}(SN)
2244
+ weight_sparseness: DENSE
2245
+ model.decoder.layers.31.final_layer_norm:
2246
+ approximation_function: LAYERNORM(fallback,4,float16)
2247
+ bias_format: SAME
2248
+ input_format: SAME
2249
+ instance: LayerNorm
2250
+ output_format: SAME
2251
+ weight_format: SAME
2252
+ model.decoder.layers.31.self_attn.dropout:
2253
+ approximation_function: NONE
2254
+ input_format: SAME
2255
+ instance: Dropout
2256
+ output_format: BFP[8|8]{64,-1}(SN)
2257
+ model.decoder.layers.31.self_attn.k_proj:
2258
+ accum_format: SAME
2259
+ approximation_function: NONE
2260
+ bias_format: SAME
2261
+ input_format: BFP[8|8]{64,-1}(SN)
2262
+ instance: Linear
2263
+ output_format: BFP[8|8]{64,-1}(SN)
2264
+ weight_format: BFP[8|8]{64,-1}(SN)
2265
+ weight_sparseness: DENSE
2266
+ model.decoder.layers.31.self_attn.out_proj:
2267
+ accum_format: SAME
2268
+ approximation_function: NONE
2269
+ bias_format: SAME
2270
+ input_format: BFP[8|8]{64,-1}(SN)
2271
+ instance: Linear
2272
+ output_format: SAME
2273
+ weight_format: BFP[8|8]{64,-1}(SN)
2274
+ weight_sparseness: DENSE
2275
+ model.decoder.layers.31.self_attn.q_proj:
2276
+ accum_format: SAME
2277
+ approximation_function: NONE
2278
+ bias_format: SAME
2279
+ input_format: BFP[8|8]{64,-1}(SN)
2280
+ instance: Linear
2281
+ output_format: BFP[8|8]{64,-1}(SN)
2282
+ weight_format: BFP[8|8]{64,-1}(SN)
2283
+ weight_sparseness: DENSE
2284
+ model.decoder.layers.31.self_attn.softmax:
2285
+ approximation_function: SOFTMAX(base2,float16)
2286
+ input_format: SAME
2287
+ instance: Softmax
2288
+ output_format: SAME
2289
+ model.decoder.layers.31.self_attn.v_proj:
2290
+ accum_format: SAME
2291
+ approximation_function: NONE
2292
+ bias_format: SAME
2293
+ input_format: BFP[8|8]{64,-1}(SN)
2294
+ instance: Linear
2295
+ output_format: BFP[8|8]{64,-1}(SN)
2296
+ weight_format: BFP[8|8]{64,-1}(SN)
2297
+ weight_sparseness: DENSE
2298
+ model.decoder.layers.31.self_attn_layer_norm:
2299
+ approximation_function: LAYERNORM(fallback,4,float16)
2300
+ bias_format: SAME
2301
+ input_format: SAME
2302
+ instance: LayerNorm
2303
+ output_format: SAME
2304
+ weight_format: SAME
2305
+ model.decoder.layers.4.activation_fn:
2306
+ approximation_function: NONE
2307
+ input_format: SAME
2308
+ instance: ReLU
2309
+ output_format: SAME
2310
+ model.decoder.layers.4.dropout:
2311
+ approximation_function: NONE
2312
+ input_format: SAME
2313
+ instance: Dropout
2314
+ output_format: SAME
2315
+ model.decoder.layers.4.fc1:
2316
+ accum_format: SAME
2317
+ approximation_function: NONE
2318
+ bias_format: SAME
2319
+ input_format: BFP[8|8]{64,-1}(SN)
2320
+ instance: Linear
2321
+ output_format: SAME
2322
+ weight_format: BFP[8|8]{64,-1}(SN)
2323
+ weight_sparseness: DENSE
2324
+ model.decoder.layers.4.fc2:
2325
+ accum_format: SAME
2326
+ approximation_function: NONE
2327
+ bias_format: SAME
2328
+ input_format: BFP[8|8]{64,-1}(SN)
2329
+ instance: Linear
2330
+ output_format: SAME
2331
+ weight_format: BFP[8|8]{64,-1}(SN)
2332
+ weight_sparseness: DENSE
2333
+ model.decoder.layers.4.final_layer_norm:
2334
+ approximation_function: LAYERNORM(fallback,4,float16)
2335
+ bias_format: SAME
2336
+ input_format: SAME
2337
+ instance: LayerNorm
2338
+ output_format: SAME
2339
+ weight_format: SAME
2340
+ model.decoder.layers.4.self_attn.dropout:
2341
+ approximation_function: NONE
2342
+ input_format: SAME
2343
+ instance: Dropout
2344
+ output_format: BFP[8|8]{64,-1}(SN)
2345
+ model.decoder.layers.4.self_attn.k_proj:
2346
+ accum_format: SAME
2347
+ approximation_function: NONE
2348
+ bias_format: SAME
2349
+ input_format: BFP[8|8]{64,-1}(SN)
2350
+ instance: Linear
2351
+ output_format: BFP[8|8]{64,-1}(SN)
2352
+ weight_format: BFP[8|8]{64,-1}(SN)
2353
+ weight_sparseness: DENSE
2354
+ model.decoder.layers.4.self_attn.out_proj:
2355
+ accum_format: SAME
2356
+ approximation_function: NONE
2357
+ bias_format: SAME
2358
+ input_format: BFP[8|8]{64,-1}(SN)
2359
+ instance: Linear
2360
+ output_format: SAME
2361
+ weight_format: BFP[8|8]{64,-1}(SN)
2362
+ weight_sparseness: DENSE
2363
+ model.decoder.layers.4.self_attn.q_proj:
2364
+ accum_format: SAME
2365
+ approximation_function: NONE
2366
+ bias_format: SAME
2367
+ input_format: BFP[8|8]{64,-1}(SN)
2368
+ instance: Linear
2369
+ output_format: BFP[8|8]{64,-1}(SN)
2370
+ weight_format: BFP[8|8]{64,-1}(SN)
2371
+ weight_sparseness: DENSE
2372
+ model.decoder.layers.4.self_attn.softmax:
2373
+ approximation_function: SOFTMAX(base2,float16)
2374
+ input_format: SAME
2375
+ instance: Softmax
2376
+ output_format: SAME
2377
+ model.decoder.layers.4.self_attn.v_proj:
2378
+ accum_format: SAME
2379
+ approximation_function: NONE
2380
+ bias_format: SAME
2381
+ input_format: BFP[8|8]{64,-1}(SN)
2382
+ instance: Linear
2383
+ output_format: BFP[8|8]{64,-1}(SN)
2384
+ weight_format: BFP[8|8]{64,-1}(SN)
2385
+ weight_sparseness: DENSE
2386
+ model.decoder.layers.4.self_attn_layer_norm:
2387
+ approximation_function: LAYERNORM(fallback,4,float16)
2388
+ bias_format: SAME
2389
+ input_format: SAME
2390
+ instance: LayerNorm
2391
+ output_format: SAME
2392
+ weight_format: SAME
2393
+ model.decoder.layers.5.activation_fn:
2394
+ approximation_function: NONE
2395
+ input_format: SAME
2396
+ instance: ReLU
2397
+ output_format: SAME
2398
+ model.decoder.layers.5.dropout:
2399
+ approximation_function: NONE
2400
+ input_format: SAME
2401
+ instance: Dropout
2402
+ output_format: SAME
2403
+ model.decoder.layers.5.fc1:
2404
+ accum_format: SAME
2405
+ approximation_function: NONE
2406
+ bias_format: SAME
2407
+ input_format: BFP[8|8]{64,-1}(SN)
2408
+ instance: Linear
2409
+ output_format: SAME
2410
+ weight_format: BFP[8|8]{64,-1}(SN)
2411
+ weight_sparseness: DENSE
2412
+ model.decoder.layers.5.fc2:
2413
+ accum_format: SAME
2414
+ approximation_function: NONE
2415
+ bias_format: SAME
2416
+ input_format: BFP[8|8]{64,-1}(SN)
2417
+ instance: Linear
2418
+ output_format: SAME
2419
+ weight_format: BFP[8|8]{64,-1}(SN)
2420
+ weight_sparseness: DENSE
2421
+ model.decoder.layers.5.final_layer_norm:
2422
+ approximation_function: LAYERNORM(fallback,4,float16)
2423
+ bias_format: SAME
2424
+ input_format: SAME
2425
+ instance: LayerNorm
2426
+ output_format: SAME
2427
+ weight_format: SAME
2428
+ model.decoder.layers.5.self_attn.dropout:
2429
+ approximation_function: NONE
2430
+ input_format: SAME
2431
+ instance: Dropout
2432
+ output_format: BFP[8|8]{64,-1}(SN)
2433
+ model.decoder.layers.5.self_attn.k_proj:
2434
+ accum_format: SAME
2435
+ approximation_function: NONE
2436
+ bias_format: SAME
2437
+ input_format: BFP[8|8]{64,-1}(SN)
2438
+ instance: Linear
2439
+ output_format: BFP[8|8]{64,-1}(SN)
2440
+ weight_format: BFP[8|8]{64,-1}(SN)
2441
+ weight_sparseness: DENSE
2442
+ model.decoder.layers.5.self_attn.out_proj:
2443
+ accum_format: SAME
2444
+ approximation_function: NONE
2445
+ bias_format: SAME
2446
+ input_format: BFP[8|8]{64,-1}(SN)
2447
+ instance: Linear
2448
+ output_format: SAME
2449
+ weight_format: BFP[8|8]{64,-1}(SN)
2450
+ weight_sparseness: DENSE
2451
+ model.decoder.layers.5.self_attn.q_proj:
2452
+ accum_format: SAME
2453
+ approximation_function: NONE
2454
+ bias_format: SAME
2455
+ input_format: BFP[8|8]{64,-1}(SN)
2456
+ instance: Linear
2457
+ output_format: BFP[8|8]{64,-1}(SN)
2458
+ weight_format: BFP[8|8]{64,-1}(SN)
2459
+ weight_sparseness: DENSE
2460
+ model.decoder.layers.5.self_attn.softmax:
2461
+ approximation_function: SOFTMAX(base2,float16)
2462
+ input_format: SAME
2463
+ instance: Softmax
2464
+ output_format: SAME
2465
+ model.decoder.layers.5.self_attn.v_proj:
2466
+ accum_format: SAME
2467
+ approximation_function: NONE
2468
+ bias_format: SAME
2469
+ input_format: BFP[8|8]{64,-1}(SN)
2470
+ instance: Linear
2471
+ output_format: BFP[8|8]{64,-1}(SN)
2472
+ weight_format: BFP[8|8]{64,-1}(SN)
2473
+ weight_sparseness: DENSE
2474
+ model.decoder.layers.5.self_attn_layer_norm:
2475
+ approximation_function: LAYERNORM(fallback,4,float16)
2476
+ bias_format: SAME
2477
+ input_format: SAME
2478
+ instance: LayerNorm
2479
+ output_format: SAME
2480
+ weight_format: SAME
2481
+ model.decoder.layers.6.activation_fn:
2482
+ approximation_function: NONE
2483
+ input_format: SAME
2484
+ instance: ReLU
2485
+ output_format: SAME
2486
+ model.decoder.layers.6.dropout:
2487
+ approximation_function: NONE
2488
+ input_format: SAME
2489
+ instance: Dropout
2490
+ output_format: SAME
2491
+ model.decoder.layers.6.fc1:
2492
+ accum_format: SAME
2493
+ approximation_function: NONE
2494
+ bias_format: SAME
2495
+ input_format: BFP[8|8]{64,-1}(SN)
2496
+ instance: Linear
2497
+ output_format: SAME
2498
+ weight_format: BFP[8|8]{64,-1}(SN)
2499
+ weight_sparseness: DENSE
2500
+ model.decoder.layers.6.fc2:
2501
+ accum_format: SAME
2502
+ approximation_function: NONE
2503
+ bias_format: SAME
2504
+ input_format: BFP[8|8]{64,-1}(SN)
2505
+ instance: Linear
2506
+ output_format: SAME
2507
+ weight_format: BFP[8|8]{64,-1}(SN)
2508
+ weight_sparseness: DENSE
2509
+ model.decoder.layers.6.final_layer_norm:
2510
+ approximation_function: LAYERNORM(fallback,4,float16)
2511
+ bias_format: SAME
2512
+ input_format: SAME
2513
+ instance: LayerNorm
2514
+ output_format: SAME
2515
+ weight_format: SAME
2516
+ model.decoder.layers.6.self_attn.dropout:
2517
+ approximation_function: NONE
2518
+ input_format: SAME
2519
+ instance: Dropout
2520
+ output_format: BFP[8|8]{64,-1}(SN)
2521
+ model.decoder.layers.6.self_attn.k_proj:
2522
+ accum_format: SAME
2523
+ approximation_function: NONE
2524
+ bias_format: SAME
2525
+ input_format: BFP[8|8]{64,-1}(SN)
2526
+ instance: Linear
2527
+ output_format: BFP[8|8]{64,-1}(SN)
2528
+ weight_format: BFP[8|8]{64,-1}(SN)
2529
+ weight_sparseness: DENSE
2530
+ model.decoder.layers.6.self_attn.out_proj:
2531
+ accum_format: SAME
2532
+ approximation_function: NONE
2533
+ bias_format: SAME
2534
+ input_format: BFP[8|8]{64,-1}(SN)
2535
+ instance: Linear
2536
+ output_format: SAME
2537
+ weight_format: BFP[8|8]{64,-1}(SN)
2538
+ weight_sparseness: DENSE
2539
+ model.decoder.layers.6.self_attn.q_proj:
2540
+ accum_format: SAME
2541
+ approximation_function: NONE
2542
+ bias_format: SAME
2543
+ input_format: BFP[8|8]{64,-1}(SN)
2544
+ instance: Linear
2545
+ output_format: BFP[8|8]{64,-1}(SN)
2546
+ weight_format: BFP[8|8]{64,-1}(SN)
2547
+ weight_sparseness: DENSE
2548
+ model.decoder.layers.6.self_attn.softmax:
2549
+ approximation_function: SOFTMAX(base2,float16)
2550
+ input_format: SAME
2551
+ instance: Softmax
2552
+ output_format: SAME
2553
+ model.decoder.layers.6.self_attn.v_proj:
2554
+ accum_format: SAME
2555
+ approximation_function: NONE
2556
+ bias_format: SAME
2557
+ input_format: BFP[8|8]{64,-1}(SN)
2558
+ instance: Linear
2559
+ output_format: BFP[8|8]{64,-1}(SN)
2560
+ weight_format: BFP[8|8]{64,-1}(SN)
2561
+ weight_sparseness: DENSE
2562
+ model.decoder.layers.6.self_attn_layer_norm:
2563
+ approximation_function: LAYERNORM(fallback,4,float16)
2564
+ bias_format: SAME
2565
+ input_format: SAME
2566
+ instance: LayerNorm
2567
+ output_format: SAME
2568
+ weight_format: SAME
2569
+ model.decoder.layers.7.activation_fn:
2570
+ approximation_function: NONE
2571
+ input_format: SAME
2572
+ instance: ReLU
2573
+ output_format: SAME
2574
+ model.decoder.layers.7.dropout:
2575
+ approximation_function: NONE
2576
+ input_format: SAME
2577
+ instance: Dropout
2578
+ output_format: SAME
2579
+ model.decoder.layers.7.fc1:
2580
+ accum_format: SAME
2581
+ approximation_function: NONE
2582
+ bias_format: SAME
2583
+ input_format: BFP[8|8]{64,-1}(SN)
2584
+ instance: Linear
2585
+ output_format: SAME
2586
+ weight_format: BFP[8|8]{64,-1}(SN)
2587
+ weight_sparseness: DENSE
2588
+ model.decoder.layers.7.fc2:
2589
+ accum_format: SAME
2590
+ approximation_function: NONE
2591
+ bias_format: SAME
2592
+ input_format: BFP[8|8]{64,-1}(SN)
2593
+ instance: Linear
2594
+ output_format: SAME
2595
+ weight_format: BFP[8|8]{64,-1}(SN)
2596
+ weight_sparseness: DENSE
2597
+ model.decoder.layers.7.final_layer_norm:
2598
+ approximation_function: LAYERNORM(fallback,4,float16)
2599
+ bias_format: SAME
2600
+ input_format: SAME
2601
+ instance: LayerNorm
2602
+ output_format: SAME
2603
+ weight_format: SAME
2604
+ model.decoder.layers.7.self_attn.dropout:
2605
+ approximation_function: NONE
2606
+ input_format: SAME
2607
+ instance: Dropout
2608
+ output_format: BFP[8|8]{64,-1}(SN)
2609
+ model.decoder.layers.7.self_attn.k_proj:
2610
+ accum_format: SAME
2611
+ approximation_function: NONE
2612
+ bias_format: SAME
2613
+ input_format: BFP[8|8]{64,-1}(SN)
2614
+ instance: Linear
2615
+ output_format: BFP[8|8]{64,-1}(SN)
2616
+ weight_format: BFP[8|8]{64,-1}(SN)
2617
+ weight_sparseness: DENSE
2618
+ model.decoder.layers.7.self_attn.out_proj:
2619
+ accum_format: SAME
2620
+ approximation_function: NONE
2621
+ bias_format: SAME
2622
+ input_format: BFP[8|8]{64,-1}(SN)
2623
+ instance: Linear
2624
+ output_format: SAME
2625
+ weight_format: BFP[8|8]{64,-1}(SN)
2626
+ weight_sparseness: DENSE
2627
+ model.decoder.layers.7.self_attn.q_proj:
2628
+ accum_format: SAME
2629
+ approximation_function: NONE
2630
+ bias_format: SAME
2631
+ input_format: BFP[8|8]{64,-1}(SN)
2632
+ instance: Linear
2633
+ output_format: BFP[8|8]{64,-1}(SN)
2634
+ weight_format: BFP[8|8]{64,-1}(SN)
2635
+ weight_sparseness: DENSE
2636
+ model.decoder.layers.7.self_attn.softmax:
2637
+ approximation_function: SOFTMAX(base2,float16)
2638
+ input_format: SAME
2639
+ instance: Softmax
2640
+ output_format: SAME
2641
+ model.decoder.layers.7.self_attn.v_proj:
2642
+ accum_format: SAME
2643
+ approximation_function: NONE
2644
+ bias_format: SAME
2645
+ input_format: BFP[8|8]{64,-1}(SN)
2646
+ instance: Linear
2647
+ output_format: BFP[8|8]{64,-1}(SN)
2648
+ weight_format: BFP[8|8]{64,-1}(SN)
2649
+ weight_sparseness: DENSE
2650
+ model.decoder.layers.7.self_attn_layer_norm:
2651
+ approximation_function: LAYERNORM(fallback,4,float16)
2652
+ bias_format: SAME
2653
+ input_format: SAME
2654
+ instance: LayerNorm
2655
+ output_format: SAME
2656
+ weight_format: SAME
2657
+ model.decoder.layers.8.activation_fn:
2658
+ approximation_function: NONE
2659
+ input_format: SAME
2660
+ instance: ReLU
2661
+ output_format: SAME
2662
+ model.decoder.layers.8.dropout:
2663
+ approximation_function: NONE
2664
+ input_format: SAME
2665
+ instance: Dropout
2666
+ output_format: SAME
2667
+ model.decoder.layers.8.fc1:
2668
+ accum_format: SAME
2669
+ approximation_function: NONE
2670
+ bias_format: SAME
2671
+ input_format: BFP[8|8]{64,-1}(SN)
2672
+ instance: Linear
2673
+ output_format: SAME
2674
+ weight_format: BFP[8|8]{64,-1}(SN)
2675
+ weight_sparseness: DENSE
2676
+ model.decoder.layers.8.fc2:
2677
+ accum_format: SAME
2678
+ approximation_function: NONE
2679
+ bias_format: SAME
2680
+ input_format: BFP[8|8]{64,-1}(SN)
2681
+ instance: Linear
2682
+ output_format: SAME
2683
+ weight_format: BFP[8|8]{64,-1}(SN)
2684
+ weight_sparseness: DENSE
2685
+ model.decoder.layers.8.final_layer_norm:
2686
+ approximation_function: LAYERNORM(fallback,4,float16)
2687
+ bias_format: SAME
2688
+ input_format: SAME
2689
+ instance: LayerNorm
2690
+ output_format: SAME
2691
+ weight_format: SAME
2692
+ model.decoder.layers.8.self_attn.dropout:
2693
+ approximation_function: NONE
2694
+ input_format: SAME
2695
+ instance: Dropout
2696
+ output_format: BFP[8|8]{64,-1}(SN)
2697
+ model.decoder.layers.8.self_attn.k_proj:
2698
+ accum_format: SAME
2699
+ approximation_function: NONE
2700
+ bias_format: SAME
2701
+ input_format: BFP[8|8]{64,-1}(SN)
2702
+ instance: Linear
2703
+ output_format: BFP[8|8]{64,-1}(SN)
2704
+ weight_format: BFP[8|8]{64,-1}(SN)
2705
+ weight_sparseness: DENSE
2706
+ model.decoder.layers.8.self_attn.out_proj:
2707
+ accum_format: SAME
2708
+ approximation_function: NONE
2709
+ bias_format: SAME
2710
+ input_format: BFP[8|8]{64,-1}(SN)
2711
+ instance: Linear
2712
+ output_format: SAME
2713
+ weight_format: BFP[8|8]{64,-1}(SN)
2714
+ weight_sparseness: DENSE
2715
+ model.decoder.layers.8.self_attn.q_proj:
2716
+ accum_format: SAME
2717
+ approximation_function: NONE
2718
+ bias_format: SAME
2719
+ input_format: BFP[8|8]{64,-1}(SN)
2720
+ instance: Linear
2721
+ output_format: BFP[8|8]{64,-1}(SN)
2722
+ weight_format: BFP[8|8]{64,-1}(SN)
2723
+ weight_sparseness: DENSE
2724
+ model.decoder.layers.8.self_attn.softmax:
2725
+ approximation_function: SOFTMAX(base2,float16)
2726
+ input_format: SAME
2727
+ instance: Softmax
2728
+ output_format: SAME
2729
+ model.decoder.layers.8.self_attn.v_proj:
2730
+ accum_format: SAME
2731
+ approximation_function: NONE
2732
+ bias_format: SAME
2733
+ input_format: BFP[8|8]{64,-1}(SN)
2734
+ instance: Linear
2735
+ output_format: BFP[8|8]{64,-1}(SN)
2736
+ weight_format: BFP[8|8]{64,-1}(SN)
2737
+ weight_sparseness: DENSE
2738
+ model.decoder.layers.8.self_attn_layer_norm:
2739
+ approximation_function: LAYERNORM(fallback,4,float16)
2740
+ bias_format: SAME
2741
+ input_format: SAME
2742
+ instance: LayerNorm
2743
+ output_format: SAME
2744
+ weight_format: SAME
2745
+ model.decoder.layers.9.activation_fn:
2746
+ approximation_function: NONE
2747
+ input_format: SAME
2748
+ instance: ReLU
2749
+ output_format: SAME
2750
+ model.decoder.layers.9.dropout:
2751
+ approximation_function: NONE
2752
+ input_format: SAME
2753
+ instance: Dropout
2754
+ output_format: SAME
2755
+ model.decoder.layers.9.fc1:
2756
+ accum_format: SAME
2757
+ approximation_function: NONE
2758
+ bias_format: SAME
2759
+ input_format: BFP[8|8]{64,-1}(SN)
2760
+ instance: Linear
2761
+ output_format: SAME
2762
+ weight_format: BFP[8|8]{64,-1}(SN)
2763
+ weight_sparseness: DENSE
2764
+ model.decoder.layers.9.fc2:
2765
+ accum_format: SAME
2766
+ approximation_function: NONE
2767
+ bias_format: SAME
2768
+ input_format: BFP[8|8]{64,-1}(SN)
2769
+ instance: Linear
2770
+ output_format: SAME
2771
+ weight_format: BFP[8|8]{64,-1}(SN)
2772
+ weight_sparseness: DENSE
2773
+ model.decoder.layers.9.final_layer_norm:
2774
+ approximation_function: LAYERNORM(fallback,4,float16)
2775
+ bias_format: SAME
2776
+ input_format: SAME
2777
+ instance: LayerNorm
2778
+ output_format: SAME
2779
+ weight_format: SAME
2780
+ model.decoder.layers.9.self_attn.dropout:
2781
+ approximation_function: NONE
2782
+ input_format: SAME
2783
+ instance: Dropout
2784
+ output_format: BFP[8|8]{64,-1}(SN)
2785
+ model.decoder.layers.9.self_attn.k_proj:
2786
+ accum_format: SAME
2787
+ approximation_function: NONE
2788
+ bias_format: SAME
2789
+ input_format: BFP[8|8]{64,-1}(SN)
2790
+ instance: Linear
2791
+ output_format: BFP[8|8]{64,-1}(SN)
2792
+ weight_format: BFP[8|8]{64,-1}(SN)
2793
+ weight_sparseness: DENSE
2794
+ model.decoder.layers.9.self_attn.out_proj:
2795
+ accum_format: SAME
2796
+ approximation_function: NONE
2797
+ bias_format: SAME
2798
+ input_format: BFP[8|8]{64,-1}(SN)
2799
+ instance: Linear
2800
+ output_format: SAME
2801
+ weight_format: BFP[8|8]{64,-1}(SN)
2802
+ weight_sparseness: DENSE
2803
+ model.decoder.layers.9.self_attn.q_proj:
2804
+ accum_format: SAME
2805
+ approximation_function: NONE
2806
+ bias_format: SAME
2807
+ input_format: BFP[8|8]{64,-1}(SN)
2808
+ instance: Linear
2809
+ output_format: BFP[8|8]{64,-1}(SN)
2810
+ weight_format: BFP[8|8]{64,-1}(SN)
2811
+ weight_sparseness: DENSE
2812
+ model.decoder.layers.9.self_attn.softmax:
2813
+ approximation_function: SOFTMAX(base2,float16)
2814
+ input_format: SAME
2815
+ instance: Softmax
2816
+ output_format: SAME
2817
+ model.decoder.layers.9.self_attn.v_proj:
2818
+ accum_format: SAME
2819
+ approximation_function: NONE
2820
+ bias_format: SAME
2821
+ input_format: BFP[8|8]{64,-1}(SN)
2822
+ instance: Linear
2823
+ output_format: BFP[8|8]{64,-1}(SN)
2824
+ weight_format: BFP[8|8]{64,-1}(SN)
2825
+ weight_sparseness: DENSE
2826
+ model.decoder.layers.9.self_attn_layer_norm:
2827
+ approximation_function: LAYERNORM(fallback,4,float16)
2828
+ bias_format: SAME
2829
+ input_format: SAME
2830
+ instance: LayerNorm
2831
+ output_format: SAME
2832
+ weight_format: SAME
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 2,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 1,
6
+ "transformers_version": "4.27.0.dev0"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model-00001-of-00002.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fe9acfd39b64d2f51371afac60af02289eea8451c5e28ede2f67fec5505f50c
3
+ size 9960750957
pytorch_model-00002-of-00002.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3081528684cb8aa0f3d5b7d2fac2c93c9742a1daa7ec8b8fbcd44459370b2690
3
+ size 3356360185
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 13316947968
4
+ },
5
+ "weight_map": {
6
+ "decoder.embed_positions.weight": "pytorch_model-00001-of-00002.bin",
7
+ "decoder.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
8
+ "decoder.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
9
+ "decoder.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
10
+ "decoder.layers.0.fc1.bias": "pytorch_model-00001-of-00002.bin",
11
+ "decoder.layers.0.fc1.weight": "pytorch_model-00001-of-00002.bin",
12
+ "decoder.layers.0.fc2.bias": "pytorch_model-00001-of-00002.bin",
13
+ "decoder.layers.0.fc2.weight": "pytorch_model-00001-of-00002.bin",
14
+ "decoder.layers.0.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
15
+ "decoder.layers.0.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
16
+ "decoder.layers.0.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
17
+ "decoder.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
18
+ "decoder.layers.0.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
19
+ "decoder.layers.0.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
20
+ "decoder.layers.0.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
21
+ "decoder.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
22
+ "decoder.layers.0.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
23
+ "decoder.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
24
+ "decoder.layers.0.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
25
+ "decoder.layers.0.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
26
+ "decoder.layers.1.fc1.bias": "pytorch_model-00001-of-00002.bin",
27
+ "decoder.layers.1.fc1.weight": "pytorch_model-00001-of-00002.bin",
28
+ "decoder.layers.1.fc2.bias": "pytorch_model-00001-of-00002.bin",
29
+ "decoder.layers.1.fc2.weight": "pytorch_model-00001-of-00002.bin",
30
+ "decoder.layers.1.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
31
+ "decoder.layers.1.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
32
+ "decoder.layers.1.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
33
+ "decoder.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
34
+ "decoder.layers.1.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
35
+ "decoder.layers.1.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
36
+ "decoder.layers.1.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
37
+ "decoder.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
38
+ "decoder.layers.1.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
39
+ "decoder.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
40
+ "decoder.layers.1.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
41
+ "decoder.layers.1.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
42
+ "decoder.layers.10.fc1.bias": "pytorch_model-00001-of-00002.bin",
43
+ "decoder.layers.10.fc1.weight": "pytorch_model-00001-of-00002.bin",
44
+ "decoder.layers.10.fc2.bias": "pytorch_model-00001-of-00002.bin",
45
+ "decoder.layers.10.fc2.weight": "pytorch_model-00001-of-00002.bin",
46
+ "decoder.layers.10.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
47
+ "decoder.layers.10.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
48
+ "decoder.layers.10.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
49
+ "decoder.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
50
+ "decoder.layers.10.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
51
+ "decoder.layers.10.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
52
+ "decoder.layers.10.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
53
+ "decoder.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
54
+ "decoder.layers.10.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
55
+ "decoder.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
56
+ "decoder.layers.10.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
57
+ "decoder.layers.10.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
58
+ "decoder.layers.11.fc1.bias": "pytorch_model-00001-of-00002.bin",
59
+ "decoder.layers.11.fc1.weight": "pytorch_model-00001-of-00002.bin",
60
+ "decoder.layers.11.fc2.bias": "pytorch_model-00001-of-00002.bin",
61
+ "decoder.layers.11.fc2.weight": "pytorch_model-00001-of-00002.bin",
62
+ "decoder.layers.11.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
63
+ "decoder.layers.11.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
64
+ "decoder.layers.11.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
65
+ "decoder.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
66
+ "decoder.layers.11.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
67
+ "decoder.layers.11.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
68
+ "decoder.layers.11.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
69
+ "decoder.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
70
+ "decoder.layers.11.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
71
+ "decoder.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
72
+ "decoder.layers.11.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
73
+ "decoder.layers.11.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
74
+ "decoder.layers.12.fc1.bias": "pytorch_model-00001-of-00002.bin",
75
+ "decoder.layers.12.fc1.weight": "pytorch_model-00001-of-00002.bin",
76
+ "decoder.layers.12.fc2.bias": "pytorch_model-00001-of-00002.bin",
77
+ "decoder.layers.12.fc2.weight": "pytorch_model-00001-of-00002.bin",
78
+ "decoder.layers.12.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
79
+ "decoder.layers.12.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
80
+ "decoder.layers.12.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
81
+ "decoder.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
82
+ "decoder.layers.12.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
83
+ "decoder.layers.12.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
84
+ "decoder.layers.12.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
85
+ "decoder.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
86
+ "decoder.layers.12.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
87
+ "decoder.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
88
+ "decoder.layers.12.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
89
+ "decoder.layers.12.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
90
+ "decoder.layers.13.fc1.bias": "pytorch_model-00001-of-00002.bin",
91
+ "decoder.layers.13.fc1.weight": "pytorch_model-00001-of-00002.bin",
92
+ "decoder.layers.13.fc2.bias": "pytorch_model-00001-of-00002.bin",
93
+ "decoder.layers.13.fc2.weight": "pytorch_model-00001-of-00002.bin",
94
+ "decoder.layers.13.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
95
+ "decoder.layers.13.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
96
+ "decoder.layers.13.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
97
+ "decoder.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
98
+ "decoder.layers.13.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
99
+ "decoder.layers.13.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
100
+ "decoder.layers.13.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
101
+ "decoder.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
102
+ "decoder.layers.13.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
103
+ "decoder.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
104
+ "decoder.layers.13.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
105
+ "decoder.layers.13.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
106
+ "decoder.layers.14.fc1.bias": "pytorch_model-00001-of-00002.bin",
107
+ "decoder.layers.14.fc1.weight": "pytorch_model-00001-of-00002.bin",
108
+ "decoder.layers.14.fc2.bias": "pytorch_model-00001-of-00002.bin",
109
+ "decoder.layers.14.fc2.weight": "pytorch_model-00001-of-00002.bin",
110
+ "decoder.layers.14.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
111
+ "decoder.layers.14.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
112
+ "decoder.layers.14.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
113
+ "decoder.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
114
+ "decoder.layers.14.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
115
+ "decoder.layers.14.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
116
+ "decoder.layers.14.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
117
+ "decoder.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
118
+ "decoder.layers.14.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
119
+ "decoder.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
120
+ "decoder.layers.14.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
121
+ "decoder.layers.14.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
122
+ "decoder.layers.15.fc1.bias": "pytorch_model-00001-of-00002.bin",
123
+ "decoder.layers.15.fc1.weight": "pytorch_model-00001-of-00002.bin",
124
+ "decoder.layers.15.fc2.bias": "pytorch_model-00001-of-00002.bin",
125
+ "decoder.layers.15.fc2.weight": "pytorch_model-00001-of-00002.bin",
126
+ "decoder.layers.15.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
127
+ "decoder.layers.15.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
128
+ "decoder.layers.15.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
129
+ "decoder.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
130
+ "decoder.layers.15.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
131
+ "decoder.layers.15.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
132
+ "decoder.layers.15.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
133
+ "decoder.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
134
+ "decoder.layers.15.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
135
+ "decoder.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
136
+ "decoder.layers.15.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
137
+ "decoder.layers.15.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
138
+ "decoder.layers.16.fc1.bias": "pytorch_model-00001-of-00002.bin",
139
+ "decoder.layers.16.fc1.weight": "pytorch_model-00001-of-00002.bin",
140
+ "decoder.layers.16.fc2.bias": "pytorch_model-00001-of-00002.bin",
141
+ "decoder.layers.16.fc2.weight": "pytorch_model-00001-of-00002.bin",
142
+ "decoder.layers.16.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
143
+ "decoder.layers.16.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
144
+ "decoder.layers.16.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
145
+ "decoder.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
146
+ "decoder.layers.16.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
147
+ "decoder.layers.16.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
148
+ "decoder.layers.16.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
149
+ "decoder.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
150
+ "decoder.layers.16.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
151
+ "decoder.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
152
+ "decoder.layers.16.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
153
+ "decoder.layers.16.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
154
+ "decoder.layers.17.fc1.bias": "pytorch_model-00001-of-00002.bin",
155
+ "decoder.layers.17.fc1.weight": "pytorch_model-00001-of-00002.bin",
156
+ "decoder.layers.17.fc2.bias": "pytorch_model-00001-of-00002.bin",
157
+ "decoder.layers.17.fc2.weight": "pytorch_model-00001-of-00002.bin",
158
+ "decoder.layers.17.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
159
+ "decoder.layers.17.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
160
+ "decoder.layers.17.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
161
+ "decoder.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
162
+ "decoder.layers.17.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
163
+ "decoder.layers.17.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
164
+ "decoder.layers.17.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
165
+ "decoder.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
166
+ "decoder.layers.17.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
167
+ "decoder.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
168
+ "decoder.layers.17.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
169
+ "decoder.layers.17.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
170
+ "decoder.layers.18.fc1.bias": "pytorch_model-00001-of-00002.bin",
171
+ "decoder.layers.18.fc1.weight": "pytorch_model-00001-of-00002.bin",
172
+ "decoder.layers.18.fc2.bias": "pytorch_model-00001-of-00002.bin",
173
+ "decoder.layers.18.fc2.weight": "pytorch_model-00001-of-00002.bin",
174
+ "decoder.layers.18.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
175
+ "decoder.layers.18.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
176
+ "decoder.layers.18.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
177
+ "decoder.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
178
+ "decoder.layers.18.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
179
+ "decoder.layers.18.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
180
+ "decoder.layers.18.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
181
+ "decoder.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
182
+ "decoder.layers.18.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
183
+ "decoder.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
184
+ "decoder.layers.18.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
185
+ "decoder.layers.18.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
186
+ "decoder.layers.19.fc1.bias": "pytorch_model-00001-of-00002.bin",
187
+ "decoder.layers.19.fc1.weight": "pytorch_model-00001-of-00002.bin",
188
+ "decoder.layers.19.fc2.bias": "pytorch_model-00001-of-00002.bin",
189
+ "decoder.layers.19.fc2.weight": "pytorch_model-00001-of-00002.bin",
190
+ "decoder.layers.19.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
191
+ "decoder.layers.19.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
192
+ "decoder.layers.19.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
193
+ "decoder.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
194
+ "decoder.layers.19.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
195
+ "decoder.layers.19.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
196
+ "decoder.layers.19.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
197
+ "decoder.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
198
+ "decoder.layers.19.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
199
+ "decoder.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
200
+ "decoder.layers.19.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
201
+ "decoder.layers.19.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
202
+ "decoder.layers.2.fc1.bias": "pytorch_model-00001-of-00002.bin",
203
+ "decoder.layers.2.fc1.weight": "pytorch_model-00001-of-00002.bin",
204
+ "decoder.layers.2.fc2.bias": "pytorch_model-00001-of-00002.bin",
205
+ "decoder.layers.2.fc2.weight": "pytorch_model-00001-of-00002.bin",
206
+ "decoder.layers.2.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
207
+ "decoder.layers.2.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
208
+ "decoder.layers.2.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
209
+ "decoder.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
210
+ "decoder.layers.2.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
211
+ "decoder.layers.2.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
212
+ "decoder.layers.2.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
213
+ "decoder.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
214
+ "decoder.layers.2.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
215
+ "decoder.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
216
+ "decoder.layers.2.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
217
+ "decoder.layers.2.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
218
+ "decoder.layers.20.fc1.bias": "pytorch_model-00001-of-00002.bin",
219
+ "decoder.layers.20.fc1.weight": "pytorch_model-00001-of-00002.bin",
220
+ "decoder.layers.20.fc2.bias": "pytorch_model-00001-of-00002.bin",
221
+ "decoder.layers.20.fc2.weight": "pytorch_model-00001-of-00002.bin",
222
+ "decoder.layers.20.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
223
+ "decoder.layers.20.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
224
+ "decoder.layers.20.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
225
+ "decoder.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
226
+ "decoder.layers.20.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
227
+ "decoder.layers.20.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
228
+ "decoder.layers.20.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
229
+ "decoder.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
230
+ "decoder.layers.20.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
231
+ "decoder.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
232
+ "decoder.layers.20.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
233
+ "decoder.layers.20.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
234
+ "decoder.layers.21.fc1.bias": "pytorch_model-00001-of-00002.bin",
235
+ "decoder.layers.21.fc1.weight": "pytorch_model-00001-of-00002.bin",
236
+ "decoder.layers.21.fc2.bias": "pytorch_model-00001-of-00002.bin",
237
+ "decoder.layers.21.fc2.weight": "pytorch_model-00001-of-00002.bin",
238
+ "decoder.layers.21.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
239
+ "decoder.layers.21.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
240
+ "decoder.layers.21.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
241
+ "decoder.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
242
+ "decoder.layers.21.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
243
+ "decoder.layers.21.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
244
+ "decoder.layers.21.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
245
+ "decoder.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
246
+ "decoder.layers.21.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
247
+ "decoder.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
248
+ "decoder.layers.21.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
249
+ "decoder.layers.21.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
250
+ "decoder.layers.22.fc1.bias": "pytorch_model-00001-of-00002.bin",
251
+ "decoder.layers.22.fc1.weight": "pytorch_model-00001-of-00002.bin",
252
+ "decoder.layers.22.fc2.bias": "pytorch_model-00001-of-00002.bin",
253
+ "decoder.layers.22.fc2.weight": "pytorch_model-00001-of-00002.bin",
254
+ "decoder.layers.22.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
255
+ "decoder.layers.22.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
256
+ "decoder.layers.22.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
257
+ "decoder.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
258
+ "decoder.layers.22.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
259
+ "decoder.layers.22.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
260
+ "decoder.layers.22.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
261
+ "decoder.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
262
+ "decoder.layers.22.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
263
+ "decoder.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
264
+ "decoder.layers.22.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
265
+ "decoder.layers.22.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
266
+ "decoder.layers.23.fc1.bias": "pytorch_model-00001-of-00002.bin",
267
+ "decoder.layers.23.fc1.weight": "pytorch_model-00001-of-00002.bin",
268
+ "decoder.layers.23.fc2.bias": "pytorch_model-00002-of-00002.bin",
269
+ "decoder.layers.23.fc2.weight": "pytorch_model-00002-of-00002.bin",
270
+ "decoder.layers.23.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
271
+ "decoder.layers.23.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
272
+ "decoder.layers.23.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
273
+ "decoder.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
274
+ "decoder.layers.23.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
275
+ "decoder.layers.23.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
276
+ "decoder.layers.23.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
277
+ "decoder.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
278
+ "decoder.layers.23.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
279
+ "decoder.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
280
+ "decoder.layers.23.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
281
+ "decoder.layers.23.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
282
+ "decoder.layers.24.fc1.bias": "pytorch_model-00002-of-00002.bin",
283
+ "decoder.layers.24.fc1.weight": "pytorch_model-00002-of-00002.bin",
284
+ "decoder.layers.24.fc2.bias": "pytorch_model-00002-of-00002.bin",
285
+ "decoder.layers.24.fc2.weight": "pytorch_model-00002-of-00002.bin",
286
+ "decoder.layers.24.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
287
+ "decoder.layers.24.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
288
+ "decoder.layers.24.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
289
+ "decoder.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
290
+ "decoder.layers.24.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
291
+ "decoder.layers.24.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
292
+ "decoder.layers.24.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
293
+ "decoder.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
294
+ "decoder.layers.24.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
295
+ "decoder.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
296
+ "decoder.layers.24.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
297
+ "decoder.layers.24.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
298
+ "decoder.layers.25.fc1.bias": "pytorch_model-00002-of-00002.bin",
299
+ "decoder.layers.25.fc1.weight": "pytorch_model-00002-of-00002.bin",
300
+ "decoder.layers.25.fc2.bias": "pytorch_model-00002-of-00002.bin",
301
+ "decoder.layers.25.fc2.weight": "pytorch_model-00002-of-00002.bin",
302
+ "decoder.layers.25.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
303
+ "decoder.layers.25.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
304
+ "decoder.layers.25.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
305
+ "decoder.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
306
+ "decoder.layers.25.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
307
+ "decoder.layers.25.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
308
+ "decoder.layers.25.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
309
+ "decoder.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
310
+ "decoder.layers.25.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
311
+ "decoder.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
312
+ "decoder.layers.25.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
313
+ "decoder.layers.25.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
314
+ "decoder.layers.26.fc1.bias": "pytorch_model-00002-of-00002.bin",
315
+ "decoder.layers.26.fc1.weight": "pytorch_model-00002-of-00002.bin",
316
+ "decoder.layers.26.fc2.bias": "pytorch_model-00002-of-00002.bin",
317
+ "decoder.layers.26.fc2.weight": "pytorch_model-00002-of-00002.bin",
318
+ "decoder.layers.26.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
319
+ "decoder.layers.26.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
320
+ "decoder.layers.26.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
321
+ "decoder.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
322
+ "decoder.layers.26.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
323
+ "decoder.layers.26.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
324
+ "decoder.layers.26.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
325
+ "decoder.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
326
+ "decoder.layers.26.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
327
+ "decoder.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
328
+ "decoder.layers.26.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
329
+ "decoder.layers.26.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
330
+ "decoder.layers.27.fc1.bias": "pytorch_model-00002-of-00002.bin",
331
+ "decoder.layers.27.fc1.weight": "pytorch_model-00002-of-00002.bin",
332
+ "decoder.layers.27.fc2.bias": "pytorch_model-00002-of-00002.bin",
333
+ "decoder.layers.27.fc2.weight": "pytorch_model-00002-of-00002.bin",
334
+ "decoder.layers.27.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
335
+ "decoder.layers.27.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
336
+ "decoder.layers.27.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
337
+ "decoder.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
338
+ "decoder.layers.27.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
339
+ "decoder.layers.27.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
340
+ "decoder.layers.27.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
341
+ "decoder.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
342
+ "decoder.layers.27.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
343
+ "decoder.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
344
+ "decoder.layers.27.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
345
+ "decoder.layers.27.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
346
+ "decoder.layers.28.fc1.bias": "pytorch_model-00002-of-00002.bin",
347
+ "decoder.layers.28.fc1.weight": "pytorch_model-00002-of-00002.bin",
348
+ "decoder.layers.28.fc2.bias": "pytorch_model-00002-of-00002.bin",
349
+ "decoder.layers.28.fc2.weight": "pytorch_model-00002-of-00002.bin",
350
+ "decoder.layers.28.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
351
+ "decoder.layers.28.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
352
+ "decoder.layers.28.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
353
+ "decoder.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
354
+ "decoder.layers.28.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
355
+ "decoder.layers.28.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
356
+ "decoder.layers.28.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
357
+ "decoder.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
358
+ "decoder.layers.28.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
359
+ "decoder.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
360
+ "decoder.layers.28.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
361
+ "decoder.layers.28.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
362
+ "decoder.layers.29.fc1.bias": "pytorch_model-00002-of-00002.bin",
363
+ "decoder.layers.29.fc1.weight": "pytorch_model-00002-of-00002.bin",
364
+ "decoder.layers.29.fc2.bias": "pytorch_model-00002-of-00002.bin",
365
+ "decoder.layers.29.fc2.weight": "pytorch_model-00002-of-00002.bin",
366
+ "decoder.layers.29.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
367
+ "decoder.layers.29.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
368
+ "decoder.layers.29.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
369
+ "decoder.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
370
+ "decoder.layers.29.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
371
+ "decoder.layers.29.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
372
+ "decoder.layers.29.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
373
+ "decoder.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
374
+ "decoder.layers.29.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
375
+ "decoder.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
376
+ "decoder.layers.29.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
377
+ "decoder.layers.29.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
378
+ "decoder.layers.3.fc1.bias": "pytorch_model-00001-of-00002.bin",
379
+ "decoder.layers.3.fc1.weight": "pytorch_model-00001-of-00002.bin",
380
+ "decoder.layers.3.fc2.bias": "pytorch_model-00001-of-00002.bin",
381
+ "decoder.layers.3.fc2.weight": "pytorch_model-00001-of-00002.bin",
382
+ "decoder.layers.3.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
383
+ "decoder.layers.3.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
384
+ "decoder.layers.3.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
385
+ "decoder.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
386
+ "decoder.layers.3.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
387
+ "decoder.layers.3.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
388
+ "decoder.layers.3.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
389
+ "decoder.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
390
+ "decoder.layers.3.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
391
+ "decoder.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
392
+ "decoder.layers.3.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
393
+ "decoder.layers.3.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
394
+ "decoder.layers.30.fc1.bias": "pytorch_model-00002-of-00002.bin",
395
+ "decoder.layers.30.fc1.weight": "pytorch_model-00002-of-00002.bin",
396
+ "decoder.layers.30.fc2.bias": "pytorch_model-00002-of-00002.bin",
397
+ "decoder.layers.30.fc2.weight": "pytorch_model-00002-of-00002.bin",
398
+ "decoder.layers.30.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
399
+ "decoder.layers.30.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
400
+ "decoder.layers.30.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
401
+ "decoder.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
402
+ "decoder.layers.30.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
403
+ "decoder.layers.30.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
404
+ "decoder.layers.30.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
405
+ "decoder.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
406
+ "decoder.layers.30.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
407
+ "decoder.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
408
+ "decoder.layers.30.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
409
+ "decoder.layers.30.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
410
+ "decoder.layers.31.fc1.bias": "pytorch_model-00002-of-00002.bin",
411
+ "decoder.layers.31.fc1.weight": "pytorch_model-00002-of-00002.bin",
412
+ "decoder.layers.31.fc2.bias": "pytorch_model-00002-of-00002.bin",
413
+ "decoder.layers.31.fc2.weight": "pytorch_model-00002-of-00002.bin",
414
+ "decoder.layers.31.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
415
+ "decoder.layers.31.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
416
+ "decoder.layers.31.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
417
+ "decoder.layers.31.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
418
+ "decoder.layers.31.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
419
+ "decoder.layers.31.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
420
+ "decoder.layers.31.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
421
+ "decoder.layers.31.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
422
+ "decoder.layers.31.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
423
+ "decoder.layers.31.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
424
+ "decoder.layers.31.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
425
+ "decoder.layers.31.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
426
+ "decoder.layers.4.fc1.bias": "pytorch_model-00001-of-00002.bin",
427
+ "decoder.layers.4.fc1.weight": "pytorch_model-00001-of-00002.bin",
428
+ "decoder.layers.4.fc2.bias": "pytorch_model-00001-of-00002.bin",
429
+ "decoder.layers.4.fc2.weight": "pytorch_model-00001-of-00002.bin",
430
+ "decoder.layers.4.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
431
+ "decoder.layers.4.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
432
+ "decoder.layers.4.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
433
+ "decoder.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
434
+ "decoder.layers.4.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
435
+ "decoder.layers.4.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
436
+ "decoder.layers.4.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
437
+ "decoder.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
438
+ "decoder.layers.4.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
439
+ "decoder.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
440
+ "decoder.layers.4.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
441
+ "decoder.layers.4.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
442
+ "decoder.layers.5.fc1.bias": "pytorch_model-00001-of-00002.bin",
443
+ "decoder.layers.5.fc1.weight": "pytorch_model-00001-of-00002.bin",
444
+ "decoder.layers.5.fc2.bias": "pytorch_model-00001-of-00002.bin",
445
+ "decoder.layers.5.fc2.weight": "pytorch_model-00001-of-00002.bin",
446
+ "decoder.layers.5.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
447
+ "decoder.layers.5.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
448
+ "decoder.layers.5.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
449
+ "decoder.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
450
+ "decoder.layers.5.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
451
+ "decoder.layers.5.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
452
+ "decoder.layers.5.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
453
+ "decoder.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
454
+ "decoder.layers.5.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
455
+ "decoder.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
456
+ "decoder.layers.5.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
457
+ "decoder.layers.5.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
458
+ "decoder.layers.6.fc1.bias": "pytorch_model-00001-of-00002.bin",
459
+ "decoder.layers.6.fc1.weight": "pytorch_model-00001-of-00002.bin",
460
+ "decoder.layers.6.fc2.bias": "pytorch_model-00001-of-00002.bin",
461
+ "decoder.layers.6.fc2.weight": "pytorch_model-00001-of-00002.bin",
462
+ "decoder.layers.6.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
463
+ "decoder.layers.6.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
464
+ "decoder.layers.6.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
465
+ "decoder.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
466
+ "decoder.layers.6.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
467
+ "decoder.layers.6.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
468
+ "decoder.layers.6.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
469
+ "decoder.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
470
+ "decoder.layers.6.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
471
+ "decoder.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
472
+ "decoder.layers.6.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
473
+ "decoder.layers.6.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
474
+ "decoder.layers.7.fc1.bias": "pytorch_model-00001-of-00002.bin",
475
+ "decoder.layers.7.fc1.weight": "pytorch_model-00001-of-00002.bin",
476
+ "decoder.layers.7.fc2.bias": "pytorch_model-00001-of-00002.bin",
477
+ "decoder.layers.7.fc2.weight": "pytorch_model-00001-of-00002.bin",
478
+ "decoder.layers.7.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
479
+ "decoder.layers.7.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
480
+ "decoder.layers.7.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
481
+ "decoder.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
482
+ "decoder.layers.7.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
483
+ "decoder.layers.7.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
484
+ "decoder.layers.7.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
485
+ "decoder.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
486
+ "decoder.layers.7.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
487
+ "decoder.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
488
+ "decoder.layers.7.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
489
+ "decoder.layers.7.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
490
+ "decoder.layers.8.fc1.bias": "pytorch_model-00001-of-00002.bin",
491
+ "decoder.layers.8.fc1.weight": "pytorch_model-00001-of-00002.bin",
492
+ "decoder.layers.8.fc2.bias": "pytorch_model-00001-of-00002.bin",
493
+ "decoder.layers.8.fc2.weight": "pytorch_model-00001-of-00002.bin",
494
+ "decoder.layers.8.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
495
+ "decoder.layers.8.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
496
+ "decoder.layers.8.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
497
+ "decoder.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
498
+ "decoder.layers.8.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
499
+ "decoder.layers.8.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
500
+ "decoder.layers.8.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
501
+ "decoder.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
502
+ "decoder.layers.8.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
503
+ "decoder.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
504
+ "decoder.layers.8.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
505
+ "decoder.layers.8.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
506
+ "decoder.layers.9.fc1.bias": "pytorch_model-00001-of-00002.bin",
507
+ "decoder.layers.9.fc1.weight": "pytorch_model-00001-of-00002.bin",
508
+ "decoder.layers.9.fc2.bias": "pytorch_model-00001-of-00002.bin",
509
+ "decoder.layers.9.fc2.weight": "pytorch_model-00001-of-00002.bin",
510
+ "decoder.layers.9.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
511
+ "decoder.layers.9.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
512
+ "decoder.layers.9.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
513
+ "decoder.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
514
+ "decoder.layers.9.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
515
+ "decoder.layers.9.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
516
+ "decoder.layers.9.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
517
+ "decoder.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
518
+ "decoder.layers.9.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
519
+ "decoder.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
520
+ "decoder.layers.9.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
521
+ "decoder.layers.9.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin"
522
+ }
523
+ }
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "unk_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "add_bos_token": true, "special_tokens_map_file": null, "name_or_path": "patrickvonplaten/opt-30b"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff