File size: 54,095 Bytes
a76a14d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})]
{
    func main<ios16>(tensor<fp16, [128, 64]> cos, tensor<fp16, [1, 32, 128, 448]> k_cache_0, tensor<fp16, [1, 32, 128, 448]> k_cache_1, tensor<fp16, [1, 1, 64, 512]> mask, tensor<fp16, [128, 64]> sin, tensor<fp16, [1, 32, 128, 448]> v_cache_0, tensor<fp16, [1, 32, 128, 448]> v_cache_1, tensor<fp16, [1, 4096, 1, 64]> x) [CoreML_InputDefaultValues = dict<tensor<string, []>, tensor<fp32, []>>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] {
            tensor<fp16, [4096, 4096, 1, 1]> blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8388736))), name = tensor<string, []>("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8388864))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16777536))), name = tensor<string, []>("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16777664))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25166336))), name = tensor<string, []>("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25166464))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33555136))), name = tensor<string, []>("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [11008, 4096, 1, 1]> blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33555264))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56099712))), name = tensor<string, []>("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([11008, 4096, 1, 1])];
            tensor<fp16, [11008, 4096, 1, 1]> blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56099840))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78644288))), name = tensor<string, []>("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([11008, 4096, 1, 1])];
            tensor<fp16, [4096, 11008, 1, 1]> blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78644416))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101188864))), name = tensor<string, []>("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 11008, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101188992))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(109577664))), name = tensor<string, []>("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(109577792))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(117966464))), name = tensor<string, []>("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(117966592))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126355264))), name = tensor<string, []>("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126355392))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134744064))), name = tensor<string, []>("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [11008, 4096, 1, 1]> blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134744192))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157288640))), name = tensor<string, []>("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([11008, 4096, 1, 1])];
            tensor<fp16, [11008, 4096, 1, 1]> blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157288768))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(179833216))), name = tensor<string, []>("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([11008, 4096, 1, 1])];
            tensor<fp16, [4096, 11008, 1, 1]> blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(179833344))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202377792))), name = tensor<string, []>("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 11008, 1, 1])];
            tensor<int32, []> var_14 = const()[name = tensor<string, []>("op_14"), val = tensor<int32, []>(3)];
            tensor<int32, []> var_19 = const()[name = tensor<string, []>("op_19"), val = tensor<int32, []>(-2)];
            tensor<int32, []> var_21 = const()[name = tensor<string, []>("op_21"), val = tensor<int32, []>(-1)];
            tensor<int32, []> var_28 = const()[name = tensor<string, []>("op_28"), val = tensor<int32, []>(1)];
            tensor<bool, []> var_29 = const()[name = tensor<string, []>("op_29"), val = tensor<bool, []>(true)];
            tensor<fp16, [1, 4096, 1, 64]> var_37_cast_fp16 = mul(x = x, y = x)[name = tensor<string, []>("op_37_cast_fp16")];
            tensor<int32, [1]> var_38 = const()[name = tensor<string, []>("op_38"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 1, 64]> norm_x_1_cast_fp16 = reduce_mean(axes = var_38, keep_dims = var_29, x = var_37_cast_fp16)[name = tensor<string, []>("norm_x_1_cast_fp16")];
            tensor<fp16, []> var_40_to_fp16 = const()[name = tensor<string, []>("op_40_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 1, 64]> var_41_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_40_to_fp16)[name = tensor<string, []>("op_41_cast_fp16")];
            tensor<fp16, []> var_42_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_42_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
            tensor<fp16, [1, 1, 1, 64]> var_42_cast_fp16 = rsqrt(epsilon = var_42_epsilon_0_to_fp16, x = var_41_cast_fp16)[name = tensor<string, []>("op_42_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_normed_1_cast_fp16 = mul(x = x, y = var_42_cast_fp16)[name = tensor<string, []>("x_normed_1_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_norm_1_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_norm_1_weight_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202377920)))];
            tensor<fp16, [1, 4096, 1, 64]> x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor<string, []>("x_5_cast_fp16")];
            tensor<int32, [2]> var_54 = const()[name = tensor<string, []>("op_54"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_56 = const()[name = tensor<string, []>("op_56"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_58_pad_type_0 = const()[name = tensor<string, []>("op_58_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_58_pad_0 = const()[name = tensor<string, []>("op_58_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_58_cast_fp16 = conv(dilations = var_56, groups = var_28, pad = var_58_pad_0, pad_type = var_58_pad_type_0, strides = var_54, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor<string, []>("op_58_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202386176)))];
            tensor<fp16, [1, 4096, 1, 64]> q_1_cast_fp16 = mul(x = var_58_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor<string, []>("q_1_cast_fp16")];
            tensor<int32, [2]> var_62 = const()[name = tensor<string, []>("op_62"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_64 = const()[name = tensor<string, []>("op_64"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_66_pad_type_0 = const()[name = tensor<string, []>("op_66_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_66_pad_0 = const()[name = tensor<string, []>("op_66_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_66_cast_fp16 = conv(dilations = var_64, groups = var_28, pad = var_66_pad_0, pad_type = var_66_pad_type_0, strides = var_62, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor<string, []>("op_66_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202394432)))];
            tensor<fp16, [1, 4096, 1, 64]> k_1_cast_fp16 = mul(x = var_66_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
            tensor<int32, [2]> var_70 = const()[name = tensor<string, []>("op_70"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_72 = const()[name = tensor<string, []>("op_72"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_74_pad_type_0 = const()[name = tensor<string, []>("op_74_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_74_pad_0 = const()[name = tensor<string, []>("op_74_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_74_cast_fp16 = conv(dilations = var_72, groups = var_28, pad = var_74_pad_0, pad_type = var_74_pad_type_0, strides = var_70, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor<string, []>("op_74_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202402688)))];
            tensor<fp16, [1, 4096, 1, 64]> v_1_cast_fp16 = mul(x = var_74_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor<string, []>("v_1_cast_fp16")];
            tensor<int32, [4]> var_76 = const()[name = tensor<string, []>("op_76"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> q_3_cast_fp16 = reshape(shape = var_76, x = q_1_cast_fp16)[name = tensor<string, []>("q_3_cast_fp16")];
            tensor<int32, [4]> var_78 = const()[name = tensor<string, []>("op_78"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> k_3_cast_fp16 = reshape(shape = var_78, x = k_1_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
            tensor<int32, [4]> var_80 = const()[name = tensor<string, []>("op_80"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> new_v_cache_0 = reshape(shape = var_80, x = v_1_cast_fp16)[name = tensor<string, []>("v_3_cast_fp16")];
            tensor<int32, [4]> var_92_begin_0 = const()[name = tensor<string, []>("op_92_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_92_end_0 = const()[name = tensor<string, []>("op_92_end_0"), val = tensor<int32, [4]>([1, 32, 64, 64])];
            tensor<bool, [4]> var_92_end_mask_0 = const()[name = tensor<string, []>("op_92_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 32, 64, 64]> var_92_cast_fp16 = slice_by_index(begin = var_92_begin_0, end = var_92_end_0, end_mask = var_92_end_mask_0, x = q_3_cast_fp16)[name = tensor<string, []>("op_92_cast_fp16")];
            tensor<int32, [4]> var_98_begin_0 = const()[name = tensor<string, []>("op_98_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_98_end_0 = const()[name = tensor<string, []>("op_98_end_0"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<bool, [4]> var_98_end_mask_0 = const()[name = tensor<string, []>("op_98_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 64, 64]> var_98_cast_fp16 = slice_by_index(begin = var_98_begin_0, end = var_98_end_0, end_mask = var_98_end_mask_0, x = q_3_cast_fp16)[name = tensor<string, []>("op_98_cast_fp16")];
            tensor<fp16, []> const_3_promoted_to_fp16 = const()[name = tensor<string, []>("const_3_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 32, 64, 64]> var_100_cast_fp16 = mul(x = var_98_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor<string, []>("op_100_cast_fp16")];
            tensor<bool, []> rotated_1_interleave_0 = const()[name = tensor<string, []>("rotated_1_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> rotated_1_cast_fp16 = concat(axis = var_19, interleave = rotated_1_interleave_0, values = (var_100_cast_fp16, var_92_cast_fp16))[name = tensor<string, []>("rotated_1_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_103_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor<string, []>("op_103_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_104_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor<string, []>("op_104_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> roped_1_cast_fp16 = add(x = var_103_cast_fp16, y = var_104_cast_fp16)[name = tensor<string, []>("roped_1_cast_fp16")];
            tensor<int32, [4]> var_117_begin_0 = const()[name = tensor<string, []>("op_117_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_117_end_0 = const()[name = tensor<string, []>("op_117_end_0"), val = tensor<int32, [4]>([1, 32, 64, 64])];
            tensor<bool, [4]> var_117_end_mask_0 = const()[name = tensor<string, []>("op_117_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 32, 64, 64]> var_117_cast_fp16 = slice_by_index(begin = var_117_begin_0, end = var_117_end_0, end_mask = var_117_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_117_cast_fp16")];
            tensor<int32, [4]> var_123_begin_0 = const()[name = tensor<string, []>("op_123_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_123_end_0 = const()[name = tensor<string, []>("op_123_end_0"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<bool, [4]> var_123_end_mask_0 = const()[name = tensor<string, []>("op_123_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 64, 64]> var_123_cast_fp16 = slice_by_index(begin = var_123_begin_0, end = var_123_end_0, end_mask = var_123_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_123_cast_fp16")];
            tensor<fp16, []> const_5_promoted_to_fp16 = const()[name = tensor<string, []>("const_5_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 32, 64, 64]> var_125_cast_fp16 = mul(x = var_123_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor<string, []>("op_125_cast_fp16")];
            tensor<bool, []> rotated_3_interleave_0 = const()[name = tensor<string, []>("rotated_3_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> rotated_3_cast_fp16 = concat(axis = var_19, interleave = rotated_3_interleave_0, values = (var_125_cast_fp16, var_117_cast_fp16))[name = tensor<string, []>("rotated_3_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_128_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor<string, []>("op_128_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_129_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor<string, []>("op_129_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> roped_3_cast_fp16 = add(x = var_128_cast_fp16, y = var_129_cast_fp16)[name = tensor<string, []>("roped_3_cast_fp16")];
            tensor<bool, []> q_5_interleave_0 = const()[name = tensor<string, []>("q_5_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> q_5_cast_fp16 = concat(axis = var_19, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor<string, []>("q_5_cast_fp16")];
            tensor<bool, []> k_5_interleave_0 = const()[name = tensor<string, []>("k_5_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> new_k_cache_0 = concat(axis = var_19, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
            tensor<bool, []> k_7_interleave_0 = const()[name = tensor<string, []>("k_7_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 512]> k_7_cast_fp16 = concat(axis = var_21, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor<string, []>("k_7_cast_fp16")];
            tensor<bool, []> v_5_interleave_0 = const()[name = tensor<string, []>("v_5_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 512]> v_5_cast_fp16 = concat(axis = var_21, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor<string, []>("v_5_cast_fp16")];
            tensor<fp16, []> var_151_to_fp16 = const()[name = tensor<string, []>("op_151_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 32, 128, 64]> var_152_cast_fp16 = mul(x = q_5_cast_fp16, y = var_151_to_fp16)[name = tensor<string, []>("op_152_cast_fp16")];
            tensor<bool, []> attn_weights_1_transpose_x_0 = const()[name = tensor<string, []>("attn_weights_1_transpose_x_0"), val = tensor<bool, []>(true)];
            tensor<bool, []> attn_weights_1_transpose_y_0 = const()[name = tensor<string, []>("attn_weights_1_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 64, 512]> attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_152_cast_fp16, y = k_7_cast_fp16)[name = tensor<string, []>("attn_weights_1_cast_fp16")];
            tensor<fp16, [1, 32, 64, 512]> attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor<string, []>("attn_weights_3_cast_fp16")];
            tensor<fp16, [1, 32, 64, 512]> var_160_cast_fp16 = softmax(axis = var_14, x = attn_weights_3_cast_fp16)[name = tensor<string, []>("op_160_cast_fp16")];
            tensor<bool, []> attn_1_transpose_x_0 = const()[name = tensor<string, []>("attn_1_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> attn_1_transpose_y_0 = const()[name = tensor<string, []>("attn_1_transpose_y_0"), val = tensor<bool, []>(true)];
            tensor<fp16, [1, 32, 128, 64]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_160_cast_fp16)[name = tensor<string, []>("attn_1_cast_fp16")];
            tensor<int32, [4]> var_164 = const()[name = tensor<string, []>("op_164"), val = tensor<int32, [4]>([1, 4096, 1, -1])];
            tensor<fp16, [1, 4096, 1, 64]> input_1_cast_fp16 = reshape(shape = var_164, x = attn_1_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
            tensor<int32, [2]> var_168 = const()[name = tensor<string, []>("op_168"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_170 = const()[name = tensor<string, []>("op_170"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_172_pad_type_0 = const()[name = tensor<string, []>("op_172_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_172_pad_0 = const()[name = tensor<string, []>("op_172_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_172_cast_fp16 = conv(dilations = var_170, groups = var_28, pad = var_172_pad_0, pad_type = var_172_pad_type_0, strides = var_168, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_172_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202410944)))];
            tensor<fp16, [1, 4096, 1, 64]> attention_output_1_cast_fp16 = mul(x = var_172_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor<string, []>("attention_output_1_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor<string, []>("x_11_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> var_181_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor<string, []>("op_181_cast_fp16")];
            tensor<int32, [1]> var_182 = const()[name = tensor<string, []>("op_182"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 1, 64]> norm_x_3_cast_fp16 = reduce_mean(axes = var_182, keep_dims = var_29, x = var_181_cast_fp16)[name = tensor<string, []>("norm_x_3_cast_fp16")];
            tensor<fp16, []> var_184_to_fp16 = const()[name = tensor<string, []>("op_184_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 1, 64]> var_185_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_184_to_fp16)[name = tensor<string, []>("op_185_cast_fp16")];
            tensor<fp16, []> var_186_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_186_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
            tensor<fp16, [1, 1, 1, 64]> var_186_cast_fp16 = rsqrt(epsilon = var_186_epsilon_0_to_fp16, x = var_185_cast_fp16)[name = tensor<string, []>("op_186_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_186_cast_fp16)[name = tensor<string, []>("x_normed_5_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_norm_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_norm_2_weight_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202419200)))];
            tensor<fp16, [1, 4096, 1, 64]> input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
            tensor<int32, [2]> var_198 = const()[name = tensor<string, []>("op_198"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_200 = const()[name = tensor<string, []>("op_200"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_202_pad_type_0 = const()[name = tensor<string, []>("op_202_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_202_pad_0 = const()[name = tensor<string, []>("op_202_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 11008, 1, 64]> var_202_cast_fp16 = conv(dilations = var_200, groups = var_28, pad = var_202_pad_0, pad_type = var_202_pad_type_0, strides = var_198, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_202_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 1]> blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor<fp16, [1, 11008, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202427456)))];
            tensor<fp16, [1, 11008, 1, 64]> input_5_cast_fp16 = mul(x = var_202_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
            tensor<int32, [2]> var_206 = const()[name = tensor<string, []>("op_206"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_208 = const()[name = tensor<string, []>("op_208"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_210_pad_type_0 = const()[name = tensor<string, []>("op_210_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_210_pad_0 = const()[name = tensor<string, []>("op_210_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 11008, 1, 64]> var_210_cast_fp16 = conv(dilations = var_208, groups = var_28, pad = var_210_pad_0, pad_type = var_210_pad_type_0, strides = var_206, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_210_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 1]> blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor<fp16, [1, 11008, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202449536)))];
            tensor<fp16, [1, 11008, 1, 64]> x_fc_2_1_cast_fp16 = mul(x = var_210_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor<string, []>("x_fc_2_1_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 64]> var_212_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor<string, []>("op_212_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 64]> input_7_cast_fp16 = mul(x = var_212_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
            tensor<int32, [2]> var_216 = const()[name = tensor<string, []>("op_216"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_218 = const()[name = tensor<string, []>("op_218"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_220_pad_type_0 = const()[name = tensor<string, []>("op_220_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_220_pad_0 = const()[name = tensor<string, []>("op_220_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_220_cast_fp16 = conv(dilations = var_218, groups = var_28, pad = var_220_pad_0, pad_type = var_220_pad_type_0, strides = var_216, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("op_220_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202471616)))];
            tensor<fp16, [1, 4096, 1, 64]> var_221_cast_fp16 = mul(x = var_220_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor<string, []>("op_221_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_15_cast_fp16 = add(x = var_221_cast_fp16, y = x_11_cast_fp16)[name = tensor<string, []>("x_15_cast_fp16")];
            tensor<int32, []> var_228 = const()[name = tensor<string, []>("op_228"), val = tensor<int32, []>(3)];
            tensor<int32, []> var_233 = const()[name = tensor<string, []>("op_233"), val = tensor<int32, []>(-2)];
            tensor<int32, []> var_235 = const()[name = tensor<string, []>("op_235"), val = tensor<int32, []>(-1)];
            tensor<int32, []> var_242 = const()[name = tensor<string, []>("op_242"), val = tensor<int32, []>(1)];
            tensor<bool, []> var_243 = const()[name = tensor<string, []>("op_243"), val = tensor<bool, []>(true)];
            tensor<fp16, [1, 4096, 1, 64]> var_250_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor<string, []>("op_250_cast_fp16")];
            tensor<int32, [1]> var_251 = const()[name = tensor<string, []>("op_251"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 1, 64]> norm_x_5_cast_fp16 = reduce_mean(axes = var_251, keep_dims = var_243, x = var_250_cast_fp16)[name = tensor<string, []>("norm_x_5_cast_fp16")];
            tensor<fp16, []> var_253_to_fp16 = const()[name = tensor<string, []>("op_253_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 1, 64]> var_254_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_253_to_fp16)[name = tensor<string, []>("op_254_cast_fp16")];
            tensor<fp16, []> var_255_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_255_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
            tensor<fp16, [1, 1, 1, 64]> var_255_cast_fp16 = rsqrt(epsilon = var_255_epsilon_0_to_fp16, x = var_254_cast_fp16)[name = tensor<string, []>("op_255_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_255_cast_fp16)[name = tensor<string, []>("x_normed_9_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_norm_1_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_norm_1_weight_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202479872)))];
            tensor<fp16, [1, 4096, 1, 64]> x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor<string, []>("x_19_cast_fp16")];
            tensor<int32, [2]> var_270 = const()[name = tensor<string, []>("op_270"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_272 = const()[name = tensor<string, []>("op_272"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_274_pad_type_0 = const()[name = tensor<string, []>("op_274_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_274_pad_0 = const()[name = tensor<string, []>("op_274_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_274_cast_fp16 = conv(dilations = var_272, groups = var_242, pad = var_274_pad_0, pad_type = var_274_pad_type_0, strides = var_270, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor<string, []>("op_274_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202488128)))];
            tensor<fp16, [1, 4096, 1, 64]> q_7_cast_fp16 = mul(x = var_274_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor<string, []>("q_7_cast_fp16")];
            tensor<int32, [2]> var_278 = const()[name = tensor<string, []>("op_278"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_280 = const()[name = tensor<string, []>("op_280"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_282_pad_type_0 = const()[name = tensor<string, []>("op_282_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_282_pad_0 = const()[name = tensor<string, []>("op_282_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_282_cast_fp16 = conv(dilations = var_280, groups = var_242, pad = var_282_pad_0, pad_type = var_282_pad_type_0, strides = var_278, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor<string, []>("op_282_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202496384)))];
            tensor<fp16, [1, 4096, 1, 64]> k_9_cast_fp16 = mul(x = var_282_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
            tensor<int32, [2]> var_286 = const()[name = tensor<string, []>("op_286"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_288 = const()[name = tensor<string, []>("op_288"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_290_pad_type_0 = const()[name = tensor<string, []>("op_290_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_290_pad_0 = const()[name = tensor<string, []>("op_290_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_290_cast_fp16 = conv(dilations = var_288, groups = var_242, pad = var_290_pad_0, pad_type = var_290_pad_type_0, strides = var_286, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor<string, []>("op_290_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202504640)))];
            tensor<fp16, [1, 4096, 1, 64]> v_7_cast_fp16 = mul(x = var_290_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor<string, []>("v_7_cast_fp16")];
            tensor<int32, [4]> var_292 = const()[name = tensor<string, []>("op_292"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> q_9_cast_fp16 = reshape(shape = var_292, x = q_7_cast_fp16)[name = tensor<string, []>("q_9_cast_fp16")];
            tensor<int32, [4]> var_294 = const()[name = tensor<string, []>("op_294"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> k_11_cast_fp16 = reshape(shape = var_294, x = k_9_cast_fp16)[name = tensor<string, []>("k_11_cast_fp16")];
            tensor<int32, [4]> var_296 = const()[name = tensor<string, []>("op_296"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> new_v_cache_1 = reshape(shape = var_296, x = v_7_cast_fp16)[name = tensor<string, []>("v_9_cast_fp16")];
            tensor<int32, [4]> var_308_begin_0 = const()[name = tensor<string, []>("op_308_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_308_end_0 = const()[name = tensor<string, []>("op_308_end_0"), val = tensor<int32, [4]>([1, 32, 64, 64])];
            tensor<bool, [4]> var_308_end_mask_0 = const()[name = tensor<string, []>("op_308_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 32, 64, 64]> var_308_cast_fp16 = slice_by_index(begin = var_308_begin_0, end = var_308_end_0, end_mask = var_308_end_mask_0, x = q_9_cast_fp16)[name = tensor<string, []>("op_308_cast_fp16")];
            tensor<int32, [4]> var_314_begin_0 = const()[name = tensor<string, []>("op_314_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_314_end_0 = const()[name = tensor<string, []>("op_314_end_0"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<bool, [4]> var_314_end_mask_0 = const()[name = tensor<string, []>("op_314_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 64, 64]> var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_9_cast_fp16)[name = tensor<string, []>("op_314_cast_fp16")];
            tensor<fp16, []> const_10_promoted_to_fp16 = const()[name = tensor<string, []>("const_10_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 32, 64, 64]> var_316_cast_fp16 = mul(x = var_314_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor<string, []>("op_316_cast_fp16")];
            tensor<bool, []> rotated_5_interleave_0 = const()[name = tensor<string, []>("rotated_5_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> rotated_5_cast_fp16 = concat(axis = var_233, interleave = rotated_5_interleave_0, values = (var_316_cast_fp16, var_308_cast_fp16))[name = tensor<string, []>("rotated_5_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_319_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor<string, []>("op_319_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_320_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor<string, []>("op_320_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> roped_5_cast_fp16 = add(x = var_319_cast_fp16, y = var_320_cast_fp16)[name = tensor<string, []>("roped_5_cast_fp16")];
            tensor<int32, [4]> var_333_begin_0 = const()[name = tensor<string, []>("op_333_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_333_end_0 = const()[name = tensor<string, []>("op_333_end_0"), val = tensor<int32, [4]>([1, 32, 64, 64])];
            tensor<bool, [4]> var_333_end_mask_0 = const()[name = tensor<string, []>("op_333_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 32, 64, 64]> var_333_cast_fp16 = slice_by_index(begin = var_333_begin_0, end = var_333_end_0, end_mask = var_333_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_333_cast_fp16")];
            tensor<int32, [4]> var_339_begin_0 = const()[name = tensor<string, []>("op_339_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_339_end_0 = const()[name = tensor<string, []>("op_339_end_0"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<bool, [4]> var_339_end_mask_0 = const()[name = tensor<string, []>("op_339_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 64, 64]> var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_339_cast_fp16")];
            tensor<fp16, []> const_12_promoted_to_fp16 = const()[name = tensor<string, []>("const_12_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 32, 64, 64]> var_341_cast_fp16 = mul(x = var_339_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor<string, []>("op_341_cast_fp16")];
            tensor<bool, []> rotated_interleave_0 = const()[name = tensor<string, []>("rotated_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> rotated_cast_fp16 = concat(axis = var_233, interleave = rotated_interleave_0, values = (var_341_cast_fp16, var_333_cast_fp16))[name = tensor<string, []>("rotated_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_344_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor<string, []>("op_344_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_345_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor<string, []>("op_345_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> roped_cast_fp16 = add(x = var_344_cast_fp16, y = var_345_cast_fp16)[name = tensor<string, []>("roped_cast_fp16")];
            tensor<bool, []> q_interleave_0 = const()[name = tensor<string, []>("q_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> q_cast_fp16 = concat(axis = var_233, interleave = q_interleave_0, values = roped_5_cast_fp16)[name = tensor<string, []>("q_cast_fp16")];
            tensor<bool, []> k_13_interleave_0 = const()[name = tensor<string, []>("k_13_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> new_k_cache_1 = concat(axis = var_233, interleave = k_13_interleave_0, values = roped_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
            tensor<bool, []> k_interleave_0 = const()[name = tensor<string, []>("k_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 512]> k_cast_fp16 = concat(axis = var_235, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor<string, []>("k_cast_fp16")];
            tensor<bool, []> v_interleave_0 = const()[name = tensor<string, []>("v_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 512]> v_cast_fp16 = concat(axis = var_235, interleave = v_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor<string, []>("v_cast_fp16")];
            tensor<fp16, []> var_367_to_fp16 = const()[name = tensor<string, []>("op_367_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 32, 128, 64]> var_368_cast_fp16 = mul(x = q_cast_fp16, y = var_367_to_fp16)[name = tensor<string, []>("op_368_cast_fp16")];
            tensor<bool, []> attn_weights_5_transpose_x_0 = const()[name = tensor<string, []>("attn_weights_5_transpose_x_0"), val = tensor<bool, []>(true)];
            tensor<bool, []> attn_weights_5_transpose_y_0 = const()[name = tensor<string, []>("attn_weights_5_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 64, 512]> attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_368_cast_fp16, y = k_cast_fp16)[name = tensor<string, []>("attn_weights_5_cast_fp16")];
            tensor<fp16, [1, 32, 64, 512]> attn_weights_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor<string, []>("attn_weights_cast_fp16")];
            tensor<fp16, [1, 32, 64, 512]> var_376_cast_fp16 = softmax(axis = var_228, x = attn_weights_cast_fp16)[name = tensor<string, []>("op_376_cast_fp16")];
            tensor<bool, []> attn_3_transpose_x_0 = const()[name = tensor<string, []>("attn_3_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> attn_3_transpose_y_0 = const()[name = tensor<string, []>("attn_3_transpose_y_0"), val = tensor<bool, []>(true)];
            tensor<fp16, [1, 32, 128, 64]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_cast_fp16, y = var_376_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
            tensor<int32, [4]> var_380 = const()[name = tensor<string, []>("op_380"), val = tensor<int32, [4]>([1, 4096, 1, -1])];
            tensor<fp16, [1, 4096, 1, 64]> input_9_cast_fp16 = reshape(shape = var_380, x = attn_3_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
            tensor<int32, [2]> var_384 = const()[name = tensor<string, []>("op_384"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_386 = const()[name = tensor<string, []>("op_386"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_388_pad_type_0 = const()[name = tensor<string, []>("op_388_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_388_pad_0 = const()[name = tensor<string, []>("op_388_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_388_cast_fp16 = conv(dilations = var_386, groups = var_242, pad = var_388_pad_0, pad_type = var_388_pad_type_0, strides = var_384, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("op_388_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202512896)))];
            tensor<fp16, [1, 4096, 1, 64]> attention_output_cast_fp16 = mul(x = var_388_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor<string, []>("attention_output_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_25_cast_fp16 = add(x = attention_output_cast_fp16, y = x_15_cast_fp16)[name = tensor<string, []>("x_25_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> var_397_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor<string, []>("op_397_cast_fp16")];
            tensor<int32, [1]> var_398 = const()[name = tensor<string, []>("op_398"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 1, 64]> norm_x_cast_fp16 = reduce_mean(axes = var_398, keep_dims = var_243, x = var_397_cast_fp16)[name = tensor<string, []>("norm_x_cast_fp16")];
            tensor<fp16, []> var_400_to_fp16 = const()[name = tensor<string, []>("op_400_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 1, 64]> var_401_cast_fp16 = add(x = norm_x_cast_fp16, y = var_400_to_fp16)[name = tensor<string, []>("op_401_cast_fp16")];
            tensor<fp16, []> var_402_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_402_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
            tensor<fp16, [1, 1, 1, 64]> var_402_cast_fp16 = rsqrt(epsilon = var_402_epsilon_0_to_fp16, x = var_401_cast_fp16)[name = tensor<string, []>("op_402_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_402_cast_fp16)[name = tensor<string, []>("x_normed_13_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_norm_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_norm_2_weight_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202521152)))];
            tensor<fp16, [1, 4096, 1, 64]> input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
            tensor<int32, [2]> var_414 = const()[name = tensor<string, []>("op_414"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_416 = const()[name = tensor<string, []>("op_416"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_418_pad_type_0 = const()[name = tensor<string, []>("op_418_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_418_pad_0 = const()[name = tensor<string, []>("op_418_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 11008, 1, 64]> var_418_cast_fp16 = conv(dilations = var_416, groups = var_242, pad = var_418_pad_0, pad_type = var_418_pad_type_0, strides = var_414, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_418_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 1]> blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor<fp16, [1, 11008, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202529408)))];
            tensor<fp16, [1, 11008, 1, 64]> input_13_cast_fp16 = mul(x = var_418_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
            tensor<int32, [2]> var_422 = const()[name = tensor<string, []>("op_422"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_424 = const()[name = tensor<string, []>("op_424"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_426_pad_type_0 = const()[name = tensor<string, []>("op_426_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_426_pad_0 = const()[name = tensor<string, []>("op_426_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 11008, 1, 64]> var_426_cast_fp16 = conv(dilations = var_424, groups = var_242, pad = var_426_pad_0, pad_type = var_426_pad_type_0, strides = var_422, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_426_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 1]> blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor<fp16, [1, 11008, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202551488)))];
            tensor<fp16, [1, 11008, 1, 64]> x_fc_2_cast_fp16 = mul(x = var_426_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor<string, []>("x_fc_2_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 64]> var_428_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor<string, []>("op_428_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 64]> input_cast_fp16 = mul(x = var_428_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
            tensor<int32, [2]> var_432 = const()[name = tensor<string, []>("op_432"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_434 = const()[name = tensor<string, []>("op_434"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_436_pad_type_0 = const()[name = tensor<string, []>("op_436_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_436_pad_0 = const()[name = tensor<string, []>("op_436_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_436_cast_fp16 = conv(dilations = var_434, groups = var_242, pad = var_436_pad_0, pad_type = var_436_pad_type_0, strides = var_432, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_436_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202573568)))];
            tensor<fp16, [1, 4096, 1, 64]> var_437_cast_fp16 = mul(x = var_436_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor<string, []>("op_437_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> new_x = add(x = var_437_cast_fp16, y = x_25_cast_fp16)[name = tensor<string, []>("op_438_cast_fp16")];
        } -> (new_x, new_k_cache_0, new_k_cache_1, new_v_cache_0, new_v_cache_1);
}