File size: 81,096 Bytes

a76a14d

program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})]
{
    func main<ios16>(tensor<fp16, [128, 64]> cos, tensor<fp16, [1, 32, 128, 448]> k_cache_0, tensor<fp16, [1, 32, 128, 448]> k_cache_1, tensor<fp16, [1, 32, 128, 448]> k_cache_2, tensor<fp16, [1, 1, 64, 512]> mask, tensor<fp16, [128, 64]> sin, tensor<fp16, [1, 32, 128, 448]> v_cache_0, tensor<fp16, [1, 32, 128, 448]> v_cache_1, tensor<fp16, [1, 32, 128, 448]> v_cache_2, tensor<fp16, [1, 4096, 1, 64]> x) [CoreML_InputDefaultValues = dict<tensor<string, []>, tensor<fp32, []>>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] {
            tensor<fp16, [4096, 4096, 1, 1]> blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8388736))), name = tensor<string, []>("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8388864))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16777536))), name = tensor<string, []>("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16777664))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25166336))), name = tensor<string, []>("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25166464))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33555136))), name = tensor<string, []>("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [11008, 4096, 1, 1]> blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33555264))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56099712))), name = tensor<string, []>("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([11008, 4096, 1, 1])];
            tensor<fp16, [11008, 4096, 1, 1]> blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56099840))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78644288))), name = tensor<string, []>("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([11008, 4096, 1, 1])];
            tensor<fp16, [4096, 11008, 1, 1]> blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78644416))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101188864))), name = tensor<string, []>("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 11008, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101188992))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(109577664))), name = tensor<string, []>("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(109577792))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(117966464))), name = tensor<string, []>("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(117966592))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126355264))), name = tensor<string, []>("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126355392))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134744064))), name = tensor<string, []>("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [11008, 4096, 1, 1]> blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134744192))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157288640))), name = tensor<string, []>("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([11008, 4096, 1, 1])];
            tensor<fp16, [11008, 4096, 1, 1]> blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157288768))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(179833216))), name = tensor<string, []>("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([11008, 4096, 1, 1])];
            tensor<fp16, [4096, 11008, 1, 1]> blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(179833344))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202377792))), name = tensor<string, []>("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 11008, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202377920))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(210766592))), name = tensor<string, []>("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(210766720))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219155392))), name = tensor<string, []>("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219155520))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(227544192))), name = tensor<string, []>("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [4096, 4096, 1, 1]> blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [8388608]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(227544320))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(235932992))), name = tensor<string, []>("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 4096, 1, 1])];
            tensor<fp16, [11008, 4096, 1, 1]> blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(235933120))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(258477568))), name = tensor<string, []>("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([11008, 4096, 1, 1])];
            tensor<fp16, [11008, 4096, 1, 1]> blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(258477696))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(281022144))), name = tensor<string, []>("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([11008, 4096, 1, 1])];
            tensor<fp16, [4096, 11008, 1, 1]> blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor<uint8, [22544384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(281022272))), lut = tensor<fp16, [16]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303566720))), name = tensor<string, []>("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor<uint32, [4]>([4096, 11008, 1, 1])];
            tensor<int32, []> var_18 = const()[name = tensor<string, []>("op_18"), val = tensor<int32, []>(3)];
            tensor<int32, []> var_23 = const()[name = tensor<string, []>("op_23"), val = tensor<int32, []>(-2)];
            tensor<int32, []> var_25 = const()[name = tensor<string, []>("op_25"), val = tensor<int32, []>(-1)];
            tensor<int32, []> var_32 = const()[name = tensor<string, []>("op_32"), val = tensor<int32, []>(1)];
            tensor<bool, []> var_33 = const()[name = tensor<string, []>("op_33"), val = tensor<bool, []>(true)];
            tensor<fp16, [1, 4096, 1, 64]> var_41_cast_fp16 = mul(x = x, y = x)[name = tensor<string, []>("op_41_cast_fp16")];
            tensor<int32, [1]> var_42 = const()[name = tensor<string, []>("op_42"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 1, 64]> norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor<string, []>("norm_x_1_cast_fp16")];
            tensor<fp16, []> var_44_to_fp16 = const()[name = tensor<string, []>("op_44_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 1, 64]> var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor<string, []>("op_45_cast_fp16")];
            tensor<fp16, []> var_46_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_46_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
            tensor<fp16, [1, 1, 1, 64]> var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor<string, []>("op_46_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor<string, []>("x_normed_1_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_norm_1_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_norm_1_weight_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303566848)))];
            tensor<fp16, [1, 4096, 1, 64]> x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor<string, []>("x_5_cast_fp16")];
            tensor<int32, [2]> var_58 = const()[name = tensor<string, []>("op_58"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_60 = const()[name = tensor<string, []>("op_60"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_62_pad_type_0 = const()[name = tensor<string, []>("op_62_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_62_pad_0 = const()[name = tensor<string, []>("op_62_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor<string, []>("op_62_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303575104)))];
            tensor<fp16, [1, 4096, 1, 64]> q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor<string, []>("q_1_cast_fp16")];
            tensor<int32, [2]> var_66 = const()[name = tensor<string, []>("op_66"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_68 = const()[name = tensor<string, []>("op_68"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_70_pad_type_0 = const()[name = tensor<string, []>("op_70_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_70_pad_0 = const()[name = tensor<string, []>("op_70_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor<string, []>("op_70_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303583360)))];
            tensor<fp16, [1, 4096, 1, 64]> k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
            tensor<int32, [2]> var_74 = const()[name = tensor<string, []>("op_74"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_76 = const()[name = tensor<string, []>("op_76"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_78_pad_type_0 = const()[name = tensor<string, []>("op_78_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_78_pad_0 = const()[name = tensor<string, []>("op_78_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor<string, []>("op_78_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303591616)))];
            tensor<fp16, [1, 4096, 1, 64]> v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor<string, []>("v_1_cast_fp16")];
            tensor<int32, [4]> var_80 = const()[name = tensor<string, []>("op_80"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor<string, []>("q_3_cast_fp16")];
            tensor<int32, [4]> var_82 = const()[name = tensor<string, []>("op_82"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
            tensor<int32, [4]> var_84 = const()[name = tensor<string, []>("op_84"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor<string, []>("v_3_cast_fp16")];
            tensor<int32, [4]> var_96_begin_0 = const()[name = tensor<string, []>("op_96_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_96_end_0 = const()[name = tensor<string, []>("op_96_end_0"), val = tensor<int32, [4]>([1, 32, 64, 64])];
            tensor<bool, [4]> var_96_end_mask_0 = const()[name = tensor<string, []>("op_96_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 32, 64, 64]> var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor<string, []>("op_96_cast_fp16")];
            tensor<int32, [4]> var_102_begin_0 = const()[name = tensor<string, []>("op_102_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_102_end_0 = const()[name = tensor<string, []>("op_102_end_0"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<bool, [4]> var_102_end_mask_0 = const()[name = tensor<string, []>("op_102_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 64, 64]> var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor<string, []>("op_102_cast_fp16")];
            tensor<fp16, []> const_3_promoted_to_fp16 = const()[name = tensor<string, []>("const_3_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 32, 64, 64]> var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor<string, []>("op_104_cast_fp16")];
            tensor<bool, []> rotated_1_interleave_0 = const()[name = tensor<string, []>("rotated_1_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor<string, []>("rotated_1_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor<string, []>("op_107_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor<string, []>("op_108_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor<string, []>("roped_1_cast_fp16")];
            tensor<int32, [4]> var_121_begin_0 = const()[name = tensor<string, []>("op_121_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_121_end_0 = const()[name = tensor<string, []>("op_121_end_0"), val = tensor<int32, [4]>([1, 32, 64, 64])];
            tensor<bool, [4]> var_121_end_mask_0 = const()[name = tensor<string, []>("op_121_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 32, 64, 64]> var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_121_cast_fp16")];
            tensor<int32, [4]> var_127_begin_0 = const()[name = tensor<string, []>("op_127_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_127_end_0 = const()[name = tensor<string, []>("op_127_end_0"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<bool, [4]> var_127_end_mask_0 = const()[name = tensor<string, []>("op_127_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 64, 64]> var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_127_cast_fp16")];
            tensor<fp16, []> const_5_promoted_to_fp16 = const()[name = tensor<string, []>("const_5_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 32, 64, 64]> var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor<string, []>("op_129_cast_fp16")];
            tensor<bool, []> rotated_3_interleave_0 = const()[name = tensor<string, []>("rotated_3_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor<string, []>("rotated_3_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor<string, []>("op_132_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor<string, []>("op_133_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor<string, []>("roped_3_cast_fp16")];
            tensor<bool, []> q_5_interleave_0 = const()[name = tensor<string, []>("q_5_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor<string, []>("q_5_cast_fp16")];
            tensor<bool, []> k_5_interleave_0 = const()[name = tensor<string, []>("k_5_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
            tensor<bool, []> k_7_interleave_0 = const()[name = tensor<string, []>("k_7_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 512]> k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor<string, []>("k_7_cast_fp16")];
            tensor<bool, []> v_5_interleave_0 = const()[name = tensor<string, []>("v_5_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 512]> v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor<string, []>("v_5_cast_fp16")];
            tensor<fp16, []> var_155_to_fp16 = const()[name = tensor<string, []>("op_155_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 32, 128, 64]> var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor<string, []>("op_156_cast_fp16")];
            tensor<bool, []> attn_weights_1_transpose_x_0 = const()[name = tensor<string, []>("attn_weights_1_transpose_x_0"), val = tensor<bool, []>(true)];
            tensor<bool, []> attn_weights_1_transpose_y_0 = const()[name = tensor<string, []>("attn_weights_1_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 64, 512]> attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor<string, []>("attn_weights_1_cast_fp16")];
            tensor<fp16, [1, 32, 64, 512]> attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor<string, []>("attn_weights_3_cast_fp16")];
            tensor<fp16, [1, 32, 64, 512]> var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor<string, []>("op_164_cast_fp16")];
            tensor<bool, []> attn_1_transpose_x_0 = const()[name = tensor<string, []>("attn_1_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> attn_1_transpose_y_0 = const()[name = tensor<string, []>("attn_1_transpose_y_0"), val = tensor<bool, []>(true)];
            tensor<fp16, [1, 32, 128, 64]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor<string, []>("attn_1_cast_fp16")];
            tensor<int32, [4]> var_168 = const()[name = tensor<string, []>("op_168"), val = tensor<int32, [4]>([1, 4096, 1, -1])];
            tensor<fp16, [1, 4096, 1, 64]> input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
            tensor<int32, [2]> var_172 = const()[name = tensor<string, []>("op_172"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_174 = const()[name = tensor<string, []>("op_174"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_176_pad_type_0 = const()[name = tensor<string, []>("op_176_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_176_pad_0 = const()[name = tensor<string, []>("op_176_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_176_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303599872)))];
            tensor<fp16, [1, 4096, 1, 64]> attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor<string, []>("attention_output_1_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor<string, []>("x_11_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor<string, []>("op_185_cast_fp16")];
            tensor<int32, [1]> var_186 = const()[name = tensor<string, []>("op_186"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 1, 64]> norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor<string, []>("norm_x_3_cast_fp16")];
            tensor<fp16, []> var_188_to_fp16 = const()[name = tensor<string, []>("op_188_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 1, 64]> var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor<string, []>("op_189_cast_fp16")];
            tensor<fp16, []> var_190_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_190_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
            tensor<fp16, [1, 1, 1, 64]> var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor<string, []>("op_190_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor<string, []>("x_normed_5_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_norm_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_norm_2_weight_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303608128)))];
            tensor<fp16, [1, 4096, 1, 64]> input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
            tensor<int32, [2]> var_202 = const()[name = tensor<string, []>("op_202"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_204 = const()[name = tensor<string, []>("op_204"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_206_pad_type_0 = const()[name = tensor<string, []>("op_206_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_206_pad_0 = const()[name = tensor<string, []>("op_206_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 11008, 1, 64]> var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_206_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 1]> blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor<fp16, [1, 11008, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303616384)))];
            tensor<fp16, [1, 11008, 1, 64]> input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
            tensor<int32, [2]> var_210 = const()[name = tensor<string, []>("op_210"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_212 = const()[name = tensor<string, []>("op_212"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_214_pad_type_0 = const()[name = tensor<string, []>("op_214_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_214_pad_0 = const()[name = tensor<string, []>("op_214_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 11008, 1, 64]> var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_214_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 1]> blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor<fp16, [1, 11008, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303638464)))];
            tensor<fp16, [1, 11008, 1, 64]> x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor<string, []>("x_fc_2_1_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 64]> var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor<string, []>("op_216_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 64]> input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
            tensor<int32, [2]> var_220 = const()[name = tensor<string, []>("op_220"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_222 = const()[name = tensor<string, []>("op_222"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_224_pad_type_0 = const()[name = tensor<string, []>("op_224_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_224_pad_0 = const()[name = tensor<string, []>("op_224_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("op_224_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303660544)))];
            tensor<fp16, [1, 4096, 1, 64]> var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor<string, []>("op_225_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor<string, []>("x_15_cast_fp16")];
            tensor<int32, []> var_232 = const()[name = tensor<string, []>("op_232"), val = tensor<int32, []>(3)];
            tensor<int32, []> var_237 = const()[name = tensor<string, []>("op_237"), val = tensor<int32, []>(-2)];
            tensor<int32, []> var_239 = const()[name = tensor<string, []>("op_239"), val = tensor<int32, []>(-1)];
            tensor<int32, []> var_246 = const()[name = tensor<string, []>("op_246"), val = tensor<int32, []>(1)];
            tensor<bool, []> var_247 = const()[name = tensor<string, []>("op_247"), val = tensor<bool, []>(true)];
            tensor<fp16, [1, 4096, 1, 64]> var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor<string, []>("op_254_cast_fp16")];
            tensor<int32, [1]> var_255 = const()[name = tensor<string, []>("op_255"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 1, 64]> norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor<string, []>("norm_x_5_cast_fp16")];
            tensor<fp16, []> var_257_to_fp16 = const()[name = tensor<string, []>("op_257_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 1, 64]> var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor<string, []>("op_258_cast_fp16")];
            tensor<fp16, []> var_259_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_259_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
            tensor<fp16, [1, 1, 1, 64]> var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor<string, []>("op_259_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor<string, []>("x_normed_9_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_norm_1_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_norm_1_weight_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303668800)))];
            tensor<fp16, [1, 4096, 1, 64]> x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor<string, []>("x_19_cast_fp16")];
            tensor<int32, [2]> var_274 = const()[name = tensor<string, []>("op_274"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_276 = const()[name = tensor<string, []>("op_276"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_278_pad_type_0 = const()[name = tensor<string, []>("op_278_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_278_pad_0 = const()[name = tensor<string, []>("op_278_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor<string, []>("op_278_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303677056)))];
            tensor<fp16, [1, 4096, 1, 64]> q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor<string, []>("q_7_cast_fp16")];
            tensor<int32, [2]> var_282 = const()[name = tensor<string, []>("op_282"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_284 = const()[name = tensor<string, []>("op_284"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_286_pad_type_0 = const()[name = tensor<string, []>("op_286_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_286_pad_0 = const()[name = tensor<string, []>("op_286_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor<string, []>("op_286_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303685312)))];
            tensor<fp16, [1, 4096, 1, 64]> k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
            tensor<int32, [2]> var_290 = const()[name = tensor<string, []>("op_290"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_292 = const()[name = tensor<string, []>("op_292"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_294_pad_type_0 = const()[name = tensor<string, []>("op_294_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_294_pad_0 = const()[name = tensor<string, []>("op_294_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor<string, []>("op_294_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303693568)))];
            tensor<fp16, [1, 4096, 1, 64]> v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor<string, []>("v_7_cast_fp16")];
            tensor<int32, [4]> var_296 = const()[name = tensor<string, []>("op_296"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor<string, []>("q_9_cast_fp16")];
            tensor<int32, [4]> var_298 = const()[name = tensor<string, []>("op_298"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor<string, []>("k_11_cast_fp16")];
            tensor<int32, [4]> var_300 = const()[name = tensor<string, []>("op_300"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor<string, []>("v_9_cast_fp16")];
            tensor<int32, [4]> var_312_begin_0 = const()[name = tensor<string, []>("op_312_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_312_end_0 = const()[name = tensor<string, []>("op_312_end_0"), val = tensor<int32, [4]>([1, 32, 64, 64])];
            tensor<bool, [4]> var_312_end_mask_0 = const()[name = tensor<string, []>("op_312_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 32, 64, 64]> var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor<string, []>("op_312_cast_fp16")];
            tensor<int32, [4]> var_318_begin_0 = const()[name = tensor<string, []>("op_318_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_318_end_0 = const()[name = tensor<string, []>("op_318_end_0"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<bool, [4]> var_318_end_mask_0 = const()[name = tensor<string, []>("op_318_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 64, 64]> var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor<string, []>("op_318_cast_fp16")];
            tensor<fp16, []> const_10_promoted_to_fp16 = const()[name = tensor<string, []>("const_10_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 32, 64, 64]> var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor<string, []>("op_320_cast_fp16")];
            tensor<bool, []> rotated_5_interleave_0 = const()[name = tensor<string, []>("rotated_5_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor<string, []>("rotated_5_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor<string, []>("op_323_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor<string, []>("op_324_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor<string, []>("roped_5_cast_fp16")];
            tensor<int32, [4]> var_337_begin_0 = const()[name = tensor<string, []>("op_337_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_337_end_0 = const()[name = tensor<string, []>("op_337_end_0"), val = tensor<int32, [4]>([1, 32, 64, 64])];
            tensor<bool, [4]> var_337_end_mask_0 = const()[name = tensor<string, []>("op_337_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 32, 64, 64]> var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_337_cast_fp16")];
            tensor<int32, [4]> var_343_begin_0 = const()[name = tensor<string, []>("op_343_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_343_end_0 = const()[name = tensor<string, []>("op_343_end_0"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<bool, [4]> var_343_end_mask_0 = const()[name = tensor<string, []>("op_343_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 64, 64]> var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor<string, []>("op_343_cast_fp16")];
            tensor<fp16, []> const_12_promoted_to_fp16 = const()[name = tensor<string, []>("const_12_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 32, 64, 64]> var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor<string, []>("op_345_cast_fp16")];
            tensor<bool, []> rotated_7_interleave_0 = const()[name = tensor<string, []>("rotated_7_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor<string, []>("rotated_7_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor<string, []>("op_348_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor<string, []>("op_349_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor<string, []>("roped_7_cast_fp16")];
            tensor<bool, []> q_11_interleave_0 = const()[name = tensor<string, []>("q_11_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor<string, []>("q_11_cast_fp16")];
            tensor<bool, []> k_13_interleave_0 = const()[name = tensor<string, []>("k_13_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
            tensor<bool, []> k_15_interleave_0 = const()[name = tensor<string, []>("k_15_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 512]> k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor<string, []>("k_15_cast_fp16")];
            tensor<bool, []> v_11_interleave_0 = const()[name = tensor<string, []>("v_11_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 512]> v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor<string, []>("v_11_cast_fp16")];
            tensor<fp16, []> var_371_to_fp16 = const()[name = tensor<string, []>("op_371_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 32, 128, 64]> var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor<string, []>("op_372_cast_fp16")];
            tensor<bool, []> attn_weights_5_transpose_x_0 = const()[name = tensor<string, []>("attn_weights_5_transpose_x_0"), val = tensor<bool, []>(true)];
            tensor<bool, []> attn_weights_5_transpose_y_0 = const()[name = tensor<string, []>("attn_weights_5_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 64, 512]> attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor<string, []>("attn_weights_5_cast_fp16")];
            tensor<fp16, [1, 32, 64, 512]> attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor<string, []>("attn_weights_7_cast_fp16")];
            tensor<fp16, [1, 32, 64, 512]> var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor<string, []>("op_380_cast_fp16")];
            tensor<bool, []> attn_3_transpose_x_0 = const()[name = tensor<string, []>("attn_3_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> attn_3_transpose_y_0 = const()[name = tensor<string, []>("attn_3_transpose_y_0"), val = tensor<bool, []>(true)];
            tensor<fp16, [1, 32, 128, 64]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
            tensor<int32, [4]> var_384 = const()[name = tensor<string, []>("op_384"), val = tensor<int32, [4]>([1, 4096, 1, -1])];
            tensor<fp16, [1, 4096, 1, 64]> input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
            tensor<int32, [2]> var_388 = const()[name = tensor<string, []>("op_388"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_390 = const()[name = tensor<string, []>("op_390"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_392_pad_type_0 = const()[name = tensor<string, []>("op_392_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_392_pad_0 = const()[name = tensor<string, []>("op_392_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("op_392_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303701824)))];
            tensor<fp16, [1, 4096, 1, 64]> attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor<string, []>("attention_output_3_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor<string, []>("x_25_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor<string, []>("op_401_cast_fp16")];
            tensor<int32, [1]> var_402 = const()[name = tensor<string, []>("op_402"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 1, 64]> norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor<string, []>("norm_x_7_cast_fp16")];
            tensor<fp16, []> var_404_to_fp16 = const()[name = tensor<string, []>("op_404_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 1, 64]> var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor<string, []>("op_405_cast_fp16")];
            tensor<fp16, []> var_406_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_406_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
            tensor<fp16, [1, 1, 1, 64]> var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor<string, []>("op_406_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor<string, []>("x_normed_13_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_norm_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_norm_2_weight_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303710080)))];
            tensor<fp16, [1, 4096, 1, 64]> input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
            tensor<int32, [2]> var_418 = const()[name = tensor<string, []>("op_418"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_420 = const()[name = tensor<string, []>("op_420"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_422_pad_type_0 = const()[name = tensor<string, []>("op_422_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_422_pad_0 = const()[name = tensor<string, []>("op_422_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 11008, 1, 64]> var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_422_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 1]> blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor<fp16, [1, 11008, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303718336)))];
            tensor<fp16, [1, 11008, 1, 64]> input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
            tensor<int32, [2]> var_426 = const()[name = tensor<string, []>("op_426"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_428 = const()[name = tensor<string, []>("op_428"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_430_pad_type_0 = const()[name = tensor<string, []>("op_430_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_430_pad_0 = const()[name = tensor<string, []>("op_430_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 11008, 1, 64]> var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_430_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 1]> blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor<fp16, [1, 11008, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303740416)))];
            tensor<fp16, [1, 11008, 1, 64]> x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor<string, []>("x_fc_2_3_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 64]> var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor<string, []>("op_432_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 64]> input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
            tensor<int32, [2]> var_436 = const()[name = tensor<string, []>("op_436"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_438 = const()[name = tensor<string, []>("op_438"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_440_pad_type_0 = const()[name = tensor<string, []>("op_440_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_440_pad_0 = const()[name = tensor<string, []>("op_440_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_440_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303762496)))];
            tensor<fp16, [1, 4096, 1, 64]> var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor<string, []>("op_441_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor<string, []>("x_29_cast_fp16")];
            tensor<int32, []> var_448 = const()[name = tensor<string, []>("op_448"), val = tensor<int32, []>(3)];
            tensor<int32, []> var_453 = const()[name = tensor<string, []>("op_453"), val = tensor<int32, []>(-2)];
            tensor<int32, []> var_455 = const()[name = tensor<string, []>("op_455"), val = tensor<int32, []>(-1)];
            tensor<int32, []> var_462 = const()[name = tensor<string, []>("op_462"), val = tensor<int32, []>(1)];
            tensor<bool, []> var_463 = const()[name = tensor<string, []>("op_463"), val = tensor<bool, []>(true)];
            tensor<fp16, [1, 4096, 1, 64]> var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor<string, []>("op_470_cast_fp16")];
            tensor<int32, [1]> var_471 = const()[name = tensor<string, []>("op_471"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 1, 64]> norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor<string, []>("norm_x_9_cast_fp16")];
            tensor<fp16, []> var_473_to_fp16 = const()[name = tensor<string, []>("op_473_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 1, 64]> var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor<string, []>("op_474_cast_fp16")];
            tensor<fp16, []> var_475_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_475_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
            tensor<fp16, [1, 1, 1, 64]> var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor<string, []>("op_475_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor<string, []>("x_normed_17_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_2_norm_1_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_norm_1_weight_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303770752)))];
            tensor<fp16, [1, 4096, 1, 64]> x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor<string, []>("x_33_cast_fp16")];
            tensor<int32, [2]> var_490 = const()[name = tensor<string, []>("op_490"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_492 = const()[name = tensor<string, []>("op_492"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_494_pad_type_0 = const()[name = tensor<string, []>("op_494_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_494_pad_0 = const()[name = tensor<string, []>("op_494_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor<string, []>("op_494_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303779008)))];
            tensor<fp16, [1, 4096, 1, 64]> q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor<string, []>("q_13_cast_fp16")];
            tensor<int32, [2]> var_498 = const()[name = tensor<string, []>("op_498"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_500 = const()[name = tensor<string, []>("op_500"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_502_pad_type_0 = const()[name = tensor<string, []>("op_502_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_502_pad_0 = const()[name = tensor<string, []>("op_502_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor<string, []>("op_502_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303787264)))];
            tensor<fp16, [1, 4096, 1, 64]> k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor<string, []>("k_17_cast_fp16")];
            tensor<int32, [2]> var_506 = const()[name = tensor<string, []>("op_506"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_508 = const()[name = tensor<string, []>("op_508"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_510_pad_type_0 = const()[name = tensor<string, []>("op_510_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_510_pad_0 = const()[name = tensor<string, []>("op_510_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor<string, []>("op_510_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303795520)))];
            tensor<fp16, [1, 4096, 1, 64]> v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor<string, []>("v_13_cast_fp16")];
            tensor<int32, [4]> var_512 = const()[name = tensor<string, []>("op_512"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor<string, []>("q_15_cast_fp16")];
            tensor<int32, [4]> var_514 = const()[name = tensor<string, []>("op_514"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor<string, []>("k_19_cast_fp16")];
            tensor<int32, [4]> var_516 = const()[name = tensor<string, []>("op_516"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<fp16, [1, 32, 128, 64]> new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor<string, []>("v_15_cast_fp16")];
            tensor<int32, [4]> var_528_begin_0 = const()[name = tensor<string, []>("op_528_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_528_end_0 = const()[name = tensor<string, []>("op_528_end_0"), val = tensor<int32, [4]>([1, 32, 64, 64])];
            tensor<bool, [4]> var_528_end_mask_0 = const()[name = tensor<string, []>("op_528_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 32, 64, 64]> var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor<string, []>("op_528_cast_fp16")];
            tensor<int32, [4]> var_534_begin_0 = const()[name = tensor<string, []>("op_534_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_534_end_0 = const()[name = tensor<string, []>("op_534_end_0"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<bool, [4]> var_534_end_mask_0 = const()[name = tensor<string, []>("op_534_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 64, 64]> var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor<string, []>("op_534_cast_fp16")];
            tensor<fp16, []> const_17_promoted_to_fp16 = const()[name = tensor<string, []>("const_17_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 32, 64, 64]> var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor<string, []>("op_536_cast_fp16")];
            tensor<bool, []> rotated_9_interleave_0 = const()[name = tensor<string, []>("rotated_9_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor<string, []>("rotated_9_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor<string, []>("op_539_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor<string, []>("op_540_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor<string, []>("roped_9_cast_fp16")];
            tensor<int32, [4]> var_553_begin_0 = const()[name = tensor<string, []>("op_553_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> var_553_end_0 = const()[name = tensor<string, []>("op_553_end_0"), val = tensor<int32, [4]>([1, 32, 64, 64])];
            tensor<bool, [4]> var_553_end_mask_0 = const()[name = tensor<string, []>("op_553_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
            tensor<fp16, [1, 32, 64, 64]> var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor<string, []>("op_553_cast_fp16")];
            tensor<int32, [4]> var_559_begin_0 = const()[name = tensor<string, []>("op_559_begin_0"), val = tensor<int32, [4]>([0, 0, 64, 0])];
            tensor<int32, [4]> var_559_end_0 = const()[name = tensor<string, []>("op_559_end_0"), val = tensor<int32, [4]>([1, 32, 128, 64])];
            tensor<bool, [4]> var_559_end_mask_0 = const()[name = tensor<string, []>("op_559_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 64, 64]> var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor<string, []>("op_559_cast_fp16")];
            tensor<fp16, []> const_19_promoted_to_fp16 = const()[name = tensor<string, []>("const_19_promoted_to_fp16"), val = tensor<fp16, []>(-0x1p+0)];
            tensor<fp16, [1, 32, 64, 64]> var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor<string, []>("op_561_cast_fp16")];
            tensor<bool, []> rotated_interleave_0 = const()[name = tensor<string, []>("rotated_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor<string, []>("rotated_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor<string, []>("op_564_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor<string, []>("op_565_cast_fp16")];
            tensor<fp16, [1, 32, 128, 64]> roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor<string, []>("roped_cast_fp16")];
            tensor<bool, []> q_interleave_0 = const()[name = tensor<string, []>("q_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor<string, []>("q_cast_fp16")];
            tensor<bool, []> k_21_interleave_0 = const()[name = tensor<string, []>("k_21_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 64]> new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor<string, []>("k_21_cast_fp16")];
            tensor<bool, []> k_interleave_0 = const()[name = tensor<string, []>("k_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 512]> k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor<string, []>("k_cast_fp16")];
            tensor<bool, []> v_interleave_0 = const()[name = tensor<string, []>("v_interleave_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 128, 512]> v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor<string, []>("v_cast_fp16")];
            tensor<fp16, []> var_587_to_fp16 = const()[name = tensor<string, []>("op_587_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 32, 128, 64]> var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor<string, []>("op_588_cast_fp16")];
            tensor<bool, []> attn_weights_9_transpose_x_0 = const()[name = tensor<string, []>("attn_weights_9_transpose_x_0"), val = tensor<bool, []>(true)];
            tensor<bool, []> attn_weights_9_transpose_y_0 = const()[name = tensor<string, []>("attn_weights_9_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 32, 64, 512]> attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor<string, []>("attn_weights_9_cast_fp16")];
            tensor<fp16, [1, 32, 64, 512]> attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor<string, []>("attn_weights_cast_fp16")];
            tensor<fp16, [1, 32, 64, 512]> var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor<string, []>("op_596_cast_fp16")];
            tensor<bool, []> attn_5_transpose_x_0 = const()[name = tensor<string, []>("attn_5_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> attn_5_transpose_y_0 = const()[name = tensor<string, []>("attn_5_transpose_y_0"), val = tensor<bool, []>(true)];
            tensor<fp16, [1, 32, 128, 64]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor<string, []>("attn_5_cast_fp16")];
            tensor<int32, [4]> var_600 = const()[name = tensor<string, []>("op_600"), val = tensor<int32, [4]>([1, 4096, 1, -1])];
            tensor<fp16, [1, 4096, 1, 64]> input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
            tensor<int32, [2]> var_604 = const()[name = tensor<string, []>("op_604"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_606 = const()[name = tensor<string, []>("op_606"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_608_pad_type_0 = const()[name = tensor<string, []>("op_608_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_608_pad_0 = const()[name = tensor<string, []>("op_608_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("op_608_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303803776)))];
            tensor<fp16, [1, 4096, 1, 64]> attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor<string, []>("attention_output_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor<string, []>("x_39_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor<string, []>("op_617_cast_fp16")];
            tensor<int32, [1]> var_618 = const()[name = tensor<string, []>("op_618"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 1, 64]> norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor<string, []>("norm_x_cast_fp16")];
            tensor<fp16, []> var_620_to_fp16 = const()[name = tensor<string, []>("op_620_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 1, 64]> var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor<string, []>("op_621_cast_fp16")];
            tensor<fp16, []> var_622_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_622_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
            tensor<fp16, [1, 1, 1, 64]> var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor<string, []>("op_622_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor<string, []>("x_normed_21_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_2_norm_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_norm_2_weight_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303812032)))];
            tensor<fp16, [1, 4096, 1, 64]> input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
            tensor<int32, [2]> var_634 = const()[name = tensor<string, []>("op_634"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_636 = const()[name = tensor<string, []>("op_636"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_638_pad_type_0 = const()[name = tensor<string, []>("op_638_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_638_pad_0 = const()[name = tensor<string, []>("op_638_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 11008, 1, 64]> var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("op_638_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 1]> blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor<fp16, [1, 11008, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303820288)))];
            tensor<fp16, [1, 11008, 1, 64]> input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
            tensor<int32, [2]> var_642 = const()[name = tensor<string, []>("op_642"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_644 = const()[name = tensor<string, []>("op_644"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_646_pad_type_0 = const()[name = tensor<string, []>("op_646_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_646_pad_0 = const()[name = tensor<string, []>("op_646_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 11008, 1, 64]> var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("op_646_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 1]> blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor<fp16, [1, 11008, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303842368)))];
            tensor<fp16, [1, 11008, 1, 64]> x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor<string, []>("x_fc_2_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 64]> var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor<string, []>("op_648_cast_fp16")];
            tensor<fp16, [1, 11008, 1, 64]> input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
            tensor<int32, [2]> var_652 = const()[name = tensor<string, []>("op_652"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [2]> var_654 = const()[name = tensor<string, []>("op_654"), val = tensor<int32, [2]>([1, 1])];
            tensor<string, []> var_656_pad_type_0 = const()[name = tensor<string, []>("op_656_pad_type_0"), val = tensor<string, []>("custom")];
            tensor<int32, [4]> var_656_pad_0 = const()[name = tensor<string, []>("op_656_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<fp16, [1, 4096, 1, 64]> var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_656_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 1]> blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor<fp16, [1, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303864448)))];
            tensor<fp16, [1, 4096, 1, 64]> var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor<string, []>("op_657_cast_fp16")];
            tensor<fp16, [1, 4096, 1, 64]> new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor<string, []>("op_658_cast_fp16")];
        } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2);
}