program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3400.42.1"}, {"coremlc-version", "3400.51.1"}})] { func input_1_context_512(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303873792))))[name = string("blocks_0_attn_q_proj_weight_palettized_cast_fp16")]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8388864))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303873920))))[name = string("blocks_0_attn_k_proj_weight_palettized_cast_fp16")]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16777664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303874048))))[name = string("blocks_0_attn_v_proj_weight_palettized_cast_fp16")]; tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25166464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303874176))))[name = string("blocks_0_attn_proj_weight_palettized_cast_fp16")]; tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33555264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303874304))))[name = string("blocks_0_mlp_fc_1_weight_palettized_cast_fp16")]; tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56099840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303874432))))[name = string("blocks_0_mlp_fc_2_weight_palettized_cast_fp16")]; tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78644416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303874560))))[name = string("blocks_0_mlp_proj_weight_palettized_cast_fp16")]; tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101188992))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303874688))))[name = string("blocks_1_attn_q_proj_weight_palettized_cast_fp16")]; tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109577792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303874816))))[name = string("blocks_1_attn_k_proj_weight_palettized_cast_fp16")]; tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117966592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303874944))))[name = string("blocks_1_attn_v_proj_weight_palettized_cast_fp16")]; tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126355392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303875072))))[name = string("blocks_1_attn_proj_weight_palettized_cast_fp16")]; tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134744192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303875200))))[name = string("blocks_1_mlp_fc_1_weight_palettized_cast_fp16")]; tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157288768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303875328))))[name = string("blocks_1_mlp_fc_2_weight_palettized_cast_fp16")]; tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179833344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303875456))))[name = string("blocks_1_mlp_proj_weight_palettized_cast_fp16")]; tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202377920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303875584))))[name = string("blocks_2_attn_q_proj_weight_palettized_cast_fp16")]; tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210766720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303875712))))[name = string("blocks_2_attn_k_proj_weight_palettized_cast_fp16")]; tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219155520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303875840))))[name = string("blocks_2_attn_v_proj_weight_palettized_cast_fp16")]; tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227544320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303875968))))[name = string("blocks_2_attn_proj_weight_palettized_cast_fp16")]; tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235933120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303876096))))[name = string("blocks_2_mlp_fc_1_weight_palettized_cast_fp16")]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258477696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303876224))))[name = string("blocks_2_mlp_fc_2_weight_palettized_cast_fp16")]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281022272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303876352))))[name = string("blocks_2_mlp_proj_weight_palettized_cast_fp16")]; int32 var_22 = const()[name = string("op_22"), val = int32(-1)]; int32 var_30 = const()[name = string("op_30"), val = int32(3)]; int32 var_31 = const()[name = string("op_31"), val = int32(1)]; int32 var_34 = const()[name = string("op_34"), val = int32(-2)]; bool var_35 = const()[name = string("op_35"), val = bool(true)]; tensor var_53_axes_0 = const()[name = string("op_53_axes_0"), val = tensor([-2])]; tensor var_53_cast_fp16 = squeeze(axes = var_53_axes_0, x = x)[name = string("op_53_cast_fp16")]; bool var_55_interleave_0 = const()[name = string("op_55_interleave_0"), val = bool(false)]; tensor eps_chan_1_to_fp16 = const()[name = string("eps_chan_1_to_fp16"), val = tensor([[[0x1.9e8p-3, 0x1.9e8p-3, 0x1.9e8p-3, 0x1.9e8p-3]]])]; tensor var_55_cast_fp16 = concat(axis = var_31, interleave = var_55_interleave_0, values = (var_53_cast_fp16, eps_chan_1_to_fp16))[name = string("op_55_cast_fp16")]; tensor x_eps_1_axes_0 = const()[name = string("x_eps_1_axes_0"), val = tensor([-2])]; tensor x_eps_1_cast_fp16 = expand_dims(axes = x_eps_1_axes_0, x = var_55_cast_fp16)[name = string("x_eps_1_cast_fp16")]; tensor norm_x_1_axes_0 = const()[name = string("norm_x_1_axes_0"), val = tensor([1])]; tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_35, x = x_eps_1_cast_fp16)[name = string("norm_x_1_cast_fp16")]; tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = string("x_normed_1_cast_fp16")]; fp16 var_60_to_fp16 = const()[name = string("op_60_to_fp16"), val = fp16(0x1p+6)]; tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_60_to_fp16)[name = string("x_normed_3_cast_fp16")]; tensor blocks_0_norm_1_weight_to_fp16 = const()[name = string("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303567936)))]; tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = string("x_5_cast_fp16")]; tensor var_73 = const()[name = string("op_73"), val = tensor([1, 1])]; tensor var_75 = const()[name = string("op_75"), val = tensor([1, 1])]; string var_77_pad_type_0 = const()[name = string("op_77_pad_type_0"), val = string("custom")]; tensor var_77_pad_0 = const()[name = string("op_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_77_cast_fp16 = conv(dilations = var_75, groups = var_31, pad = var_77_pad_0, pad_type = var_77_pad_type_0, strides = var_73, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = string("op_77_cast_fp16")]; tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = string("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303576192)))]; tensor q_1_cast_fp16 = mul(x = var_77_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = string("q_1_cast_fp16")]; tensor var_81 = const()[name = string("op_81"), val = tensor([1, 1])]; tensor var_83 = const()[name = string("op_83"), val = tensor([1, 1])]; string var_85_pad_type_0 = const()[name = string("op_85_pad_type_0"), val = string("custom")]; tensor var_85_pad_0 = const()[name = string("op_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_85_cast_fp16 = conv(dilations = var_83, groups = var_31, pad = var_85_pad_0, pad_type = var_85_pad_type_0, strides = var_81, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = string("op_85_cast_fp16")]; tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = string("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303584448)))]; tensor k_1_cast_fp16 = mul(x = var_85_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = string("k_1_cast_fp16")]; tensor var_89 = const()[name = string("op_89"), val = tensor([1, 1])]; tensor var_91 = const()[name = string("op_91"), val = tensor([1, 1])]; string var_93_pad_type_0 = const()[name = string("op_93_pad_type_0"), val = string("custom")]; tensor var_93_pad_0 = const()[name = string("op_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_93_cast_fp16 = conv(dilations = var_91, groups = var_31, pad = var_93_pad_0, pad_type = var_93_pad_type_0, strides = var_89, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = string("op_93_cast_fp16")]; tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = string("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303592704)))]; tensor v_1_cast_fp16 = mul(x = var_93_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = string("v_1_cast_fp16")]; tensor var_95 = const()[name = string("op_95"), val = tensor([1, 32, 128, 4])]; tensor q_3_cast_fp16 = reshape(shape = var_95, x = q_1_cast_fp16)[name = string("q_3_cast_fp16")]; tensor var_97 = const()[name = string("op_97"), val = tensor([1, 32, 128, 4])]; tensor k_3_cast_fp16 = reshape(shape = var_97, x = k_1_cast_fp16)[name = string("k_3_cast_fp16")]; tensor var_99 = const()[name = string("op_99"), val = tensor([1, 32, 128, 4])]; tensor v_3_cast_fp16 = reshape(shape = var_99, x = v_1_cast_fp16)[name = string("v_3_cast_fp16")]; tensor var_111_begin_0 = const()[name = string("op_111_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_111_end_0 = const()[name = string("op_111_end_0"), val = tensor([1, 32, 64, 4])]; tensor var_111_end_mask_0 = const()[name = string("op_111_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_111_cast_fp16 = slice_by_index(begin = var_111_begin_0, end = var_111_end_0, end_mask = var_111_end_mask_0, x = q_3_cast_fp16)[name = string("op_111_cast_fp16")]; tensor var_117_begin_0 = const()[name = string("op_117_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_117_end_0 = const()[name = string("op_117_end_0"), val = tensor([1, 32, 128, 4])]; tensor var_117_end_mask_0 = const()[name = string("op_117_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_117_cast_fp16 = slice_by_index(begin = var_117_begin_0, end = var_117_end_0, end_mask = var_117_end_mask_0, x = q_3_cast_fp16)[name = string("op_117_cast_fp16")]; fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_119_cast_fp16 = mul(x = var_117_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_119_cast_fp16")]; bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; tensor rotated_1_cast_fp16 = concat(axis = var_34, interleave = rotated_1_interleave_0, values = (var_119_cast_fp16, var_111_cast_fp16))[name = string("rotated_1_cast_fp16")]; tensor var_122_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = string("op_122_cast_fp16")]; tensor var_123_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = string("op_123_cast_fp16")]; tensor roped_1_cast_fp16 = add(x = var_122_cast_fp16, y = var_123_cast_fp16)[name = string("roped_1_cast_fp16")]; tensor var_136_begin_0 = const()[name = string("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_136_end_0 = const()[name = string("op_136_end_0"), val = tensor([1, 32, 64, 4])]; tensor var_136_end_mask_0 = const()[name = string("op_136_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = k_3_cast_fp16)[name = string("op_136_cast_fp16")]; tensor var_142_begin_0 = const()[name = string("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_142_end_0 = const()[name = string("op_142_end_0"), val = tensor([1, 32, 128, 4])]; tensor var_142_end_mask_0 = const()[name = string("op_142_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = k_3_cast_fp16)[name = string("op_142_cast_fp16")]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_144_cast_fp16")]; bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; tensor rotated_3_cast_fp16 = concat(axis = var_34, interleave = rotated_3_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = string("rotated_3_cast_fp16")]; tensor var_147_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = string("op_147_cast_fp16")]; tensor var_148_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = string("op_148_cast_fp16")]; tensor roped_3_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = string("roped_3_cast_fp16")]; tensor v_5_perm_0 = const()[name = string("v_5_perm_0"), val = tensor([0, 1, -1, -2])]; bool k_7_interleave_0 = const()[name = string("k_7_interleave_0"), val = bool(false)]; tensor k_7_cast_fp16 = concat(axis = var_22, interleave = k_7_interleave_0, values = (k_cache_0, roped_3_cast_fp16))[name = string("k_7_cast_fp16")]; bool v_7_interleave_0 = const()[name = string("v_7_interleave_0"), val = bool(false)]; tensor v_5_cast_fp16 = transpose(perm = v_5_perm_0, x = v_3_cast_fp16)[name = string("transpose_5")]; tensor v_7_cast_fp16 = concat(axis = var_34, interleave = v_7_interleave_0, values = (v_cache_0, v_5_cast_fp16))[name = string("v_7_cast_fp16")]; tensor var_159_begin_0 = const()[name = string("op_159_begin_0"), val = tensor([0, 0, 0, 1])]; tensor var_159_end_0 = const()[name = string("op_159_end_0"), val = tensor([1, 32, 128, 509])]; tensor var_159_end_mask_0 = const()[name = string("op_159_end_mask_0"), val = tensor([true, true, true, false])]; tensor new_k_cache_0 = slice_by_index(begin = var_159_begin_0, end = var_159_end_0, end_mask = var_159_end_mask_0, x = k_7_cast_fp16)[name = string("op_159_cast_fp16")]; tensor var_160_begin_0 = const()[name = string("op_160_begin_0"), val = tensor([0, 0, 1, 0])]; tensor var_160_end_0 = const()[name = string("op_160_end_0"), val = tensor([1, 32, 509, 128])]; tensor var_160_end_mask_0 = const()[name = string("op_160_end_mask_0"), val = tensor([true, true, false, true])]; tensor new_v_cache_0 = slice_by_index(begin = var_160_begin_0, end = var_160_end_0, end_mask = var_160_end_mask_0, x = v_7_cast_fp16)[name = string("op_160_cast_fp16")]; fp16 var_165_to_fp16 = const()[name = string("op_165_to_fp16"), val = fp16(0x1.6ap-4)]; tensor var_166_cast_fp16 = mul(x = roped_1_cast_fp16, y = var_165_to_fp16)[name = string("op_166_cast_fp16")]; bool attn_weights_1_transpose_x_0 = const()[name = string("attn_weights_1_transpose_x_0"), val = bool(true)]; bool attn_weights_1_transpose_y_0 = const()[name = string("attn_weights_1_transpose_y_0"), val = bool(false)]; tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_166_cast_fp16, y = k_7_cast_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = string("attn_weights_3_cast_fp16")]; tensor attn_weights_5_cast_fp16 = softmax(axis = var_30, x = attn_weights_3_cast_fp16)[name = string("attn_weights_5_cast_fp16")]; bool var_175_transpose_x_0 = const()[name = string("op_175_transpose_x_0"), val = bool(false)]; bool var_175_transpose_y_0 = const()[name = string("op_175_transpose_y_0"), val = bool(false)]; tensor var_175_cast_fp16 = matmul(transpose_x = var_175_transpose_x_0, transpose_y = var_175_transpose_y_0, x = attn_weights_5_cast_fp16, y = v_7_cast_fp16)[name = string("op_175_cast_fp16")]; tensor attn_1_perm_0 = const()[name = string("attn_1_perm_0"), val = tensor([0, 1, -1, -2])]; tensor var_178 = const()[name = string("op_178"), val = tensor([1, 4096, 1, -1])]; tensor attn_1_cast_fp16 = transpose(perm = attn_1_perm_0, x = var_175_cast_fp16)[name = string("transpose_4")]; tensor input_1_cast_fp16 = reshape(shape = var_178, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_182 = const()[name = string("op_182"), val = tensor([1, 1])]; tensor var_184 = const()[name = string("op_184"), val = tensor([1, 1])]; string var_186_pad_type_0 = const()[name = string("op_186_pad_type_0"), val = string("custom")]; tensor var_186_pad_0 = const()[name = string("op_186_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_186_cast_fp16 = conv(dilations = var_184, groups = var_31, pad = var_186_pad_0, pad_type = var_186_pad_type_0, strides = var_182, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = string("op_186_cast_fp16")]; tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = string("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303600960)))]; tensor attention_output_1_cast_fp16 = mul(x = var_186_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = string("attention_output_1_cast_fp16")]; tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = string("x_11_cast_fp16")]; tensor var_205_axes_0 = const()[name = string("op_205_axes_0"), val = tensor([-2])]; tensor var_205_cast_fp16 = squeeze(axes = var_205_axes_0, x = x_11_cast_fp16)[name = string("op_205_cast_fp16")]; bool var_207_interleave_0 = const()[name = string("op_207_interleave_0"), val = bool(false)]; tensor eps_chan_3_to_fp16 = const()[name = string("eps_chan_3_to_fp16"), val = tensor([[[0x1.9e8p-3, 0x1.9e8p-3, 0x1.9e8p-3, 0x1.9e8p-3]]])]; tensor var_207_cast_fp16 = concat(axis = var_31, interleave = var_207_interleave_0, values = (var_205_cast_fp16, eps_chan_3_to_fp16))[name = string("op_207_cast_fp16")]; tensor x_eps_3_axes_0 = const()[name = string("x_eps_3_axes_0"), val = tensor([-2])]; tensor x_eps_3_cast_fp16 = expand_dims(axes = x_eps_3_axes_0, x = var_207_cast_fp16)[name = string("x_eps_3_cast_fp16")]; tensor norm_x_3_axes_0 = const()[name = string("norm_x_3_axes_0"), val = tensor([1])]; tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_35, x = x_eps_3_cast_fp16)[name = string("norm_x_3_cast_fp16")]; tensor x_normed_7_cast_fp16 = real_div(x = x_11_cast_fp16, y = norm_x_3_cast_fp16)[name = string("x_normed_7_cast_fp16")]; fp16 var_212_to_fp16 = const()[name = string("op_212_to_fp16"), val = fp16(0x1p+6)]; tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_212_to_fp16)[name = string("x_normed_9_cast_fp16")]; tensor blocks_0_norm_2_weight_to_fp16 = const()[name = string("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303609216)))]; tensor input_3_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = string("input_3_cast_fp16")]; tensor var_224 = const()[name = string("op_224"), val = tensor([1, 1])]; tensor var_226 = const()[name = string("op_226"), val = tensor([1, 1])]; string var_228_pad_type_0 = const()[name = string("op_228_pad_type_0"), val = string("custom")]; tensor var_228_pad_0 = const()[name = string("op_228_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_228_cast_fp16 = conv(dilations = var_226, groups = var_31, pad = var_228_pad_0, pad_type = var_228_pad_type_0, strides = var_224, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = string("op_228_cast_fp16")]; tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = string("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303617472)))]; tensor input_5_cast_fp16 = mul(x = var_228_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = string("input_5_cast_fp16")]; tensor var_232 = const()[name = string("op_232"), val = tensor([1, 1])]; tensor var_234 = const()[name = string("op_234"), val = tensor([1, 1])]; string var_236_pad_type_0 = const()[name = string("op_236_pad_type_0"), val = string("custom")]; tensor var_236_pad_0 = const()[name = string("op_236_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_236_cast_fp16 = conv(dilations = var_234, groups = var_31, pad = var_236_pad_0, pad_type = var_236_pad_type_0, strides = var_232, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = string("op_236_cast_fp16")]; tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = string("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303639552)))]; tensor x_fc_2_1_cast_fp16 = mul(x = var_236_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = string("x_fc_2_1_cast_fp16")]; tensor var_238_cast_fp16 = silu(x = input_5_cast_fp16)[name = string("op_238_cast_fp16")]; tensor input_7_cast_fp16 = mul(x = var_238_cast_fp16, y = x_fc_2_1_cast_fp16)[name = string("input_7_cast_fp16")]; tensor var_242 = const()[name = string("op_242"), val = tensor([1, 1])]; tensor var_244 = const()[name = string("op_244"), val = tensor([1, 1])]; string var_246_pad_type_0 = const()[name = string("op_246_pad_type_0"), val = string("custom")]; tensor var_246_pad_0 = const()[name = string("op_246_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_246_cast_fp16 = conv(dilations = var_244, groups = var_31, pad = var_246_pad_0, pad_type = var_246_pad_type_0, strides = var_242, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = string("op_246_cast_fp16")]; tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = string("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303661632)))]; tensor var_247_cast_fp16 = mul(x = var_246_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = string("op_247_cast_fp16")]; tensor x_15_cast_fp16 = add(x = var_247_cast_fp16, y = x_11_cast_fp16)[name = string("x_15_cast_fp16")]; int32 var_258 = const()[name = string("op_258"), val = int32(-1)]; int32 var_266 = const()[name = string("op_266"), val = int32(3)]; int32 var_267 = const()[name = string("op_267"), val = int32(1)]; int32 var_270 = const()[name = string("op_270"), val = int32(-2)]; bool var_271 = const()[name = string("op_271"), val = bool(true)]; tensor var_288_axes_0 = const()[name = string("op_288_axes_0"), val = tensor([-2])]; tensor var_288_cast_fp16 = squeeze(axes = var_288_axes_0, x = x_15_cast_fp16)[name = string("op_288_cast_fp16")]; bool var_290_interleave_0 = const()[name = string("op_290_interleave_0"), val = bool(false)]; tensor eps_chan_5_to_fp16 = const()[name = string("eps_chan_5_to_fp16"), val = tensor([[[0x1.9e8p-3, 0x1.9e8p-3, 0x1.9e8p-3, 0x1.9e8p-3]]])]; tensor var_290_cast_fp16 = concat(axis = var_267, interleave = var_290_interleave_0, values = (var_288_cast_fp16, eps_chan_5_to_fp16))[name = string("op_290_cast_fp16")]; tensor x_eps_5_axes_0 = const()[name = string("x_eps_5_axes_0"), val = tensor([-2])]; tensor x_eps_5_cast_fp16 = expand_dims(axes = x_eps_5_axes_0, x = var_290_cast_fp16)[name = string("x_eps_5_cast_fp16")]; tensor norm_x_5_axes_0 = const()[name = string("norm_x_5_axes_0"), val = tensor([1])]; tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_271, x = x_eps_5_cast_fp16)[name = string("norm_x_5_cast_fp16")]; tensor x_normed_13_cast_fp16 = real_div(x = x_15_cast_fp16, y = norm_x_5_cast_fp16)[name = string("x_normed_13_cast_fp16")]; fp16 var_295_to_fp16 = const()[name = string("op_295_to_fp16"), val = fp16(0x1p+6)]; tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_295_to_fp16)[name = string("x_normed_15_cast_fp16")]; tensor blocks_1_norm_1_weight_to_fp16 = const()[name = string("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303669888)))]; tensor x_19_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = string("x_19_cast_fp16")]; tensor var_311 = const()[name = string("op_311"), val = tensor([1, 1])]; tensor var_313 = const()[name = string("op_313"), val = tensor([1, 1])]; string var_315_pad_type_0 = const()[name = string("op_315_pad_type_0"), val = string("custom")]; tensor var_315_pad_0 = const()[name = string("op_315_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_315_cast_fp16 = conv(dilations = var_313, groups = var_267, pad = var_315_pad_0, pad_type = var_315_pad_type_0, strides = var_311, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = string("op_315_cast_fp16")]; tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = string("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303678144)))]; tensor q_7_cast_fp16 = mul(x = var_315_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = string("q_7_cast_fp16")]; tensor var_319 = const()[name = string("op_319"), val = tensor([1, 1])]; tensor var_321 = const()[name = string("op_321"), val = tensor([1, 1])]; string var_323_pad_type_0 = const()[name = string("op_323_pad_type_0"), val = string("custom")]; tensor var_323_pad_0 = const()[name = string("op_323_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_323_cast_fp16 = conv(dilations = var_321, groups = var_267, pad = var_323_pad_0, pad_type = var_323_pad_type_0, strides = var_319, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = string("op_323_cast_fp16")]; tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = string("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303686400)))]; tensor k_9_cast_fp16 = mul(x = var_323_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = string("k_9_cast_fp16")]; tensor var_327 = const()[name = string("op_327"), val = tensor([1, 1])]; tensor var_329 = const()[name = string("op_329"), val = tensor([1, 1])]; string var_331_pad_type_0 = const()[name = string("op_331_pad_type_0"), val = string("custom")]; tensor var_331_pad_0 = const()[name = string("op_331_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_331_cast_fp16 = conv(dilations = var_329, groups = var_267, pad = var_331_pad_0, pad_type = var_331_pad_type_0, strides = var_327, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = string("op_331_cast_fp16")]; tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = string("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303694656)))]; tensor v_9_cast_fp16 = mul(x = var_331_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = string("v_9_cast_fp16")]; tensor var_333 = const()[name = string("op_333"), val = tensor([1, 32, 128, 4])]; tensor q_9_cast_fp16 = reshape(shape = var_333, x = q_7_cast_fp16)[name = string("q_9_cast_fp16")]; tensor var_335 = const()[name = string("op_335"), val = tensor([1, 32, 128, 4])]; tensor k_11_cast_fp16 = reshape(shape = var_335, x = k_9_cast_fp16)[name = string("k_11_cast_fp16")]; tensor var_337 = const()[name = string("op_337"), val = tensor([1, 32, 128, 4])]; tensor v_11_cast_fp16 = reshape(shape = var_337, x = v_9_cast_fp16)[name = string("v_11_cast_fp16")]; tensor var_349_begin_0 = const()[name = string("op_349_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_349_end_0 = const()[name = string("op_349_end_0"), val = tensor([1, 32, 64, 4])]; tensor var_349_end_mask_0 = const()[name = string("op_349_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = q_9_cast_fp16)[name = string("op_349_cast_fp16")]; tensor var_355_begin_0 = const()[name = string("op_355_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_355_end_0 = const()[name = string("op_355_end_0"), val = tensor([1, 32, 128, 4])]; tensor var_355_end_mask_0 = const()[name = string("op_355_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_355_cast_fp16 = slice_by_index(begin = var_355_begin_0, end = var_355_end_0, end_mask = var_355_end_mask_0, x = q_9_cast_fp16)[name = string("op_355_cast_fp16")]; fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_357_cast_fp16 = mul(x = var_355_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_357_cast_fp16")]; bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; tensor rotated_5_cast_fp16 = concat(axis = var_270, interleave = rotated_5_interleave_0, values = (var_357_cast_fp16, var_349_cast_fp16))[name = string("rotated_5_cast_fp16")]; tensor var_360_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = string("op_360_cast_fp16")]; tensor var_361_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = string("op_361_cast_fp16")]; tensor roped_5_cast_fp16 = add(x = var_360_cast_fp16, y = var_361_cast_fp16)[name = string("roped_5_cast_fp16")]; tensor var_374_begin_0 = const()[name = string("op_374_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_374_end_0 = const()[name = string("op_374_end_0"), val = tensor([1, 32, 64, 4])]; tensor var_374_end_mask_0 = const()[name = string("op_374_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = var_374_end_0, end_mask = var_374_end_mask_0, x = k_11_cast_fp16)[name = string("op_374_cast_fp16")]; tensor var_380_begin_0 = const()[name = string("op_380_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_380_end_0 = const()[name = string("op_380_end_0"), val = tensor([1, 32, 128, 4])]; tensor var_380_end_mask_0 = const()[name = string("op_380_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_11_cast_fp16)[name = string("op_380_cast_fp16")]; fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_382_cast_fp16 = mul(x = var_380_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_382_cast_fp16")]; bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; tensor rotated_7_cast_fp16 = concat(axis = var_270, interleave = rotated_7_interleave_0, values = (var_382_cast_fp16, var_374_cast_fp16))[name = string("rotated_7_cast_fp16")]; tensor var_385_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = string("op_385_cast_fp16")]; tensor var_386_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = string("op_386_cast_fp16")]; tensor roped_7_cast_fp16 = add(x = var_385_cast_fp16, y = var_386_cast_fp16)[name = string("roped_7_cast_fp16")]; tensor v_13_perm_0 = const()[name = string("v_13_perm_0"), val = tensor([0, 1, -1, -2])]; bool k_15_interleave_0 = const()[name = string("k_15_interleave_0"), val = bool(false)]; tensor k_15_cast_fp16 = concat(axis = var_258, interleave = k_15_interleave_0, values = (k_cache_1, roped_7_cast_fp16))[name = string("k_15_cast_fp16")]; bool v_15_interleave_0 = const()[name = string("v_15_interleave_0"), val = bool(false)]; tensor v_13_cast_fp16 = transpose(perm = v_13_perm_0, x = v_11_cast_fp16)[name = string("transpose_3")]; tensor v_15_cast_fp16 = concat(axis = var_270, interleave = v_15_interleave_0, values = (v_cache_1, v_13_cast_fp16))[name = string("v_15_cast_fp16")]; tensor var_397_begin_0 = const()[name = string("op_397_begin_0"), val = tensor([0, 0, 0, 1])]; tensor var_397_end_0 = const()[name = string("op_397_end_0"), val = tensor([1, 32, 128, 509])]; tensor var_397_end_mask_0 = const()[name = string("op_397_end_mask_0"), val = tensor([true, true, true, false])]; tensor new_k_cache_1 = slice_by_index(begin = var_397_begin_0, end = var_397_end_0, end_mask = var_397_end_mask_0, x = k_15_cast_fp16)[name = string("op_397_cast_fp16")]; tensor var_398_begin_0 = const()[name = string("op_398_begin_0"), val = tensor([0, 0, 1, 0])]; tensor var_398_end_0 = const()[name = string("op_398_end_0"), val = tensor([1, 32, 509, 128])]; tensor var_398_end_mask_0 = const()[name = string("op_398_end_mask_0"), val = tensor([true, true, false, true])]; tensor new_v_cache_1 = slice_by_index(begin = var_398_begin_0, end = var_398_end_0, end_mask = var_398_end_mask_0, x = v_15_cast_fp16)[name = string("op_398_cast_fp16")]; fp16 var_403_to_fp16 = const()[name = string("op_403_to_fp16"), val = fp16(0x1.6ap-4)]; tensor var_404_cast_fp16 = mul(x = roped_5_cast_fp16, y = var_403_to_fp16)[name = string("op_404_cast_fp16")]; bool attn_weights_7_transpose_x_0 = const()[name = string("attn_weights_7_transpose_x_0"), val = bool(true)]; bool attn_weights_7_transpose_y_0 = const()[name = string("attn_weights_7_transpose_y_0"), val = bool(false)]; tensor attn_weights_7_cast_fp16 = matmul(transpose_x = attn_weights_7_transpose_x_0, transpose_y = attn_weights_7_transpose_y_0, x = var_404_cast_fp16, y = k_15_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; tensor attn_weights_9_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = mask)[name = string("attn_weights_9_cast_fp16")]; tensor attn_weights_11_cast_fp16 = softmax(axis = var_266, x = attn_weights_9_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; bool var_413_transpose_x_0 = const()[name = string("op_413_transpose_x_0"), val = bool(false)]; bool var_413_transpose_y_0 = const()[name = string("op_413_transpose_y_0"), val = bool(false)]; tensor var_413_cast_fp16 = matmul(transpose_x = var_413_transpose_x_0, transpose_y = var_413_transpose_y_0, x = attn_weights_11_cast_fp16, y = v_15_cast_fp16)[name = string("op_413_cast_fp16")]; tensor attn_3_perm_0 = const()[name = string("attn_3_perm_0"), val = tensor([0, 1, -1, -2])]; tensor var_416 = const()[name = string("op_416"), val = tensor([1, 4096, 1, -1])]; tensor attn_3_cast_fp16 = transpose(perm = attn_3_perm_0, x = var_413_cast_fp16)[name = string("transpose_2")]; tensor input_9_cast_fp16 = reshape(shape = var_416, x = attn_3_cast_fp16)[name = string("input_9_cast_fp16")]; tensor var_420 = const()[name = string("op_420"), val = tensor([1, 1])]; tensor var_422 = const()[name = string("op_422"), val = tensor([1, 1])]; string var_424_pad_type_0 = const()[name = string("op_424_pad_type_0"), val = string("custom")]; tensor var_424_pad_0 = const()[name = string("op_424_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_424_cast_fp16 = conv(dilations = var_422, groups = var_267, pad = var_424_pad_0, pad_type = var_424_pad_type_0, strides = var_420, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = string("op_424_cast_fp16")]; tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = string("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303702912)))]; tensor attention_output_3_cast_fp16 = mul(x = var_424_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = string("attention_output_3_cast_fp16")]; tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = string("x_25_cast_fp16")]; tensor var_443_axes_0 = const()[name = string("op_443_axes_0"), val = tensor([-2])]; tensor var_443_cast_fp16 = squeeze(axes = var_443_axes_0, x = x_25_cast_fp16)[name = string("op_443_cast_fp16")]; bool var_445_interleave_0 = const()[name = string("op_445_interleave_0"), val = bool(false)]; tensor eps_chan_7_to_fp16 = const()[name = string("eps_chan_7_to_fp16"), val = tensor