program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] { func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); }