program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] { func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; tensor q_5_interleave_0 = const()[name = tensor