program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] { func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; tensor var_14 = const()[name = tensor("op_14"), val = tensor(3)]; tensor var_19 = const()[name = tensor("op_19"), val = tensor(-2)]; tensor var_21 = const()[name = tensor("op_21"), val = tensor(-1)]; tensor var_28 = const()[name = tensor("op_28"), val = tensor(1)]; tensor var_29 = const()[name = tensor("op_29"), val = tensor(true)]; tensor var_37_cast_fp16 = mul(x = x, y = x)[name = tensor("op_37_cast_fp16")]; tensor var_38 = const()[name = tensor("op_38"), val = tensor([1])]; tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_38, keep_dims = var_29, x = var_37_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; tensor var_40_to_fp16 = const()[name = tensor("op_40_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_41_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_40_to_fp16)[name = tensor("op_41_cast_fp16")]; tensor var_42_epsilon_0_to_fp16 = const()[name = tensor("op_42_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_42_cast_fp16 = rsqrt(epsilon = var_42_epsilon_0_to_fp16, x = var_41_cast_fp16)[name = tensor("op_42_cast_fp16")]; tensor x_normed_1_cast_fp16 = mul(x = x, y = var_42_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920)))]; tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; tensor var_54 = const()[name = tensor("op_54"), val = tensor([1, 1])]; tensor var_56 = const()[name = tensor("op_56"), val = tensor([1, 1])]; tensor var_58_pad_type_0 = const()[name = tensor("op_58_pad_type_0"), val = tensor("custom")]; tensor var_58_pad_0 = const()[name = tensor("op_58_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_58_cast_fp16 = conv(dilations = var_56, groups = var_28, pad = var_58_pad_0, pad_type = var_58_pad_type_0, strides = var_54, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_58_cast_fp16")]; tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202386176)))]; tensor q_1_cast_fp16 = mul(x = var_58_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; tensor var_62 = const()[name = tensor("op_62"), val = tensor([1, 1])]; tensor var_64 = const()[name = tensor("op_64"), val = tensor([1, 1])]; tensor var_66_pad_type_0 = const()[name = tensor("op_66_pad_type_0"), val = tensor("custom")]; tensor var_66_pad_0 = const()[name = tensor("op_66_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_66_cast_fp16 = conv(dilations = var_64, groups = var_28, pad = var_66_pad_0, pad_type = var_66_pad_type_0, strides = var_62, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_66_cast_fp16")]; tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202394432)))]; tensor k_1_cast_fp16 = mul(x = var_66_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; tensor var_70 = const()[name = tensor("op_70"), val = tensor([1, 1])]; tensor var_72 = const()[name = tensor("op_72"), val = tensor([1, 1])]; tensor var_74_pad_type_0 = const()[name = tensor("op_74_pad_type_0"), val = tensor("custom")]; tensor var_74_pad_0 = const()[name = tensor("op_74_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_74_cast_fp16 = conv(dilations = var_72, groups = var_28, pad = var_74_pad_0, pad_type = var_74_pad_type_0, strides = var_70, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_74_cast_fp16")]; tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202402688)))]; tensor v_1_cast_fp16 = mul(x = var_74_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 32, 128, 64])]; tensor q_3_cast_fp16 = reshape(shape = var_76, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; tensor var_78 = const()[name = tensor("op_78"), val = tensor([1, 32, 128, 64])]; tensor k_3_cast_fp16 = reshape(shape = var_78, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; tensor new_v_cache_0 = reshape(shape = var_80, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; tensor var_92_begin_0 = const()[name = tensor("op_92_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_92_end_0 = const()[name = tensor("op_92_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_92_end_mask_0 = const()[name = tensor("op_92_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_92_cast_fp16 = slice_by_index(begin = var_92_begin_0, end = var_92_end_0, end_mask = var_92_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_92_cast_fp16")]; tensor var_98_begin_0 = const()[name = tensor("op_98_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_98_end_0 = const()[name = tensor("op_98_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_98_end_mask_0 = const()[name = tensor("op_98_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_98_cast_fp16 = slice_by_index(begin = var_98_begin_0, end = var_98_end_0, end_mask = var_98_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_98_cast_fp16")]; tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_100_cast_fp16 = mul(x = var_98_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_100_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; tensor rotated_1_cast_fp16 = concat(axis = var_19, interleave = rotated_1_interleave_0, values = (var_100_cast_fp16, var_92_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; tensor var_103_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_103_cast_fp16")]; tensor var_104_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_104_cast_fp16")]; tensor roped_1_cast_fp16 = add(x = var_103_cast_fp16, y = var_104_cast_fp16)[name = tensor("roped_1_cast_fp16")]; tensor var_117_begin_0 = const()[name = tensor("op_117_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_117_end_0 = const()[name = tensor("op_117_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_117_end_mask_0 = const()[name = tensor("op_117_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_117_cast_fp16 = slice_by_index(begin = var_117_begin_0, end = var_117_end_0, end_mask = var_117_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_117_cast_fp16")]; tensor var_123_begin_0 = const()[name = tensor("op_123_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_123_end_0 = const()[name = tensor("op_123_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_123_end_mask_0 = const()[name = tensor("op_123_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_123_cast_fp16 = slice_by_index(begin = var_123_begin_0, end = var_123_end_0, end_mask = var_123_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_123_cast_fp16")]; tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_125_cast_fp16 = mul(x = var_123_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_125_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; tensor rotated_3_cast_fp16 = concat(axis = var_19, interleave = rotated_3_interleave_0, values = (var_125_cast_fp16, var_117_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; tensor var_128_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_128_cast_fp16")]; tensor var_129_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_129_cast_fp16")]; tensor roped_3_cast_fp16 = add(x = var_128_cast_fp16, y = var_129_cast_fp16)[name = tensor("roped_3_cast_fp16")]; tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; tensor q_5_cast_fp16 = concat(axis = var_19, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; tensor new_k_cache_0 = concat(axis = var_19, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; tensor k_7_cast_fp16 = concat(axis = var_21, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; tensor v_5_cast_fp16 = concat(axis = var_21, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; tensor var_151_to_fp16 = const()[name = tensor("op_151_to_fp16"), val = tensor(0x1.6ap-4)]; tensor var_152_cast_fp16 = mul(x = q_5_cast_fp16, y = var_151_to_fp16)[name = tensor("op_152_cast_fp16")]; tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_152_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; tensor var_160_cast_fp16 = softmax(axis = var_14, x = attn_weights_3_cast_fp16)[name = tensor("op_160_cast_fp16")]; tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_160_cast_fp16)[name = tensor("attn_1_cast_fp16")]; tensor var_164 = const()[name = tensor("op_164"), val = tensor([1, 4096, 1, -1])]; tensor input_1_cast_fp16 = reshape(shape = var_164, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 1])]; tensor var_170 = const()[name = tensor("op_170"), val = tensor([1, 1])]; tensor var_172_pad_type_0 = const()[name = tensor("op_172_pad_type_0"), val = tensor("custom")]; tensor var_172_pad_0 = const()[name = tensor("op_172_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_172_cast_fp16 = conv(dilations = var_170, groups = var_28, pad = var_172_pad_0, pad_type = var_172_pad_type_0, strides = var_168, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_172_cast_fp16")]; tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor