program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] { func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; tensor var_14 = const()[name = tensor("op_14"), val = tensor(3)]; tensor var_19 = const()[name = tensor("op_19"), val = tensor(-2)]; tensor var_21 = const()[name = tensor("op_21"), val = tensor(-1)]; tensor var_28 = const()[name = tensor("op_28"), val = tensor(1)]; tensor var_29 = const()[name = tensor("op_29"), val = tensor(true)]; tensor var_37_cast_fp16 = mul(x = x, y = x)[name = tensor("op_37_cast_fp16")]; tensor var_38 = const()[name = tensor("op_38"), val = tensor([1])]; tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_38, keep_dims = var_29, x = var_37_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; tensor var_40_to_fp16 = const()[name = tensor("op_40_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_41_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_40_to_fp16)[name = tensor("op_41_cast_fp16")]; tensor var_42_epsilon_0_to_fp16 = const()[name = tensor("op_42_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_42_cast_fp16 = rsqrt(epsilon = var_42_epsilon_0_to_fp16, x = var_41_cast_fp16)[name = tensor("op_42_cast_fp16")]; tensor x_normed_1_cast_fp16 = mul(x = x, y = var_42_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920)))]; tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; tensor var_54 = const()[name = tensor("op_54"), val = tensor([1, 1])]; tensor var_56 = const()[name = tensor("op_56"), val = tensor([1, 1])]; tensor var_58_pad_type_0 = const()[name = tensor("op_58_pad_type_0"), val = tensor("custom")]; tensor var_58_pad_0 = const()[name = tensor("op_58_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_58_cast_fp16 = conv(dilations = var_56, groups = var_28, pad = var_58_pad_0, pad_type = var_58_pad_type_0, strides = var_54, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_58_cast_fp16")]; tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202386176)))]; tensor q_1_cast_fp16 = mul(x = var_58_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; tensor var_62 = const()[name = tensor("op_62"), val = tensor([1, 1])]; tensor var_64 = const()[name = tensor("op_64"), val = tensor([1, 1])]; tensor var_66_pad_type_0 = const()[name = tensor("op_66_pad_type_0"), val = tensor("custom")]; tensor var_66_pad_0 = const()[name = tensor("op_66_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_66_cast_fp16 = conv(dilations = var_64, groups = var_28, pad = var_66_pad_0, pad_type = var_66_pad_type_0, strides = var_62, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_66_cast_fp16")]; tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202394432)))]; tensor k_1_cast_fp16 = mul(x = var_66_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; tensor var_70 = const()[name = tensor("op_70"), val = tensor([1, 1])]; tensor var_72 = const()[name = tensor("op_72"), val = tensor([1, 1])]; tensor var_74_pad_type_0 = const()[name = tensor("op_74_pad_type_0"), val = tensor("custom")]; tensor var_74_pad_0 = const()[name = tensor("op_74_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_74_cast_fp16 = conv(dilations = var_72, groups = var_28, pad = var_74_pad_0, pad_type = var_74_pad_type_0, strides = var_70, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_74_cast_fp16")]; tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202402688)))]; tensor v_1_cast_fp16 = mul(x = var_74_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 32, 128, 64])]; tensor q_3_cast_fp16 = reshape(shape = var_76, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; tensor var_78 = const()[name = tensor("op_78"), val = tensor([1, 32, 128, 64])]; tensor k_3_cast_fp16 = reshape(shape = var_78, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; tensor new_v_cache_0 = reshape(shape = var_80, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; tensor var_92_begin_0 = const()[name = tensor("op_92_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_92_end_0 = const()[name = tensor("op_92_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_92_end_mask_0 = const()[name = tensor("op_92_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_92_cast_fp16 = slice_by_index(begin = var_92_begin_0, end = var_92_end_0, end_mask = var_92_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_92_cast_fp16")]; tensor var_98_begin_0 = const()[name = tensor("op_98_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_98_end_0 = const()[name = tensor("op_98_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_98_end_mask_0 = const()[name = tensor("op_98_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_98_cast_fp16 = slice_by_index(begin = var_98_begin_0, end = var_98_end_0, end_mask = var_98_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_98_cast_fp16")]; tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_100_cast_fp16 = mul(x = var_98_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_100_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; tensor rotated_1_cast_fp16 = concat(axis = var_19, interleave = rotated_1_interleave_0, values = (var_100_cast_fp16, var_92_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; tensor var_103_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_103_cast_fp16")]; tensor var_104_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_104_cast_fp16")]; tensor roped_1_cast_fp16 = add(x = var_103_cast_fp16, y = var_104_cast_fp16)[name = tensor("roped_1_cast_fp16")]; tensor var_117_begin_0 = const()[name = tensor("op_117_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_117_end_0 = const()[name = tensor("op_117_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_117_end_mask_0 = const()[name = tensor("op_117_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_117_cast_fp16 = slice_by_index(begin = var_117_begin_0, end = var_117_end_0, end_mask = var_117_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_117_cast_fp16")]; tensor var_123_begin_0 = const()[name = tensor("op_123_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_123_end_0 = const()[name = tensor("op_123_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_123_end_mask_0 = const()[name = tensor("op_123_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_123_cast_fp16 = slice_by_index(begin = var_123_begin_0, end = var_123_end_0, end_mask = var_123_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_123_cast_fp16")]; tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_125_cast_fp16 = mul(x = var_123_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_125_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; tensor rotated_3_cast_fp16 = concat(axis = var_19, interleave = rotated_3_interleave_0, values = (var_125_cast_fp16, var_117_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; tensor var_128_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_128_cast_fp16")]; tensor var_129_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_129_cast_fp16")]; tensor roped_3_cast_fp16 = add(x = var_128_cast_fp16, y = var_129_cast_fp16)[name = tensor("roped_3_cast_fp16")]; tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; tensor q_5_cast_fp16 = concat(axis = var_19, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; tensor new_k_cache_0 = concat(axis = var_19, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; tensor k_7_cast_fp16 = concat(axis = var_21, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; tensor v_5_cast_fp16 = concat(axis = var_21, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; tensor var_151_to_fp16 = const()[name = tensor("op_151_to_fp16"), val = tensor(0x1.6ap-4)]; tensor var_152_cast_fp16 = mul(x = q_5_cast_fp16, y = var_151_to_fp16)[name = tensor("op_152_cast_fp16")]; tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_152_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; tensor var_160_cast_fp16 = softmax(axis = var_14, x = attn_weights_3_cast_fp16)[name = tensor("op_160_cast_fp16")]; tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_160_cast_fp16)[name = tensor("attn_1_cast_fp16")]; tensor var_164 = const()[name = tensor("op_164"), val = tensor([1, 4096, 1, -1])]; tensor input_1_cast_fp16 = reshape(shape = var_164, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 1])]; tensor var_170 = const()[name = tensor("op_170"), val = tensor([1, 1])]; tensor var_172_pad_type_0 = const()[name = tensor("op_172_pad_type_0"), val = tensor("custom")]; tensor var_172_pad_0 = const()[name = tensor("op_172_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_172_cast_fp16 = conv(dilations = var_170, groups = var_28, pad = var_172_pad_0, pad_type = var_172_pad_type_0, strides = var_168, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_172_cast_fp16")]; tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202410944)))]; tensor attention_output_1_cast_fp16 = mul(x = var_172_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; tensor var_181_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_181_cast_fp16")]; tensor var_182 = const()[name = tensor("op_182"), val = tensor([1])]; tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_182, keep_dims = var_29, x = var_181_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; tensor var_184_to_fp16 = const()[name = tensor("op_184_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_185_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_184_to_fp16)[name = tensor("op_185_cast_fp16")]; tensor var_186_epsilon_0_to_fp16 = const()[name = tensor("op_186_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_186_cast_fp16 = rsqrt(epsilon = var_186_epsilon_0_to_fp16, x = var_185_cast_fp16)[name = tensor("op_186_cast_fp16")]; tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_186_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202419200)))]; tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 1])]; tensor var_200 = const()[name = tensor("op_200"), val = tensor([1, 1])]; tensor var_202_pad_type_0 = const()[name = tensor("op_202_pad_type_0"), val = tensor("custom")]; tensor var_202_pad_0 = const()[name = tensor("op_202_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_202_cast_fp16 = conv(dilations = var_200, groups = var_28, pad = var_202_pad_0, pad_type = var_202_pad_type_0, strides = var_198, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_202_cast_fp16")]; tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202427456)))]; tensor input_5_cast_fp16 = mul(x = var_202_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; tensor var_206 = const()[name = tensor("op_206"), val = tensor([1, 1])]; tensor var_208 = const()[name = tensor("op_208"), val = tensor([1, 1])]; tensor var_210_pad_type_0 = const()[name = tensor("op_210_pad_type_0"), val = tensor("custom")]; tensor var_210_pad_0 = const()[name = tensor("op_210_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_210_cast_fp16 = conv(dilations = var_208, groups = var_28, pad = var_210_pad_0, pad_type = var_210_pad_type_0, strides = var_206, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_210_cast_fp16")]; tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202449536)))]; tensor x_fc_2_1_cast_fp16 = mul(x = var_210_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; tensor var_212_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_212_cast_fp16")]; tensor input_7_cast_fp16 = mul(x = var_212_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor var_216 = const()[name = tensor("op_216"), val = tensor([1, 1])]; tensor var_218 = const()[name = tensor("op_218"), val = tensor([1, 1])]; tensor var_220_pad_type_0 = const()[name = tensor("op_220_pad_type_0"), val = tensor("custom")]; tensor var_220_pad_0 = const()[name = tensor("op_220_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_220_cast_fp16 = conv(dilations = var_218, groups = var_28, pad = var_220_pad_0, pad_type = var_220_pad_type_0, strides = var_216, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_220_cast_fp16")]; tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202471616)))]; tensor var_221_cast_fp16 = mul(x = var_220_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_221_cast_fp16")]; tensor x_15_cast_fp16 = add(x = var_221_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; tensor var_228 = const()[name = tensor("op_228"), val = tensor(3)]; tensor var_233 = const()[name = tensor("op_233"), val = tensor(-2)]; tensor var_235 = const()[name = tensor("op_235"), val = tensor(-1)]; tensor var_242 = const()[name = tensor("op_242"), val = tensor(1)]; tensor var_243 = const()[name = tensor("op_243"), val = tensor(true)]; tensor var_250_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_250_cast_fp16")]; tensor var_251 = const()[name = tensor("op_251"), val = tensor([1])]; tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_251, keep_dims = var_243, x = var_250_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; tensor var_253_to_fp16 = const()[name = tensor("op_253_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_254_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_253_to_fp16)[name = tensor("op_254_cast_fp16")]; tensor var_255_epsilon_0_to_fp16 = const()[name = tensor("op_255_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_255_cast_fp16 = rsqrt(epsilon = var_255_epsilon_0_to_fp16, x = var_254_cast_fp16)[name = tensor("op_255_cast_fp16")]; tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_255_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202479872)))]; tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; tensor var_270 = const()[name = tensor("op_270"), val = tensor([1, 1])]; tensor var_272 = const()[name = tensor("op_272"), val = tensor([1, 1])]; tensor var_274_pad_type_0 = const()[name = tensor("op_274_pad_type_0"), val = tensor("custom")]; tensor var_274_pad_0 = const()[name = tensor("op_274_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_274_cast_fp16 = conv(dilations = var_272, groups = var_242, pad = var_274_pad_0, pad_type = var_274_pad_type_0, strides = var_270, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_274_cast_fp16")]; tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202488128)))]; tensor q_7_cast_fp16 = mul(x = var_274_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; tensor var_278 = const()[name = tensor("op_278"), val = tensor([1, 1])]; tensor var_280 = const()[name = tensor("op_280"), val = tensor([1, 1])]; tensor var_282_pad_type_0 = const()[name = tensor("op_282_pad_type_0"), val = tensor("custom")]; tensor var_282_pad_0 = const()[name = tensor("op_282_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_282_cast_fp16 = conv(dilations = var_280, groups = var_242, pad = var_282_pad_0, pad_type = var_282_pad_type_0, strides = var_278, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_282_cast_fp16")]; tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202496384)))]; tensor k_9_cast_fp16 = mul(x = var_282_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; tensor var_286 = const()[name = tensor("op_286"), val = tensor([1, 1])]; tensor var_288 = const()[name = tensor("op_288"), val = tensor([1, 1])]; tensor var_290_pad_type_0 = const()[name = tensor("op_290_pad_type_0"), val = tensor("custom")]; tensor var_290_pad_0 = const()[name = tensor("op_290_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_290_cast_fp16 = conv(dilations = var_288, groups = var_242, pad = var_290_pad_0, pad_type = var_290_pad_type_0, strides = var_286, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_290_cast_fp16")]; tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202504640)))]; tensor v_7_cast_fp16 = mul(x = var_290_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 32, 128, 64])]; tensor q_9_cast_fp16 = reshape(shape = var_292, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; tensor var_294 = const()[name = tensor("op_294"), val = tensor([1, 32, 128, 64])]; tensor k_11_cast_fp16 = reshape(shape = var_294, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; tensor new_v_cache_1 = reshape(shape = var_296, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; tensor var_308_begin_0 = const()[name = tensor("op_308_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_308_end_0 = const()[name = tensor("op_308_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_308_end_mask_0 = const()[name = tensor("op_308_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_308_cast_fp16 = slice_by_index(begin = var_308_begin_0, end = var_308_end_0, end_mask = var_308_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_308_cast_fp16")]; tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_314_cast_fp16")]; tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_316_cast_fp16 = mul(x = var_314_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_316_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; tensor rotated_5_cast_fp16 = concat(axis = var_233, interleave = rotated_5_interleave_0, values = (var_316_cast_fp16, var_308_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; tensor var_319_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_319_cast_fp16")]; tensor var_320_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_320_cast_fp16")]; tensor roped_5_cast_fp16 = add(x = var_319_cast_fp16, y = var_320_cast_fp16)[name = tensor("roped_5_cast_fp16")]; tensor var_333_begin_0 = const()[name = tensor("op_333_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_333_end_0 = const()[name = tensor("op_333_end_0"), val = tensor([1, 32, 64, 64])]; tensor var_333_end_mask_0 = const()[name = tensor("op_333_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_333_cast_fp16 = slice_by_index(begin = var_333_begin_0, end = var_333_end_0, end_mask = var_333_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_333_cast_fp16")]; tensor var_339_begin_0 = const()[name = tensor("op_339_begin_0"), val = tensor([0, 0, 64, 0])]; tensor var_339_end_0 = const()[name = tensor("op_339_end_0"), val = tensor([1, 32, 128, 64])]; tensor var_339_end_mask_0 = const()[name = tensor("op_339_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_339_cast_fp16")]; tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_341_cast_fp16 = mul(x = var_339_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_341_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; tensor rotated_cast_fp16 = concat(axis = var_233, interleave = rotated_interleave_0, values = (var_341_cast_fp16, var_333_cast_fp16))[name = tensor("rotated_cast_fp16")]; tensor var_344_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_344_cast_fp16")]; tensor var_345_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_345_cast_fp16")]; tensor roped_cast_fp16 = add(x = var_344_cast_fp16, y = var_345_cast_fp16)[name = tensor("roped_cast_fp16")]; tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; tensor q_cast_fp16 = concat(axis = var_233, interleave = q_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; tensor new_k_cache_1 = concat(axis = var_233, interleave = k_13_interleave_0, values = roped_cast_fp16)[name = tensor("k_13_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; tensor k_cast_fp16 = concat(axis = var_235, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; tensor v_cast_fp16 = concat(axis = var_235, interleave = v_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_cast_fp16")]; tensor var_367_to_fp16 = const()[name = tensor("op_367_to_fp16"), val = tensor(0x1.6ap-4)]; tensor var_368_cast_fp16 = mul(x = q_cast_fp16, y = var_367_to_fp16)[name = tensor("op_368_cast_fp16")]; tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_368_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; tensor attn_weights_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; tensor var_376_cast_fp16 = softmax(axis = var_228, x = attn_weights_cast_fp16)[name = tensor("op_376_cast_fp16")]; tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_cast_fp16, y = var_376_cast_fp16)[name = tensor("attn_3_cast_fp16")]; tensor var_380 = const()[name = tensor("op_380"), val = tensor([1, 4096, 1, -1])]; tensor input_9_cast_fp16 = reshape(shape = var_380, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 1])]; tensor var_386 = const()[name = tensor("op_386"), val = tensor([1, 1])]; tensor var_388_pad_type_0 = const()[name = tensor("op_388_pad_type_0"), val = tensor("custom")]; tensor var_388_pad_0 = const()[name = tensor("op_388_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_388_cast_fp16 = conv(dilations = var_386, groups = var_242, pad = var_388_pad_0, pad_type = var_388_pad_type_0, strides = var_384, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202512896)))]; tensor attention_output_cast_fp16 = mul(x = var_388_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; tensor x_25_cast_fp16 = add(x = attention_output_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; tensor var_397_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_397_cast_fp16")]; tensor var_398 = const()[name = tensor("op_398"), val = tensor([1])]; tensor norm_x_cast_fp16 = reduce_mean(axes = var_398, keep_dims = var_243, x = var_397_cast_fp16)[name = tensor("norm_x_cast_fp16")]; tensor var_400_to_fp16 = const()[name = tensor("op_400_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_401_cast_fp16 = add(x = norm_x_cast_fp16, y = var_400_to_fp16)[name = tensor("op_401_cast_fp16")]; tensor var_402_epsilon_0_to_fp16 = const()[name = tensor("op_402_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_402_cast_fp16 = rsqrt(epsilon = var_402_epsilon_0_to_fp16, x = var_401_cast_fp16)[name = tensor("op_402_cast_fp16")]; tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_402_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202521152)))]; tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; tensor var_414 = const()[name = tensor("op_414"), val = tensor([1, 1])]; tensor var_416 = const()[name = tensor("op_416"), val = tensor([1, 1])]; tensor var_418_pad_type_0 = const()[name = tensor("op_418_pad_type_0"), val = tensor("custom")]; tensor var_418_pad_0 = const()[name = tensor("op_418_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_418_cast_fp16 = conv(dilations = var_416, groups = var_242, pad = var_418_pad_0, pad_type = var_418_pad_type_0, strides = var_414, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_418_cast_fp16")]; tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202529408)))]; tensor input_13_cast_fp16 = mul(x = var_418_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; tensor var_422 = const()[name = tensor("op_422"), val = tensor([1, 1])]; tensor var_424 = const()[name = tensor("op_424"), val = tensor([1, 1])]; tensor var_426_pad_type_0 = const()[name = tensor("op_426_pad_type_0"), val = tensor("custom")]; tensor var_426_pad_0 = const()[name = tensor("op_426_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_426_cast_fp16 = conv(dilations = var_424, groups = var_242, pad = var_426_pad_0, pad_type = var_426_pad_type_0, strides = var_422, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_426_cast_fp16")]; tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202551488)))]; tensor x_fc_2_cast_fp16 = mul(x = var_426_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; tensor var_428_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_428_cast_fp16")]; tensor input_cast_fp16 = mul(x = var_428_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; tensor var_432 = const()[name = tensor("op_432"), val = tensor([1, 1])]; tensor var_434 = const()[name = tensor("op_434"), val = tensor([1, 1])]; tensor var_436_pad_type_0 = const()[name = tensor("op_436_pad_type_0"), val = tensor("custom")]; tensor var_436_pad_0 = const()[name = tensor("op_436_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_436_cast_fp16 = conv(dilations = var_434, groups = var_242, pad = var_436_pad_0, pad_type = var_436_pad_type_0, strides = var_432, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_436_cast_fp16")]; tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202573568)))]; tensor var_437_cast_fp16 = mul(x = var_436_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_437_cast_fp16")]; tensor new_x = add(x = var_437_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_438_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_v_cache_0, new_v_cache_1); }