diff --git "a/Llama-3.2-1B-Instruct_chunk2.mlmodelc/model.mil" "b/Llama-3.2-1B-Instruct_chunk2.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/Llama-3.2-1B-Instruct_chunk2.mlmodelc/model.mil" @@ -0,0 +1,2290 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor k_cache_3, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor v_cache_3, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"k_cache_3", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}, {"v_cache_3", 0}})] { + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-1)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-2)]; + tensor var_27 = const()[name = tensor("op_27"), val = tensor(-3)]; + tensor var_69 = const()[name = tensor("op_69"), val = tensor(1)]; + tensor var_72 = const()[name = tensor("op_72"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_69, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_72, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_96_to_fp16 = const()[name = tensor("op_96_to_fp16"), val = tensor(0x1.6ap+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_96_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 2048, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_117, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 1])]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4416)))]; + tensor q_1_cast_fp16 = conv(dilations = var_122, groups = var_69, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_120, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_126 = const()[name = tensor("op_126"), val = tensor([1, 1])]; + tensor var_128 = const()[name = tensor("op_128"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8393088)))]; + tensor k_1_cast_fp16 = conv(dilations = var_128, groups = var_69, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_126, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_132 = const()[name = tensor("op_132"), val = tensor([1, 1])]; + tensor var_134 = const()[name = tensor("op_134"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10490304)))]; + tensor v_1_cast_fp16 = conv(dilations = var_134, groups = var_69, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_132, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_137 = const()[name = tensor("op_137"), val = tensor([1, 32, 64, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_137, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, -1, 64, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_139, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_153_begin_0 = const()[name = tensor("op_153_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_153_end_0 = const()[name = tensor("op_153_end_0"), val = tensor([1, 32, 32, 64])]; + tensor var_153_end_mask_0 = const()[name = tensor("op_153_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_153_cast_fp16 = slice_by_index(begin = var_153_begin_0, end = var_153_end_0, end_mask = var_153_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_153_cast_fp16")]; + tensor var_159_begin_0 = const()[name = tensor("op_159_begin_0"), val = tensor([0, 0, 32, 0])]; + tensor var_159_end_0 = const()[name = tensor("op_159_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_159_end_mask_0 = const()[name = tensor("op_159_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_159_cast_fp16 = slice_by_index(begin = var_159_begin_0, end = var_159_end_0, end_mask = var_159_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_159_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_161_cast_fp16 = mul(x = var_159_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_161_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_25, interleave = rotated_1_interleave_0, values = (var_161_cast_fp16, var_153_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_164_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_164_cast_fp16")]; + tensor var_165_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_165_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_164_cast_fp16, y = var_165_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_178_begin_0 = const()[name = tensor("op_178_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_178_end_0 = const()[name = tensor("op_178_end_0"), val = tensor([1, 8, 32, 64])]; + tensor var_178_end_mask_0 = const()[name = tensor("op_178_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_178_cast_fp16 = slice_by_index(begin = var_178_begin_0, end = var_178_end_0, end_mask = var_178_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_178_cast_fp16")]; + tensor var_184_begin_0 = const()[name = tensor("op_184_begin_0"), val = tensor([0, 0, 32, 0])]; + tensor var_184_end_0 = const()[name = tensor("op_184_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_184_end_mask_0 = const()[name = tensor("op_184_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_184_cast_fp16 = slice_by_index(begin = var_184_begin_0, end = var_184_end_0, end_mask = var_184_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_184_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_186_cast_fp16 = mul(x = var_184_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_186_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_25, interleave = rotated_3_interleave_0, values = (var_186_cast_fp16, var_178_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_189_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_189_cast_fp16")]; + tensor var_190_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_190_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_189_cast_fp16, y = var_190_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_193 = const()[name = tensor("op_193"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_193, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_195, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_3")]; + tensor k_11_cast_fp16 = concat(axis = var_27, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_21, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_211 = const()[name = tensor("op_211"), val = tensor([1, 2048, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_211, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_216_begin_0 = const()[name = tensor("op_216_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_216_end_0 = const()[name = tensor("op_216_end_0"), val = tensor([1, 64, 1, 64])]; + tensor var_216_end_mask_0 = const()[name = tensor("op_216_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_216_cast_fp16 = slice_by_index(begin = var_216_begin_0, end = var_216_end_0, end_mask = var_216_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor var_220_begin_0 = const()[name = tensor("op_220_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_220_end_0 = const()[name = tensor("op_220_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_220_end_mask_0 = const()[name = tensor("op_220_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_220_cast_fp16 = slice_by_index(begin = var_220_begin_0, end = var_220_end_0, end_mask = var_220_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_220_cast_fp16")]; + tensor var_224_begin_0 = const()[name = tensor("op_224_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_224_end_0 = const()[name = tensor("op_224_end_0"), val = tensor([1, 192, 1, 64])]; + tensor var_224_end_mask_0 = const()[name = tensor("op_224_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_224_cast_fp16 = slice_by_index(begin = var_224_begin_0, end = var_224_end_0, end_mask = var_224_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor var_228_begin_0 = const()[name = tensor("op_228_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_228_end_0 = const()[name = tensor("op_228_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_228_end_mask_0 = const()[name = tensor("op_228_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_228_cast_fp16 = slice_by_index(begin = var_228_begin_0, end = var_228_end_0, end_mask = var_228_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_228_cast_fp16")]; + tensor var_232_begin_0 = const()[name = tensor("op_232_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_232_end_0 = const()[name = tensor("op_232_end_0"), val = tensor([1, 320, 1, 64])]; + tensor var_232_end_mask_0 = const()[name = tensor("op_232_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_232_cast_fp16 = slice_by_index(begin = var_232_begin_0, end = var_232_end_0, end_mask = var_232_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_232_cast_fp16")]; + tensor var_236_begin_0 = const()[name = tensor("op_236_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_236_end_0 = const()[name = tensor("op_236_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_236_end_mask_0 = const()[name = tensor("op_236_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = var_236_end_0, end_mask = var_236_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_236_cast_fp16")]; + tensor var_240_begin_0 = const()[name = tensor("op_240_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_240_end_0 = const()[name = tensor("op_240_end_0"), val = tensor([1, 448, 1, 64])]; + tensor var_240_end_mask_0 = const()[name = tensor("op_240_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_240_cast_fp16 = slice_by_index(begin = var_240_begin_0, end = var_240_end_0, end_mask = var_240_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_240_cast_fp16")]; + tensor var_244_begin_0 = const()[name = tensor("op_244_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_244_end_0 = const()[name = tensor("op_244_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_244_end_mask_0 = const()[name = tensor("op_244_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_244_cast_fp16 = slice_by_index(begin = var_244_begin_0, end = var_244_end_0, end_mask = var_244_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_244_cast_fp16")]; + tensor var_248_begin_0 = const()[name = tensor("op_248_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_248_end_0 = const()[name = tensor("op_248_end_0"), val = tensor([1, 576, 1, 64])]; + tensor var_248_end_mask_0 = const()[name = tensor("op_248_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_248_cast_fp16 = slice_by_index(begin = var_248_begin_0, end = var_248_end_0, end_mask = var_248_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_248_cast_fp16")]; + tensor var_252_begin_0 = const()[name = tensor("op_252_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_252_end_0 = const()[name = tensor("op_252_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_252_end_mask_0 = const()[name = tensor("op_252_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_252_cast_fp16 = slice_by_index(begin = var_252_begin_0, end = var_252_end_0, end_mask = var_252_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_252_cast_fp16")]; + tensor var_256_begin_0 = const()[name = tensor("op_256_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_256_end_0 = const()[name = tensor("op_256_end_0"), val = tensor([1, 704, 1, 64])]; + tensor var_256_end_mask_0 = const()[name = tensor("op_256_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_256_cast_fp16 = slice_by_index(begin = var_256_begin_0, end = var_256_end_0, end_mask = var_256_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_256_cast_fp16")]; + tensor var_260_begin_0 = const()[name = tensor("op_260_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_260_end_0 = const()[name = tensor("op_260_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_260_end_mask_0 = const()[name = tensor("op_260_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_260_cast_fp16 = slice_by_index(begin = var_260_begin_0, end = var_260_end_0, end_mask = var_260_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_260_cast_fp16")]; + tensor var_264_begin_0 = const()[name = tensor("op_264_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_264_end_0 = const()[name = tensor("op_264_end_0"), val = tensor([1, 832, 1, 64])]; + tensor var_264_end_mask_0 = const()[name = tensor("op_264_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_264_cast_fp16 = slice_by_index(begin = var_264_begin_0, end = var_264_end_0, end_mask = var_264_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_264_cast_fp16")]; + tensor var_268_begin_0 = const()[name = tensor("op_268_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_268_end_0 = const()[name = tensor("op_268_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_268_end_mask_0 = const()[name = tensor("op_268_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_268_cast_fp16 = slice_by_index(begin = var_268_begin_0, end = var_268_end_0, end_mask = var_268_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_268_cast_fp16")]; + tensor var_272_begin_0 = const()[name = tensor("op_272_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_272_end_0 = const()[name = tensor("op_272_end_0"), val = tensor([1, 960, 1, 64])]; + tensor var_272_end_mask_0 = const()[name = tensor("op_272_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_272_cast_fp16 = slice_by_index(begin = var_272_begin_0, end = var_272_end_0, end_mask = var_272_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_272_cast_fp16")]; + tensor var_276_begin_0 = const()[name = tensor("op_276_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_276_end_0 = const()[name = tensor("op_276_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_276_end_mask_0 = const()[name = tensor("op_276_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_276_cast_fp16 = slice_by_index(begin = var_276_begin_0, end = var_276_end_0, end_mask = var_276_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_276_cast_fp16")]; + tensor var_280_begin_0 = const()[name = tensor("op_280_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_280_end_0 = const()[name = tensor("op_280_end_0"), val = tensor([1, 1088, 1, 64])]; + tensor var_280_end_mask_0 = const()[name = tensor("op_280_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_280_cast_fp16 = slice_by_index(begin = var_280_begin_0, end = var_280_end_0, end_mask = var_280_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_280_cast_fp16")]; + tensor var_284_begin_0 = const()[name = tensor("op_284_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_284_end_0 = const()[name = tensor("op_284_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_284_end_mask_0 = const()[name = tensor("op_284_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_284_cast_fp16 = slice_by_index(begin = var_284_begin_0, end = var_284_end_0, end_mask = var_284_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_284_cast_fp16")]; + tensor var_288_begin_0 = const()[name = tensor("op_288_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_288_end_0 = const()[name = tensor("op_288_end_0"), val = tensor([1, 1216, 1, 64])]; + tensor var_288_end_mask_0 = const()[name = tensor("op_288_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_288_cast_fp16 = slice_by_index(begin = var_288_begin_0, end = var_288_end_0, end_mask = var_288_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_288_cast_fp16")]; + tensor var_292_begin_0 = const()[name = tensor("op_292_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_292_end_0 = const()[name = tensor("op_292_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_292_end_mask_0 = const()[name = tensor("op_292_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_292_cast_fp16 = slice_by_index(begin = var_292_begin_0, end = var_292_end_0, end_mask = var_292_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_292_cast_fp16")]; + tensor var_296_begin_0 = const()[name = tensor("op_296_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_296_end_0 = const()[name = tensor("op_296_end_0"), val = tensor([1, 1344, 1, 64])]; + tensor var_296_end_mask_0 = const()[name = tensor("op_296_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_296_cast_fp16 = slice_by_index(begin = var_296_begin_0, end = var_296_end_0, end_mask = var_296_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_296_cast_fp16")]; + tensor var_300_begin_0 = const()[name = tensor("op_300_begin_0"), val = tensor([0, 1344, 0, 0])]; + tensor var_300_end_0 = const()[name = tensor("op_300_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_300_end_mask_0 = const()[name = tensor("op_300_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_300_cast_fp16 = slice_by_index(begin = var_300_begin_0, end = var_300_end_0, end_mask = var_300_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_300_cast_fp16")]; + tensor var_304_begin_0 = const()[name = tensor("op_304_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_304_end_0 = const()[name = tensor("op_304_end_0"), val = tensor([1, 1472, 1, 64])]; + tensor var_304_end_mask_0 = const()[name = tensor("op_304_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_304_cast_fp16 = slice_by_index(begin = var_304_begin_0, end = var_304_end_0, end_mask = var_304_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_304_cast_fp16")]; + tensor var_308_begin_0 = const()[name = tensor("op_308_begin_0"), val = tensor([0, 1472, 0, 0])]; + tensor var_308_end_0 = const()[name = tensor("op_308_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_308_end_mask_0 = const()[name = tensor("op_308_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_308_cast_fp16 = slice_by_index(begin = var_308_begin_0, end = var_308_end_0, end_mask = var_308_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_308_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 1600, 1, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_316_begin_0 = const()[name = tensor("op_316_begin_0"), val = tensor([0, 1600, 0, 0])]; + tensor var_316_end_0 = const()[name = tensor("op_316_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_316_end_mask_0 = const()[name = tensor("op_316_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_316_cast_fp16 = slice_by_index(begin = var_316_begin_0, end = var_316_end_0, end_mask = var_316_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_316_cast_fp16")]; + tensor var_320_begin_0 = const()[name = tensor("op_320_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_320_end_0 = const()[name = tensor("op_320_end_0"), val = tensor([1, 1728, 1, 64])]; + tensor var_320_end_mask_0 = const()[name = tensor("op_320_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_320_cast_fp16 = slice_by_index(begin = var_320_begin_0, end = var_320_end_0, end_mask = var_320_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_320_cast_fp16")]; + tensor var_324_begin_0 = const()[name = tensor("op_324_begin_0"), val = tensor([0, 1728, 0, 0])]; + tensor var_324_end_0 = const()[name = tensor("op_324_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_324_end_mask_0 = const()[name = tensor("op_324_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_324_cast_fp16 = slice_by_index(begin = var_324_begin_0, end = var_324_end_0, end_mask = var_324_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_324_cast_fp16")]; + tensor var_328_begin_0 = const()[name = tensor("op_328_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_328_end_0 = const()[name = tensor("op_328_end_0"), val = tensor([1, 1856, 1, 64])]; + tensor var_328_end_mask_0 = const()[name = tensor("op_328_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_328_cast_fp16 = slice_by_index(begin = var_328_begin_0, end = var_328_end_0, end_mask = var_328_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_328_cast_fp16")]; + tensor var_332_begin_0 = const()[name = tensor("op_332_begin_0"), val = tensor([0, 1856, 0, 0])]; + tensor var_332_end_0 = const()[name = tensor("op_332_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_332_end_mask_0 = const()[name = tensor("op_332_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_332_cast_fp16 = slice_by_index(begin = var_332_begin_0, end = var_332_end_0, end_mask = var_332_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_332_cast_fp16")]; + tensor var_336_begin_0 = const()[name = tensor("op_336_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_336_end_0 = const()[name = tensor("op_336_end_0"), val = tensor([1, 1984, 1, 64])]; + tensor var_336_end_mask_0 = const()[name = tensor("op_336_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_336_cast_fp16 = slice_by_index(begin = var_336_begin_0, end = var_336_end_0, end_mask = var_336_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_336_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 1984, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_346_begin_0 = const()[name = tensor("op_346_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_346_end_0 = const()[name = tensor("op_346_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_346_end_mask_0 = const()[name = tensor("op_346_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = var_346_end_0, end_mask = var_346_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_346_cast_fp16")]; + tensor var_362_begin_0 = const()[name = tensor("op_362_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_362_end_0 = const()[name = tensor("op_362_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_362_end_mask_0 = const()[name = tensor("op_362_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_362_cast_fp16 = slice_by_index(begin = var_362_begin_0, end = var_362_end_0, end_mask = var_362_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_362_cast_fp16")]; + tensor var_378_begin_0 = const()[name = tensor("op_378_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_378_end_0 = const()[name = tensor("op_378_end_0"), val = tensor([1, 512, 1, 192])]; + tensor var_378_end_mask_0 = const()[name = tensor("op_378_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_378_cast_fp16 = slice_by_index(begin = var_378_begin_0, end = var_378_end_0, end_mask = var_378_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_378_cast_fp16")]; + tensor var_394_begin_0 = const()[name = tensor("op_394_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_394_end_0 = const()[name = tensor("op_394_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_394_end_mask_0 = const()[name = tensor("op_394_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_394_cast_fp16 = slice_by_index(begin = var_394_begin_0, end = var_394_end_0, end_mask = var_394_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_394_cast_fp16")]; + tensor var_410_begin_0 = const()[name = tensor("op_410_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_410_end_0 = const()[name = tensor("op_410_end_0"), val = tensor([1, 512, 1, 320])]; + tensor var_410_end_mask_0 = const()[name = tensor("op_410_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_410_cast_fp16 = slice_by_index(begin = var_410_begin_0, end = var_410_end_0, end_mask = var_410_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_410_cast_fp16")]; + tensor var_426_begin_0 = const()[name = tensor("op_426_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_426_end_0 = const()[name = tensor("op_426_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_426_end_mask_0 = const()[name = tensor("op_426_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_426_cast_fp16 = slice_by_index(begin = var_426_begin_0, end = var_426_end_0, end_mask = var_426_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_426_cast_fp16")]; + tensor var_442_begin_0 = const()[name = tensor("op_442_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_442_end_0 = const()[name = tensor("op_442_end_0"), val = tensor([1, 512, 1, 448])]; + tensor var_442_end_mask_0 = const()[name = tensor("op_442_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_442_cast_fp16 = slice_by_index(begin = var_442_begin_0, end = var_442_end_0, end_mask = var_442_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_442_cast_fp16")]; + tensor var_458_begin_0 = const()[name = tensor("op_458_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_458_end_0 = const()[name = tensor("op_458_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_458_end_mask_0 = const()[name = tensor("op_458_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_458_cast_fp16 = slice_by_index(begin = var_458_begin_0, end = var_458_end_0, end_mask = var_458_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_458_cast_fp16")]; + tensor var_472_begin_0 = const()[name = tensor("op_472_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_472_end_0 = const()[name = tensor("op_472_end_0"), val = tensor([1, 64, 1, 512])]; + tensor var_472_end_mask_0 = const()[name = tensor("op_472_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_472_cast_fp16 = slice_by_index(begin = var_472_begin_0, end = var_472_end_0, end_mask = var_472_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_472_cast_fp16")]; + tensor var_488_begin_0 = const()[name = tensor("op_488_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_488_end_0 = const()[name = tensor("op_488_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_488_end_mask_0 = const()[name = tensor("op_488_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_488_cast_fp16 = slice_by_index(begin = var_488_begin_0, end = var_488_end_0, end_mask = var_488_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_488_cast_fp16")]; + tensor var_504_begin_0 = const()[name = tensor("op_504_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_504_end_0 = const()[name = tensor("op_504_end_0"), val = tensor([1, 192, 1, 512])]; + tensor var_504_end_mask_0 = const()[name = tensor("op_504_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_504_cast_fp16 = slice_by_index(begin = var_504_begin_0, end = var_504_end_0, end_mask = var_504_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_504_cast_fp16")]; + tensor var_520_begin_0 = const()[name = tensor("op_520_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_520_end_0 = const()[name = tensor("op_520_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_520_end_mask_0 = const()[name = tensor("op_520_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_520_cast_fp16 = slice_by_index(begin = var_520_begin_0, end = var_520_end_0, end_mask = var_520_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_520_cast_fp16")]; + tensor var_536_begin_0 = const()[name = tensor("op_536_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_536_end_0 = const()[name = tensor("op_536_end_0"), val = tensor([1, 320, 1, 512])]; + tensor var_536_end_mask_0 = const()[name = tensor("op_536_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_536_cast_fp16 = slice_by_index(begin = var_536_begin_0, end = var_536_end_0, end_mask = var_536_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_536_cast_fp16")]; + tensor var_552_begin_0 = const()[name = tensor("op_552_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_552_end_0 = const()[name = tensor("op_552_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_552_end_mask_0 = const()[name = tensor("op_552_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_552_cast_fp16 = slice_by_index(begin = var_552_begin_0, end = var_552_end_0, end_mask = var_552_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_552_cast_fp16")]; + tensor var_568_begin_0 = const()[name = tensor("op_568_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_568_end_0 = const()[name = tensor("op_568_end_0"), val = tensor([1, 448, 1, 512])]; + tensor var_568_end_mask_0 = const()[name = tensor("op_568_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_568_cast_fp16 = slice_by_index(begin = var_568_begin_0, end = var_568_end_0, end_mask = var_568_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_568_cast_fp16")]; + tensor var_584_begin_0 = const()[name = tensor("op_584_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_584_end_0 = const()[name = tensor("op_584_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_584_end_mask_0 = const()[name = tensor("op_584_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_584_cast_fp16 = slice_by_index(begin = var_584_begin_0, end = var_584_end_0, end_mask = var_584_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_584_cast_fp16")]; + tensor var_600_equation_0 = const()[name = tensor("op_600_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_600_cast_fp16 = einsum(equation = var_600_equation_0, values = (var_346_cast_fp16, var_216_cast_fp16))[name = tensor("op_600_cast_fp16")]; + tensor var_601_to_fp16 = const()[name = tensor("op_601_to_fp16"), val = tensor(0x1p-3)]; + tensor var_602_cast_fp16 = mul(x = var_600_cast_fp16, y = var_601_to_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_604_equation_0 = const()[name = tensor("op_604_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_604_cast_fp16 = einsum(equation = var_604_equation_0, values = (var_346_cast_fp16, var_220_cast_fp16))[name = tensor("op_604_cast_fp16")]; + tensor var_605_to_fp16 = const()[name = tensor("op_605_to_fp16"), val = tensor(0x1p-3)]; + tensor var_606_cast_fp16 = mul(x = var_604_cast_fp16, y = var_605_to_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_608_equation_0 = const()[name = tensor("op_608_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_608_cast_fp16 = einsum(equation = var_608_equation_0, values = (var_346_cast_fp16, var_224_cast_fp16))[name = tensor("op_608_cast_fp16")]; + tensor var_609_to_fp16 = const()[name = tensor("op_609_to_fp16"), val = tensor(0x1p-3)]; + tensor var_610_cast_fp16 = mul(x = var_608_cast_fp16, y = var_609_to_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_612_equation_0 = const()[name = tensor("op_612_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_612_cast_fp16 = einsum(equation = var_612_equation_0, values = (var_346_cast_fp16, var_228_cast_fp16))[name = tensor("op_612_cast_fp16")]; + tensor var_613_to_fp16 = const()[name = tensor("op_613_to_fp16"), val = tensor(0x1p-3)]; + tensor var_614_cast_fp16 = mul(x = var_612_cast_fp16, y = var_613_to_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_616_equation_0 = const()[name = tensor("op_616_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_616_cast_fp16 = einsum(equation = var_616_equation_0, values = (var_362_cast_fp16, var_232_cast_fp16))[name = tensor("op_616_cast_fp16")]; + tensor var_617_to_fp16 = const()[name = tensor("op_617_to_fp16"), val = tensor(0x1p-3)]; + tensor var_618_cast_fp16 = mul(x = var_616_cast_fp16, y = var_617_to_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_620_equation_0 = const()[name = tensor("op_620_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_620_cast_fp16 = einsum(equation = var_620_equation_0, values = (var_362_cast_fp16, var_236_cast_fp16))[name = tensor("op_620_cast_fp16")]; + tensor var_621_to_fp16 = const()[name = tensor("op_621_to_fp16"), val = tensor(0x1p-3)]; + tensor var_622_cast_fp16 = mul(x = var_620_cast_fp16, y = var_621_to_fp16)[name = tensor("op_622_cast_fp16")]; + tensor var_624_equation_0 = const()[name = tensor("op_624_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_624_cast_fp16 = einsum(equation = var_624_equation_0, values = (var_362_cast_fp16, var_240_cast_fp16))[name = tensor("op_624_cast_fp16")]; + tensor var_625_to_fp16 = const()[name = tensor("op_625_to_fp16"), val = tensor(0x1p-3)]; + tensor var_626_cast_fp16 = mul(x = var_624_cast_fp16, y = var_625_to_fp16)[name = tensor("op_626_cast_fp16")]; + tensor var_628_equation_0 = const()[name = tensor("op_628_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_628_cast_fp16 = einsum(equation = var_628_equation_0, values = (var_362_cast_fp16, var_244_cast_fp16))[name = tensor("op_628_cast_fp16")]; + tensor var_629_to_fp16 = const()[name = tensor("op_629_to_fp16"), val = tensor(0x1p-3)]; + tensor var_630_cast_fp16 = mul(x = var_628_cast_fp16, y = var_629_to_fp16)[name = tensor("op_630_cast_fp16")]; + tensor var_632_equation_0 = const()[name = tensor("op_632_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_632_cast_fp16 = einsum(equation = var_632_equation_0, values = (var_378_cast_fp16, var_248_cast_fp16))[name = tensor("op_632_cast_fp16")]; + tensor var_633_to_fp16 = const()[name = tensor("op_633_to_fp16"), val = tensor(0x1p-3)]; + tensor var_634_cast_fp16 = mul(x = var_632_cast_fp16, y = var_633_to_fp16)[name = tensor("op_634_cast_fp16")]; + tensor var_636_equation_0 = const()[name = tensor("op_636_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_636_cast_fp16 = einsum(equation = var_636_equation_0, values = (var_378_cast_fp16, var_252_cast_fp16))[name = tensor("op_636_cast_fp16")]; + tensor var_637_to_fp16 = const()[name = tensor("op_637_to_fp16"), val = tensor(0x1p-3)]; + tensor var_638_cast_fp16 = mul(x = var_636_cast_fp16, y = var_637_to_fp16)[name = tensor("op_638_cast_fp16")]; + tensor var_640_equation_0 = const()[name = tensor("op_640_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_640_cast_fp16 = einsum(equation = var_640_equation_0, values = (var_378_cast_fp16, var_256_cast_fp16))[name = tensor("op_640_cast_fp16")]; + tensor var_641_to_fp16 = const()[name = tensor("op_641_to_fp16"), val = tensor(0x1p-3)]; + tensor var_642_cast_fp16 = mul(x = var_640_cast_fp16, y = var_641_to_fp16)[name = tensor("op_642_cast_fp16")]; + tensor var_644_equation_0 = const()[name = tensor("op_644_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_644_cast_fp16 = einsum(equation = var_644_equation_0, values = (var_378_cast_fp16, var_260_cast_fp16))[name = tensor("op_644_cast_fp16")]; + tensor var_645_to_fp16 = const()[name = tensor("op_645_to_fp16"), val = tensor(0x1p-3)]; + tensor var_646_cast_fp16 = mul(x = var_644_cast_fp16, y = var_645_to_fp16)[name = tensor("op_646_cast_fp16")]; + tensor var_648_equation_0 = const()[name = tensor("op_648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_648_cast_fp16 = einsum(equation = var_648_equation_0, values = (var_394_cast_fp16, var_264_cast_fp16))[name = tensor("op_648_cast_fp16")]; + tensor var_649_to_fp16 = const()[name = tensor("op_649_to_fp16"), val = tensor(0x1p-3)]; + tensor var_650_cast_fp16 = mul(x = var_648_cast_fp16, y = var_649_to_fp16)[name = tensor("op_650_cast_fp16")]; + tensor var_652_equation_0 = const()[name = tensor("op_652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_652_cast_fp16 = einsum(equation = var_652_equation_0, values = (var_394_cast_fp16, var_268_cast_fp16))[name = tensor("op_652_cast_fp16")]; + tensor var_653_to_fp16 = const()[name = tensor("op_653_to_fp16"), val = tensor(0x1p-3)]; + tensor var_654_cast_fp16 = mul(x = var_652_cast_fp16, y = var_653_to_fp16)[name = tensor("op_654_cast_fp16")]; + tensor var_656_equation_0 = const()[name = tensor("op_656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_656_cast_fp16 = einsum(equation = var_656_equation_0, values = (var_394_cast_fp16, var_272_cast_fp16))[name = tensor("op_656_cast_fp16")]; + tensor var_657_to_fp16 = const()[name = tensor("op_657_to_fp16"), val = tensor(0x1p-3)]; + tensor var_658_cast_fp16 = mul(x = var_656_cast_fp16, y = var_657_to_fp16)[name = tensor("op_658_cast_fp16")]; + tensor var_660_equation_0 = const()[name = tensor("op_660_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_660_cast_fp16 = einsum(equation = var_660_equation_0, values = (var_394_cast_fp16, var_276_cast_fp16))[name = tensor("op_660_cast_fp16")]; + tensor var_661_to_fp16 = const()[name = tensor("op_661_to_fp16"), val = tensor(0x1p-3)]; + tensor var_662_cast_fp16 = mul(x = var_660_cast_fp16, y = var_661_to_fp16)[name = tensor("op_662_cast_fp16")]; + tensor var_664_equation_0 = const()[name = tensor("op_664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_664_cast_fp16 = einsum(equation = var_664_equation_0, values = (var_410_cast_fp16, var_280_cast_fp16))[name = tensor("op_664_cast_fp16")]; + tensor var_665_to_fp16 = const()[name = tensor("op_665_to_fp16"), val = tensor(0x1p-3)]; + tensor var_666_cast_fp16 = mul(x = var_664_cast_fp16, y = var_665_to_fp16)[name = tensor("op_666_cast_fp16")]; + tensor var_668_equation_0 = const()[name = tensor("op_668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_668_cast_fp16 = einsum(equation = var_668_equation_0, values = (var_410_cast_fp16, var_284_cast_fp16))[name = tensor("op_668_cast_fp16")]; + tensor var_669_to_fp16 = const()[name = tensor("op_669_to_fp16"), val = tensor(0x1p-3)]; + tensor var_670_cast_fp16 = mul(x = var_668_cast_fp16, y = var_669_to_fp16)[name = tensor("op_670_cast_fp16")]; + tensor var_672_equation_0 = const()[name = tensor("op_672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_672_cast_fp16 = einsum(equation = var_672_equation_0, values = (var_410_cast_fp16, var_288_cast_fp16))[name = tensor("op_672_cast_fp16")]; + tensor var_673_to_fp16 = const()[name = tensor("op_673_to_fp16"), val = tensor(0x1p-3)]; + tensor var_674_cast_fp16 = mul(x = var_672_cast_fp16, y = var_673_to_fp16)[name = tensor("op_674_cast_fp16")]; + tensor var_676_equation_0 = const()[name = tensor("op_676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_676_cast_fp16 = einsum(equation = var_676_equation_0, values = (var_410_cast_fp16, var_292_cast_fp16))[name = tensor("op_676_cast_fp16")]; + tensor var_677_to_fp16 = const()[name = tensor("op_677_to_fp16"), val = tensor(0x1p-3)]; + tensor var_678_cast_fp16 = mul(x = var_676_cast_fp16, y = var_677_to_fp16)[name = tensor("op_678_cast_fp16")]; + tensor var_680_equation_0 = const()[name = tensor("op_680_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_680_cast_fp16 = einsum(equation = var_680_equation_0, values = (var_426_cast_fp16, var_296_cast_fp16))[name = tensor("op_680_cast_fp16")]; + tensor var_681_to_fp16 = const()[name = tensor("op_681_to_fp16"), val = tensor(0x1p-3)]; + tensor var_682_cast_fp16 = mul(x = var_680_cast_fp16, y = var_681_to_fp16)[name = tensor("op_682_cast_fp16")]; + tensor var_684_equation_0 = const()[name = tensor("op_684_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_684_cast_fp16 = einsum(equation = var_684_equation_0, values = (var_426_cast_fp16, var_300_cast_fp16))[name = tensor("op_684_cast_fp16")]; + tensor var_685_to_fp16 = const()[name = tensor("op_685_to_fp16"), val = tensor(0x1p-3)]; + tensor var_686_cast_fp16 = mul(x = var_684_cast_fp16, y = var_685_to_fp16)[name = tensor("op_686_cast_fp16")]; + tensor var_688_equation_0 = const()[name = tensor("op_688_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_688_cast_fp16 = einsum(equation = var_688_equation_0, values = (var_426_cast_fp16, var_304_cast_fp16))[name = tensor("op_688_cast_fp16")]; + tensor var_689_to_fp16 = const()[name = tensor("op_689_to_fp16"), val = tensor(0x1p-3)]; + tensor var_690_cast_fp16 = mul(x = var_688_cast_fp16, y = var_689_to_fp16)[name = tensor("op_690_cast_fp16")]; + tensor var_692_equation_0 = const()[name = tensor("op_692_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_692_cast_fp16 = einsum(equation = var_692_equation_0, values = (var_426_cast_fp16, var_308_cast_fp16))[name = tensor("op_692_cast_fp16")]; + tensor var_693_to_fp16 = const()[name = tensor("op_693_to_fp16"), val = tensor(0x1p-3)]; + tensor var_694_cast_fp16 = mul(x = var_692_cast_fp16, y = var_693_to_fp16)[name = tensor("op_694_cast_fp16")]; + tensor var_696_equation_0 = const()[name = tensor("op_696_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_696_cast_fp16 = einsum(equation = var_696_equation_0, values = (var_442_cast_fp16, var_312_cast_fp16))[name = tensor("op_696_cast_fp16")]; + tensor var_697_to_fp16 = const()[name = tensor("op_697_to_fp16"), val = tensor(0x1p-3)]; + tensor var_698_cast_fp16 = mul(x = var_696_cast_fp16, y = var_697_to_fp16)[name = tensor("op_698_cast_fp16")]; + tensor var_700_equation_0 = const()[name = tensor("op_700_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_700_cast_fp16 = einsum(equation = var_700_equation_0, values = (var_442_cast_fp16, var_316_cast_fp16))[name = tensor("op_700_cast_fp16")]; + tensor var_701_to_fp16 = const()[name = tensor("op_701_to_fp16"), val = tensor(0x1p-3)]; + tensor var_702_cast_fp16 = mul(x = var_700_cast_fp16, y = var_701_to_fp16)[name = tensor("op_702_cast_fp16")]; + tensor var_704_equation_0 = const()[name = tensor("op_704_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_704_cast_fp16 = einsum(equation = var_704_equation_0, values = (var_442_cast_fp16, var_320_cast_fp16))[name = tensor("op_704_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1p-3)]; + tensor var_706_cast_fp16 = mul(x = var_704_cast_fp16, y = var_705_to_fp16)[name = tensor("op_706_cast_fp16")]; + tensor var_708_equation_0 = const()[name = tensor("op_708_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_708_cast_fp16 = einsum(equation = var_708_equation_0, values = (var_442_cast_fp16, var_324_cast_fp16))[name = tensor("op_708_cast_fp16")]; + tensor var_709_to_fp16 = const()[name = tensor("op_709_to_fp16"), val = tensor(0x1p-3)]; + tensor var_710_cast_fp16 = mul(x = var_708_cast_fp16, y = var_709_to_fp16)[name = tensor("op_710_cast_fp16")]; + tensor var_712_equation_0 = const()[name = tensor("op_712_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_712_cast_fp16 = einsum(equation = var_712_equation_0, values = (var_458_cast_fp16, var_328_cast_fp16))[name = tensor("op_712_cast_fp16")]; + tensor var_713_to_fp16 = const()[name = tensor("op_713_to_fp16"), val = tensor(0x1p-3)]; + tensor var_714_cast_fp16 = mul(x = var_712_cast_fp16, y = var_713_to_fp16)[name = tensor("op_714_cast_fp16")]; + tensor var_716_equation_0 = const()[name = tensor("op_716_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_716_cast_fp16 = einsum(equation = var_716_equation_0, values = (var_458_cast_fp16, var_332_cast_fp16))[name = tensor("op_716_cast_fp16")]; + tensor var_717_to_fp16 = const()[name = tensor("op_717_to_fp16"), val = tensor(0x1p-3)]; + tensor var_718_cast_fp16 = mul(x = var_716_cast_fp16, y = var_717_to_fp16)[name = tensor("op_718_cast_fp16")]; + tensor var_720_equation_0 = const()[name = tensor("op_720_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_720_cast_fp16 = einsum(equation = var_720_equation_0, values = (var_458_cast_fp16, var_336_cast_fp16))[name = tensor("op_720_cast_fp16")]; + tensor var_721_to_fp16 = const()[name = tensor("op_721_to_fp16"), val = tensor(0x1p-3)]; + tensor var_722_cast_fp16 = mul(x = var_720_cast_fp16, y = var_721_to_fp16)[name = tensor("op_722_cast_fp16")]; + tensor var_724_equation_0 = const()[name = tensor("op_724_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_724_cast_fp16 = einsum(equation = var_724_equation_0, values = (var_458_cast_fp16, var_340_cast_fp16))[name = tensor("op_724_cast_fp16")]; + tensor var_725_to_fp16 = const()[name = tensor("op_725_to_fp16"), val = tensor(0x1p-3)]; + tensor var_726_cast_fp16 = mul(x = var_724_cast_fp16, y = var_725_to_fp16)[name = tensor("op_726_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_602_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_606_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_610_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_614_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_618_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_622_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_626_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_630_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_634_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_638_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_642_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_646_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_650_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_654_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_658_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_662_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_666_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_670_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_674_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_678_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_682_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_686_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_690_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_694_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_698_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_702_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_706_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_710_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_714_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_718_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_722_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_726_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_69, x = aw_1_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_69, x = aw_3_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_69, x = aw_5_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_69, x = aw_7_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_69, x = aw_9_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_69, x = aw_11_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_69, x = aw_13_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_69, x = aw_15_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_69, x = aw_17_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_69, x = aw_19_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_69, x = aw_21_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_69, x = aw_23_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_69, x = aw_25_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_69, x = aw_27_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_69, x = aw_29_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_69, x = aw_31_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_69, x = aw_33_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_69, x = aw_35_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_69, x = aw_37_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_69, x = aw_39_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_69, x = aw_41_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_69, x = aw_43_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_69, x = aw_45_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_69, x = aw_47_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_69, x = aw_49_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_69, x = aw_51_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_785_cast_fp16 = softmax(axis = var_69, x = aw_53_cast_fp16)[name = tensor("op_785_cast_fp16")]; + tensor var_786_cast_fp16 = softmax(axis = var_69, x = aw_55_cast_fp16)[name = tensor("op_786_cast_fp16")]; + tensor var_787_cast_fp16 = softmax(axis = var_69, x = aw_57_cast_fp16)[name = tensor("op_787_cast_fp16")]; + tensor var_788_cast_fp16 = softmax(axis = var_69, x = aw_59_cast_fp16)[name = tensor("op_788_cast_fp16")]; + tensor var_789_cast_fp16 = softmax(axis = var_69, x = aw_61_cast_fp16)[name = tensor("op_789_cast_fp16")]; + tensor var_790_cast_fp16 = softmax(axis = var_69, x = aw_63_cast_fp16)[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_472_cast_fp16, var_759_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_472_cast_fp16, var_760_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_472_cast_fp16, var_761_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_472_cast_fp16, var_762_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_488_cast_fp16, var_763_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_488_cast_fp16, var_764_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_488_cast_fp16, var_765_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_488_cast_fp16, var_766_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_504_cast_fp16, var_767_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_504_cast_fp16, var_768_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_504_cast_fp16, var_769_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_504_cast_fp16, var_770_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_520_cast_fp16, var_771_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_520_cast_fp16, var_772_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_520_cast_fp16, var_773_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_520_cast_fp16, var_774_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_536_cast_fp16, var_775_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_536_cast_fp16, var_776_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_536_cast_fp16, var_777_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_536_cast_fp16, var_778_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_552_cast_fp16, var_779_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_552_cast_fp16, var_780_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_552_cast_fp16, var_781_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_552_cast_fp16, var_782_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_568_cast_fp16, var_783_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_568_cast_fp16, var_784_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_568_cast_fp16, var_785_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_568_cast_fp16, var_786_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_584_cast_fp16, var_787_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor var_850_equation_0 = const()[name = tensor("op_850_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_850_cast_fp16 = einsum(equation = var_850_equation_0, values = (var_584_cast_fp16, var_788_cast_fp16))[name = tensor("op_850_cast_fp16")]; + tensor var_852_equation_0 = const()[name = tensor("op_852_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_852_cast_fp16 = einsum(equation = var_852_equation_0, values = (var_584_cast_fp16, var_789_cast_fp16))[name = tensor("op_852_cast_fp16")]; + tensor var_854_equation_0 = const()[name = tensor("op_854_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_854_cast_fp16 = einsum(equation = var_854_equation_0, values = (var_584_cast_fp16, var_790_cast_fp16))[name = tensor("op_854_cast_fp16")]; + tensor x_15_interleave_0 = const()[name = tensor("x_15_interleave_0"), val = tensor(false)]; + tensor x_15_cast_fp16 = concat(axis = var_69, interleave = x_15_interleave_0, values = (var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16, var_850_cast_fp16, var_852_cast_fp16, var_854_cast_fp16))[name = tensor("x_15_cast_fp16")]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 2048, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_859, x = x_15_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_862 = const()[name = tensor("op_862"), val = tensor([1, 1])]; + tensor var_864 = const()[name = tensor("op_864"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12587520)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_864, groups = var_69, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_862, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_17_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20976192)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_69, interleave = x_eps_3_interleave_0, values = (x_17_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_72, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_890_to_fp16 = const()[name = tensor("op_890_to_fp16"), val = tensor(0x1.6ap+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_890_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20976384)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_901 = const()[name = tensor("op_901"), val = tensor([1, 1])]; + tensor var_903 = const()[name = tensor("op_903"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20980544)))]; + tensor input_7_cast_fp16 = conv(dilations = var_903, groups = var_69, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_901, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, 1])]; + tensor var_909 = const()[name = tensor("op_909"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54535040)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_909, groups = var_69, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_907, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 1])]; + tensor var_917 = const()[name = tensor("op_917"), val = tensor([1, 1])]; + tensor var_919_pad_type_0 = const()[name = tensor("op_919_pad_type_0"), val = tensor("custom")]; + tensor var_919_pad_0 = const()[name = tensor("op_919_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88089536)))]; + tensor var_919_cast_fp16 = conv(dilations = var_917, groups = var_69, pad = var_919_pad_0, pad_type = var_919_pad_type_0, strides = var_915, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_919_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = var_919_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_925 = const()[name = tensor("op_925"), val = tensor(-1)]; + tensor var_929 = const()[name = tensor("op_929"), val = tensor(-2)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-3)]; + tensor var_973 = const()[name = tensor("op_973"), val = tensor(1)]; + tensor var_976 = const()[name = tensor("op_976"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121644032)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_973, interleave = x_eps_5_interleave_0, values = (x_21_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_976, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_21_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_999_to_fp16 = const()[name = tensor("op_999_to_fp16"), val = tensor(0x1.6ap+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_999_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121644224)))]; + tensor x_25_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_1023 = const()[name = tensor("op_1023"), val = tensor([1, 2048, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1023, x = x_25_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1026 = const()[name = tensor("op_1026"), val = tensor([1, 1])]; + tensor var_1028 = const()[name = tensor("op_1028"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121648384)))]; + tensor q_9_cast_fp16 = conv(dilations = var_1028, groups = var_973, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_1026, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1032 = const()[name = tensor("op_1032"), val = tensor([1, 1])]; + tensor var_1034 = const()[name = tensor("op_1034"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130037056)))]; + tensor k_13_cast_fp16 = conv(dilations = var_1034, groups = var_973, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_1032, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1038 = const()[name = tensor("op_1038"), val = tensor([1, 1])]; + tensor var_1040 = const()[name = tensor("op_1040"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132134272)))]; + tensor v_11_cast_fp16 = conv(dilations = var_1040, groups = var_973, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_1038, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1043 = const()[name = tensor("op_1043"), val = tensor([1, 32, 64, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1043, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, -1, 64, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_1045, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_1059_begin_0 = const()[name = tensor("op_1059_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1059_end_0 = const()[name = tensor("op_1059_end_0"), val = tensor([1, 32, 32, 64])]; + tensor var_1059_end_mask_0 = const()[name = tensor("op_1059_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1059_cast_fp16 = slice_by_index(begin = var_1059_begin_0, end = var_1059_end_0, end_mask = var_1059_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1059_cast_fp16")]; + tensor var_1065_begin_0 = const()[name = tensor("op_1065_begin_0"), val = tensor([0, 0, 32, 0])]; + tensor var_1065_end_0 = const()[name = tensor("op_1065_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1065_end_mask_0 = const()[name = tensor("op_1065_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1065_cast_fp16 = slice_by_index(begin = var_1065_begin_0, end = var_1065_end_0, end_mask = var_1065_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1065_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1067_cast_fp16 = mul(x = var_1065_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_929, interleave = rotated_5_interleave_0, values = (var_1067_cast_fp16, var_1059_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1070_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1070_cast_fp16")]; + tensor var_1071_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1071_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1070_cast_fp16, y = var_1071_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1084_begin_0 = const()[name = tensor("op_1084_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1084_end_0 = const()[name = tensor("op_1084_end_0"), val = tensor([1, 8, 32, 64])]; + tensor var_1084_end_mask_0 = const()[name = tensor("op_1084_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1084_cast_fp16 = slice_by_index(begin = var_1084_begin_0, end = var_1084_end_0, end_mask = var_1084_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_1084_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 32, 0])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1092_cast_fp16 = mul(x = var_1090_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_929, interleave = rotated_7_interleave_0, values = (var_1092_cast_fp16, var_1084_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1095_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_1095_cast_fp16")]; + tensor var_1096_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1096_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1095_cast_fp16, y = var_1096_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1099 = const()[name = tensor("op_1099"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_1099, x = roped_7_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_1101 = const()[name = tensor("op_1101"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1101, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_23_interleave_0 = const()[name = tensor("k_23_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_2")]; + tensor k_23_cast_fp16 = concat(axis = var_931, interleave = k_23_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_23_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_925, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 2048, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 64, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 192, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 320, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 448, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 576, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 704, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 832, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 960, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 1088, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 1216, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 1344, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 1344, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 1472, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 1472, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 1600, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 1600, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 1728, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 1728, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 1856, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 1856, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 1984, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 1984, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 192])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 320])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 448])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 64, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 192, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 320, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 448, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1252_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1252_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1252_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1268_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1268_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1268_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1284_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1284_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1284_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1300_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1300_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1300_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1316_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1316_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1316_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1332_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1332_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1332_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1348_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1348_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1348_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1364_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1364_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1364_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_973, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_973, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_973, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_973, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_973, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_973, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_973, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_973, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_973, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_973, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_973, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_973, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_973, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_973, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_973, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_973, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_973, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_973, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_973, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_973, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_973, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_973, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_973, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_973, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_973, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_973, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_973, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_973, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_973, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_973, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_973, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_973, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1378_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1378_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1378_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1394_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1394_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1394_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1410_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1410_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1410_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1426_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1426_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1426_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1442_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1442_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1442_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1458_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1458_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1458_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1474_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1474_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1474_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1490_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1490_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1490_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_35_interleave_0 = const()[name = tensor("x_35_interleave_0"), val = tensor(false)]; + tensor x_35_cast_fp16 = concat(axis = var_973, interleave = x_35_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_35_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 2048, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_35_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1768 = const()[name = tensor("op_1768"), val = tensor([1, 1])]; + tensor var_1770 = const()[name = tensor("op_1770"), val = tensor([1, 1])]; + tensor attention_output_3_pad_type_0 = const()[name = tensor("attention_output_3_pad_type_0"), val = tensor("custom")]; + tensor attention_output_3_pad_0 = const()[name = tensor("attention_output_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134231488)))]; + tensor attention_output_3_cast_fp16 = conv(dilations = var_1770, groups = var_973, pad = attention_output_3_pad_0, pad_type = attention_output_3_pad_type_0, strides = var_1768, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_37_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_21_cast_fp16)[name = tensor("x_37_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142620160)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_973, interleave = x_eps_7_interleave_0, values = (x_37_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_976, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_37_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1796_to_fp16 = const()[name = tensor("op_1796_to_fp16"), val = tensor(0x1.6ap+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1796_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142620352)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1807 = const()[name = tensor("op_1807"), val = tensor([1, 1])]; + tensor var_1809 = const()[name = tensor("op_1809"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142624512)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1809, groups = var_973, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1807, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1813 = const()[name = tensor("op_1813"), val = tensor([1, 1])]; + tensor var_1815 = const()[name = tensor("op_1815"), val = tensor([1, 1])]; + tensor x_fc_2_3_pad_type_0 = const()[name = tensor("x_fc_2_3_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_3_pad_0 = const()[name = tensor("x_fc_2_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176179008)))]; + tensor x_fc_2_3_cast_fp16 = conv(dilations = var_1815, groups = var_973, pad = x_fc_2_3_pad_0, pad_type = x_fc_2_3_pad_type_0, strides = var_1813, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1818_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1818_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1818_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1821 = const()[name = tensor("op_1821"), val = tensor([1, 1])]; + tensor var_1823 = const()[name = tensor("op_1823"), val = tensor([1, 1])]; + tensor var_1825_pad_type_0 = const()[name = tensor("op_1825_pad_type_0"), val = tensor("custom")]; + tensor var_1825_pad_0 = const()[name = tensor("op_1825_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209733504)))]; + tensor var_1825_cast_fp16 = conv(dilations = var_1823, groups = var_973, pad = var_1825_pad_0, pad_type = var_1825_pad_type_0, strides = var_1821, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("op_1825_cast_fp16")]; + tensor x_41_cast_fp16 = add(x = var_1825_cast_fp16, y = x_37_cast_fp16)[name = tensor("x_41_cast_fp16")]; + tensor var_1831 = const()[name = tensor("op_1831"), val = tensor(-1)]; + tensor var_1835 = const()[name = tensor("op_1835"), val = tensor(-2)]; + tensor var_1837 = const()[name = tensor("op_1837"), val = tensor(-3)]; + tensor var_1879 = const()[name = tensor("op_1879"), val = tensor(1)]; + tensor var_1882 = const()[name = tensor("op_1882"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(243288000)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1879, interleave = x_eps_9_interleave_0, values = (x_41_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1882, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_41_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1905_to_fp16 = const()[name = tensor("op_1905_to_fp16"), val = tensor(0x1.6ap+5)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1905_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(243288192)))]; + tensor x_45_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_45_cast_fp16")]; + tensor var_1929 = const()[name = tensor("op_1929"), val = tensor([1, 2048, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1929, x = x_45_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1932 = const()[name = tensor("op_1932"), val = tensor([1, 1])]; + tensor var_1934 = const()[name = tensor("op_1934"), val = tensor([1, 1])]; + tensor q_17_pad_type_0 = const()[name = tensor("q_17_pad_type_0"), val = tensor("custom")]; + tensor q_17_pad_0 = const()[name = tensor("q_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_2_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(243292352)))]; + tensor q_17_cast_fp16 = conv(dilations = var_1934, groups = var_1879, pad = q_17_pad_0, pad_type = q_17_pad_type_0, strides = var_1932, weight = blocks_2_attn_q_proj_weight_to_fp16, x = input_21_cast_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1938 = const()[name = tensor("op_1938"), val = tensor([1, 1])]; + tensor var_1940 = const()[name = tensor("op_1940"), val = tensor([1, 1])]; + tensor k_25_pad_type_0 = const()[name = tensor("k_25_pad_type_0"), val = tensor("custom")]; + tensor k_25_pad_0 = const()[name = tensor("k_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_2_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251681024)))]; + tensor k_25_cast_fp16 = conv(dilations = var_1940, groups = var_1879, pad = k_25_pad_0, pad_type = k_25_pad_type_0, strides = var_1938, weight = blocks_2_attn_k_proj_weight_to_fp16, x = input_21_cast_fp16)[name = tensor("k_25_cast_fp16")]; + tensor var_1944 = const()[name = tensor("op_1944"), val = tensor([1, 1])]; + tensor var_1946 = const()[name = tensor("op_1946"), val = tensor([1, 1])]; + tensor v_21_pad_type_0 = const()[name = tensor("v_21_pad_type_0"), val = tensor("custom")]; + tensor v_21_pad_0 = const()[name = tensor("v_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_2_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253778240)))]; + tensor v_21_cast_fp16 = conv(dilations = var_1946, groups = var_1879, pad = v_21_pad_0, pad_type = v_21_pad_type_0, strides = var_1944, weight = blocks_2_attn_v_proj_weight_to_fp16, x = input_21_cast_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 32, 64, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1949, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, -1, 64, 64])]; + tensor k_27_cast_fp16 = reshape(shape = var_1951, x = k_25_cast_fp16)[name = tensor("k_27_cast_fp16")]; + tensor var_1965_begin_0 = const()[name = tensor("op_1965_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1965_end_0 = const()[name = tensor("op_1965_end_0"), val = tensor([1, 32, 32, 64])]; + tensor var_1965_end_mask_0 = const()[name = tensor("op_1965_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1965_cast_fp16 = slice_by_index(begin = var_1965_begin_0, end = var_1965_end_0, end_mask = var_1965_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1965_cast_fp16")]; + tensor var_1971_begin_0 = const()[name = tensor("op_1971_begin_0"), val = tensor([0, 0, 32, 0])]; + tensor var_1971_end_0 = const()[name = tensor("op_1971_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1971_end_mask_0 = const()[name = tensor("op_1971_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1971_cast_fp16 = slice_by_index(begin = var_1971_begin_0, end = var_1971_end_0, end_mask = var_1971_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1971_cast_fp16")]; + tensor const_50_promoted_to_fp16 = const()[name = tensor("const_50_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1973_cast_fp16 = mul(x = var_1971_cast_fp16, y = const_50_promoted_to_fp16)[name = tensor("op_1973_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_1835, interleave = rotated_9_interleave_0, values = (var_1973_cast_fp16, var_1965_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1976_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1976_cast_fp16")]; + tensor var_1977_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1977_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1976_cast_fp16, y = var_1977_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_1990_begin_0 = const()[name = tensor("op_1990_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1990_end_0 = const()[name = tensor("op_1990_end_0"), val = tensor([1, 8, 32, 64])]; + tensor var_1990_end_mask_0 = const()[name = tensor("op_1990_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1990_cast_fp16 = slice_by_index(begin = var_1990_begin_0, end = var_1990_end_0, end_mask = var_1990_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_1990_cast_fp16")]; + tensor var_1996_begin_0 = const()[name = tensor("op_1996_begin_0"), val = tensor([0, 0, 32, 0])]; + tensor var_1996_end_0 = const()[name = tensor("op_1996_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_1996_end_mask_0 = const()[name = tensor("op_1996_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1996_cast_fp16 = slice_by_index(begin = var_1996_begin_0, end = var_1996_end_0, end_mask = var_1996_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_1996_cast_fp16")]; + tensor const_52_promoted_to_fp16 = const()[name = tensor("const_52_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1998_cast_fp16 = mul(x = var_1996_cast_fp16, y = const_52_promoted_to_fp16)[name = tensor("op_1998_cast_fp16")]; + tensor rotated_11_interleave_0 = const()[name = tensor("rotated_11_interleave_0"), val = tensor(false)]; + tensor rotated_11_cast_fp16 = concat(axis = var_1835, interleave = rotated_11_interleave_0, values = (var_1998_cast_fp16, var_1990_cast_fp16))[name = tensor("rotated_11_cast_fp16")]; + tensor var_2001_cast_fp16 = mul(x = k_27_cast_fp16, y = cos)[name = tensor("op_2001_cast_fp16")]; + tensor var_2002_cast_fp16 = mul(x = rotated_11_cast_fp16, y = sin)[name = tensor("op_2002_cast_fp16")]; + tensor roped_11_cast_fp16 = add(x = var_2001_cast_fp16, y = var_2002_cast_fp16)[name = tensor("roped_11_cast_fp16")]; + tensor var_2005 = const()[name = tensor("op_2005"), val = tensor([1, -1, 1, 64])]; + tensor k_31_cast_fp16 = reshape(shape = var_2005, x = roped_11_cast_fp16)[name = tensor("k_31_cast_fp16")]; + tensor var_2007 = const()[name = tensor("op_2007"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2007, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; + tensor k_33_perm_0 = const()[name = tensor("k_33_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_35_interleave_0 = const()[name = tensor("k_35_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = transpose(perm = k_33_perm_0, x = k_31_cast_fp16)[name = tensor("transpose_1")]; + tensor k_35_cast_fp16 = concat(axis = var_1837, interleave = k_35_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_35_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1831, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2023 = const()[name = tensor("op_2023"), val = tensor([1, 2048, 1, -1])]; + tensor q_23_cast_fp16 = reshape(shape = var_2023, x = roped_9_cast_fp16)[name = tensor("q_23_cast_fp16")]; + tensor var_2028_begin_0 = const()[name = tensor("op_2028_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2028_end_0 = const()[name = tensor("op_2028_end_0"), val = tensor([1, 64, 1, 64])]; + tensor var_2028_end_mask_0 = const()[name = tensor("op_2028_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2028_cast_fp16 = slice_by_index(begin = var_2028_begin_0, end = var_2028_end_0, end_mask = var_2028_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2028_cast_fp16")]; + tensor var_2032_begin_0 = const()[name = tensor("op_2032_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_2032_end_0 = const()[name = tensor("op_2032_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2032_end_mask_0 = const()[name = tensor("op_2032_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2032_cast_fp16 = slice_by_index(begin = var_2032_begin_0, end = var_2032_end_0, end_mask = var_2032_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2032_cast_fp16")]; + tensor var_2036_begin_0 = const()[name = tensor("op_2036_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2036_end_0 = const()[name = tensor("op_2036_end_0"), val = tensor([1, 192, 1, 64])]; + tensor var_2036_end_mask_0 = const()[name = tensor("op_2036_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2036_cast_fp16 = slice_by_index(begin = var_2036_begin_0, end = var_2036_end_0, end_mask = var_2036_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2036_cast_fp16")]; + tensor var_2040_begin_0 = const()[name = tensor("op_2040_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_2040_end_0 = const()[name = tensor("op_2040_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2040_end_mask_0 = const()[name = tensor("op_2040_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2040_cast_fp16 = slice_by_index(begin = var_2040_begin_0, end = var_2040_end_0, end_mask = var_2040_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2040_cast_fp16")]; + tensor var_2044_begin_0 = const()[name = tensor("op_2044_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2044_end_0 = const()[name = tensor("op_2044_end_0"), val = tensor([1, 320, 1, 64])]; + tensor var_2044_end_mask_0 = const()[name = tensor("op_2044_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2044_cast_fp16 = slice_by_index(begin = var_2044_begin_0, end = var_2044_end_0, end_mask = var_2044_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2044_cast_fp16")]; + tensor var_2048_begin_0 = const()[name = tensor("op_2048_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_2048_end_0 = const()[name = tensor("op_2048_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2048_end_mask_0 = const()[name = tensor("op_2048_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2048_cast_fp16 = slice_by_index(begin = var_2048_begin_0, end = var_2048_end_0, end_mask = var_2048_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2048_cast_fp16")]; + tensor var_2052_begin_0 = const()[name = tensor("op_2052_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2052_end_0 = const()[name = tensor("op_2052_end_0"), val = tensor([1, 448, 1, 64])]; + tensor var_2052_end_mask_0 = const()[name = tensor("op_2052_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2052_cast_fp16 = slice_by_index(begin = var_2052_begin_0, end = var_2052_end_0, end_mask = var_2052_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2052_cast_fp16")]; + tensor var_2056_begin_0 = const()[name = tensor("op_2056_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_2056_end_0 = const()[name = tensor("op_2056_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2056_end_mask_0 = const()[name = tensor("op_2056_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2056_cast_fp16 = slice_by_index(begin = var_2056_begin_0, end = var_2056_end_0, end_mask = var_2056_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2056_cast_fp16")]; + tensor var_2060_begin_0 = const()[name = tensor("op_2060_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2060_end_0 = const()[name = tensor("op_2060_end_0"), val = tensor([1, 576, 1, 64])]; + tensor var_2060_end_mask_0 = const()[name = tensor("op_2060_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2060_cast_fp16 = slice_by_index(begin = var_2060_begin_0, end = var_2060_end_0, end_mask = var_2060_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2060_cast_fp16")]; + tensor var_2064_begin_0 = const()[name = tensor("op_2064_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_2064_end_0 = const()[name = tensor("op_2064_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2064_end_mask_0 = const()[name = tensor("op_2064_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2064_cast_fp16 = slice_by_index(begin = var_2064_begin_0, end = var_2064_end_0, end_mask = var_2064_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2064_cast_fp16")]; + tensor var_2068_begin_0 = const()[name = tensor("op_2068_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2068_end_0 = const()[name = tensor("op_2068_end_0"), val = tensor([1, 704, 1, 64])]; + tensor var_2068_end_mask_0 = const()[name = tensor("op_2068_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2068_cast_fp16 = slice_by_index(begin = var_2068_begin_0, end = var_2068_end_0, end_mask = var_2068_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2068_cast_fp16")]; + tensor var_2072_begin_0 = const()[name = tensor("op_2072_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_2072_end_0 = const()[name = tensor("op_2072_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2072_end_mask_0 = const()[name = tensor("op_2072_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2072_cast_fp16 = slice_by_index(begin = var_2072_begin_0, end = var_2072_end_0, end_mask = var_2072_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2072_cast_fp16")]; + tensor var_2076_begin_0 = const()[name = tensor("op_2076_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2076_end_0 = const()[name = tensor("op_2076_end_0"), val = tensor([1, 832, 1, 64])]; + tensor var_2076_end_mask_0 = const()[name = tensor("op_2076_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2076_cast_fp16 = slice_by_index(begin = var_2076_begin_0, end = var_2076_end_0, end_mask = var_2076_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2076_cast_fp16")]; + tensor var_2080_begin_0 = const()[name = tensor("op_2080_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_2080_end_0 = const()[name = tensor("op_2080_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2080_end_mask_0 = const()[name = tensor("op_2080_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2080_cast_fp16 = slice_by_index(begin = var_2080_begin_0, end = var_2080_end_0, end_mask = var_2080_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2080_cast_fp16")]; + tensor var_2084_begin_0 = const()[name = tensor("op_2084_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2084_end_0 = const()[name = tensor("op_2084_end_0"), val = tensor([1, 960, 1, 64])]; + tensor var_2084_end_mask_0 = const()[name = tensor("op_2084_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2084_cast_fp16 = slice_by_index(begin = var_2084_begin_0, end = var_2084_end_0, end_mask = var_2084_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2084_cast_fp16")]; + tensor var_2088_begin_0 = const()[name = tensor("op_2088_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_2088_end_0 = const()[name = tensor("op_2088_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2088_end_mask_0 = const()[name = tensor("op_2088_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2088_cast_fp16 = slice_by_index(begin = var_2088_begin_0, end = var_2088_end_0, end_mask = var_2088_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2088_cast_fp16")]; + tensor var_2092_begin_0 = const()[name = tensor("op_2092_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2092_end_0 = const()[name = tensor("op_2092_end_0"), val = tensor([1, 1088, 1, 64])]; + tensor var_2092_end_mask_0 = const()[name = tensor("op_2092_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2092_cast_fp16 = slice_by_index(begin = var_2092_begin_0, end = var_2092_end_0, end_mask = var_2092_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2092_cast_fp16")]; + tensor var_2096_begin_0 = const()[name = tensor("op_2096_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_2096_end_0 = const()[name = tensor("op_2096_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2096_end_mask_0 = const()[name = tensor("op_2096_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2096_cast_fp16 = slice_by_index(begin = var_2096_begin_0, end = var_2096_end_0, end_mask = var_2096_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2096_cast_fp16")]; + tensor var_2100_begin_0 = const()[name = tensor("op_2100_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2100_end_0 = const()[name = tensor("op_2100_end_0"), val = tensor([1, 1216, 1, 64])]; + tensor var_2100_end_mask_0 = const()[name = tensor("op_2100_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2100_cast_fp16 = slice_by_index(begin = var_2100_begin_0, end = var_2100_end_0, end_mask = var_2100_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2100_cast_fp16")]; + tensor var_2104_begin_0 = const()[name = tensor("op_2104_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_2104_end_0 = const()[name = tensor("op_2104_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2104_end_mask_0 = const()[name = tensor("op_2104_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2104_cast_fp16 = slice_by_index(begin = var_2104_begin_0, end = var_2104_end_0, end_mask = var_2104_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2104_cast_fp16")]; + tensor var_2108_begin_0 = const()[name = tensor("op_2108_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2108_end_0 = const()[name = tensor("op_2108_end_0"), val = tensor([1, 1344, 1, 64])]; + tensor var_2108_end_mask_0 = const()[name = tensor("op_2108_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2108_cast_fp16 = slice_by_index(begin = var_2108_begin_0, end = var_2108_end_0, end_mask = var_2108_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2108_cast_fp16")]; + tensor var_2112_begin_0 = const()[name = tensor("op_2112_begin_0"), val = tensor([0, 1344, 0, 0])]; + tensor var_2112_end_0 = const()[name = tensor("op_2112_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2112_end_mask_0 = const()[name = tensor("op_2112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2112_cast_fp16 = slice_by_index(begin = var_2112_begin_0, end = var_2112_end_0, end_mask = var_2112_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2112_cast_fp16")]; + tensor var_2116_begin_0 = const()[name = tensor("op_2116_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2116_end_0 = const()[name = tensor("op_2116_end_0"), val = tensor([1, 1472, 1, 64])]; + tensor var_2116_end_mask_0 = const()[name = tensor("op_2116_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2116_cast_fp16 = slice_by_index(begin = var_2116_begin_0, end = var_2116_end_0, end_mask = var_2116_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2116_cast_fp16")]; + tensor var_2120_begin_0 = const()[name = tensor("op_2120_begin_0"), val = tensor([0, 1472, 0, 0])]; + tensor var_2120_end_0 = const()[name = tensor("op_2120_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2120_end_mask_0 = const()[name = tensor("op_2120_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2120_cast_fp16 = slice_by_index(begin = var_2120_begin_0, end = var_2120_end_0, end_mask = var_2120_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2120_cast_fp16")]; + tensor var_2124_begin_0 = const()[name = tensor("op_2124_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2124_end_0 = const()[name = tensor("op_2124_end_0"), val = tensor([1, 1600, 1, 64])]; + tensor var_2124_end_mask_0 = const()[name = tensor("op_2124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2124_cast_fp16 = slice_by_index(begin = var_2124_begin_0, end = var_2124_end_0, end_mask = var_2124_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2124_cast_fp16")]; + tensor var_2128_begin_0 = const()[name = tensor("op_2128_begin_0"), val = tensor([0, 1600, 0, 0])]; + tensor var_2128_end_0 = const()[name = tensor("op_2128_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2128_end_mask_0 = const()[name = tensor("op_2128_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2128_cast_fp16 = slice_by_index(begin = var_2128_begin_0, end = var_2128_end_0, end_mask = var_2128_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2128_cast_fp16")]; + tensor var_2132_begin_0 = const()[name = tensor("op_2132_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2132_end_0 = const()[name = tensor("op_2132_end_0"), val = tensor([1, 1728, 1, 64])]; + tensor var_2132_end_mask_0 = const()[name = tensor("op_2132_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2132_cast_fp16 = slice_by_index(begin = var_2132_begin_0, end = var_2132_end_0, end_mask = var_2132_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2132_cast_fp16")]; + tensor var_2136_begin_0 = const()[name = tensor("op_2136_begin_0"), val = tensor([0, 1728, 0, 0])]; + tensor var_2136_end_0 = const()[name = tensor("op_2136_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2136_end_mask_0 = const()[name = tensor("op_2136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2136_cast_fp16 = slice_by_index(begin = var_2136_begin_0, end = var_2136_end_0, end_mask = var_2136_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2136_cast_fp16")]; + tensor var_2140_begin_0 = const()[name = tensor("op_2140_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2140_end_0 = const()[name = tensor("op_2140_end_0"), val = tensor([1, 1856, 1, 64])]; + tensor var_2140_end_mask_0 = const()[name = tensor("op_2140_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2140_cast_fp16 = slice_by_index(begin = var_2140_begin_0, end = var_2140_end_0, end_mask = var_2140_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2140_cast_fp16")]; + tensor var_2144_begin_0 = const()[name = tensor("op_2144_begin_0"), val = tensor([0, 1856, 0, 0])]; + tensor var_2144_end_0 = const()[name = tensor("op_2144_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2144_end_mask_0 = const()[name = tensor("op_2144_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2144_cast_fp16 = slice_by_index(begin = var_2144_begin_0, end = var_2144_end_0, end_mask = var_2144_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2144_cast_fp16")]; + tensor var_2148_begin_0 = const()[name = tensor("op_2148_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2148_end_0 = const()[name = tensor("op_2148_end_0"), val = tensor([1, 1984, 1, 64])]; + tensor var_2148_end_mask_0 = const()[name = tensor("op_2148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2148_cast_fp16 = slice_by_index(begin = var_2148_begin_0, end = var_2148_end_0, end_mask = var_2148_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2148_cast_fp16")]; + tensor var_2152_begin_0 = const()[name = tensor("op_2152_begin_0"), val = tensor([0, 1984, 0, 0])]; + tensor var_2152_end_0 = const()[name = tensor("op_2152_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2152_end_mask_0 = const()[name = tensor("op_2152_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2152_cast_fp16 = slice_by_index(begin = var_2152_begin_0, end = var_2152_end_0, end_mask = var_2152_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_2152_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2174_begin_0 = const()[name = tensor("op_2174_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_2174_end_0 = const()[name = tensor("op_2174_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2174_end_mask_0 = const()[name = tensor("op_2174_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2174_cast_fp16 = slice_by_index(begin = var_2174_begin_0, end = var_2174_end_0, end_mask = var_2174_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_2174_cast_fp16")]; + tensor var_2190_begin_0 = const()[name = tensor("op_2190_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2190_end_0 = const()[name = tensor("op_2190_end_0"), val = tensor([1, 512, 1, 192])]; + tensor var_2190_end_mask_0 = const()[name = tensor("op_2190_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2190_cast_fp16 = slice_by_index(begin = var_2190_begin_0, end = var_2190_end_0, end_mask = var_2190_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_2190_cast_fp16")]; + tensor var_2206_begin_0 = const()[name = tensor("op_2206_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_2206_end_0 = const()[name = tensor("op_2206_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2206_end_mask_0 = const()[name = tensor("op_2206_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2206_cast_fp16 = slice_by_index(begin = var_2206_begin_0, end = var_2206_end_0, end_mask = var_2206_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_2206_cast_fp16")]; + tensor var_2222_begin_0 = const()[name = tensor("op_2222_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2222_end_0 = const()[name = tensor("op_2222_end_0"), val = tensor([1, 512, 1, 320])]; + tensor var_2222_end_mask_0 = const()[name = tensor("op_2222_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2222_cast_fp16 = slice_by_index(begin = var_2222_begin_0, end = var_2222_end_0, end_mask = var_2222_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_2222_cast_fp16")]; + tensor var_2238_begin_0 = const()[name = tensor("op_2238_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_2238_end_0 = const()[name = tensor("op_2238_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2238_end_mask_0 = const()[name = tensor("op_2238_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2238_cast_fp16 = slice_by_index(begin = var_2238_begin_0, end = var_2238_end_0, end_mask = var_2238_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_2238_cast_fp16")]; + tensor var_2254_begin_0 = const()[name = tensor("op_2254_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2254_end_0 = const()[name = tensor("op_2254_end_0"), val = tensor([1, 512, 1, 448])]; + tensor var_2254_end_mask_0 = const()[name = tensor("op_2254_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2254_cast_fp16 = slice_by_index(begin = var_2254_begin_0, end = var_2254_end_0, end_mask = var_2254_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_2254_cast_fp16")]; + tensor var_2270_begin_0 = const()[name = tensor("op_2270_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_2270_end_0 = const()[name = tensor("op_2270_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2270_end_mask_0 = const()[name = tensor("op_2270_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2270_cast_fp16 = slice_by_index(begin = var_2270_begin_0, end = var_2270_end_0, end_mask = var_2270_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_2270_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 64, 1, 512])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2300_begin_0 = const()[name = tensor("op_2300_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_2300_end_0 = const()[name = tensor("op_2300_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2300_end_mask_0 = const()[name = tensor("op_2300_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2300_cast_fp16 = slice_by_index(begin = var_2300_begin_0, end = var_2300_end_0, end_mask = var_2300_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2300_cast_fp16")]; + tensor var_2316_begin_0 = const()[name = tensor("op_2316_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2316_end_0 = const()[name = tensor("op_2316_end_0"), val = tensor([1, 192, 1, 512])]; + tensor var_2316_end_mask_0 = const()[name = tensor("op_2316_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2316_cast_fp16 = slice_by_index(begin = var_2316_begin_0, end = var_2316_end_0, end_mask = var_2316_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2316_cast_fp16")]; + tensor var_2332_begin_0 = const()[name = tensor("op_2332_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_2332_end_0 = const()[name = tensor("op_2332_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2332_end_mask_0 = const()[name = tensor("op_2332_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2332_cast_fp16 = slice_by_index(begin = var_2332_begin_0, end = var_2332_end_0, end_mask = var_2332_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2332_cast_fp16")]; + tensor var_2348_begin_0 = const()[name = tensor("op_2348_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2348_end_0 = const()[name = tensor("op_2348_end_0"), val = tensor([1, 320, 1, 512])]; + tensor var_2348_end_mask_0 = const()[name = tensor("op_2348_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2348_cast_fp16 = slice_by_index(begin = var_2348_begin_0, end = var_2348_end_0, end_mask = var_2348_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2348_cast_fp16")]; + tensor var_2364_begin_0 = const()[name = tensor("op_2364_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_2364_end_0 = const()[name = tensor("op_2364_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2364_end_mask_0 = const()[name = tensor("op_2364_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2364_cast_fp16 = slice_by_index(begin = var_2364_begin_0, end = var_2364_end_0, end_mask = var_2364_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2364_cast_fp16")]; + tensor var_2380_begin_0 = const()[name = tensor("op_2380_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2380_end_0 = const()[name = tensor("op_2380_end_0"), val = tensor([1, 448, 1, 512])]; + tensor var_2380_end_mask_0 = const()[name = tensor("op_2380_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2380_cast_fp16 = slice_by_index(begin = var_2380_begin_0, end = var_2380_end_0, end_mask = var_2380_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2380_cast_fp16")]; + tensor var_2396_begin_0 = const()[name = tensor("op_2396_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_2396_end_0 = const()[name = tensor("op_2396_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2396_end_mask_0 = const()[name = tensor("op_2396_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2396_cast_fp16 = slice_by_index(begin = var_2396_begin_0, end = var_2396_end_0, end_mask = var_2396_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2396_cast_fp16")]; + tensor var_2412_equation_0 = const()[name = tensor("op_2412_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2412_cast_fp16 = einsum(equation = var_2412_equation_0, values = (var_2158_cast_fp16, var_2028_cast_fp16))[name = tensor("op_2412_cast_fp16")]; + tensor var_2413_to_fp16 = const()[name = tensor("op_2413_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2414_cast_fp16 = mul(x = var_2412_cast_fp16, y = var_2413_to_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2416_equation_0 = const()[name = tensor("op_2416_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2416_cast_fp16 = einsum(equation = var_2416_equation_0, values = (var_2158_cast_fp16, var_2032_cast_fp16))[name = tensor("op_2416_cast_fp16")]; + tensor var_2417_to_fp16 = const()[name = tensor("op_2417_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2418_cast_fp16 = mul(x = var_2416_cast_fp16, y = var_2417_to_fp16)[name = tensor("op_2418_cast_fp16")]; + tensor var_2420_equation_0 = const()[name = tensor("op_2420_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2420_cast_fp16 = einsum(equation = var_2420_equation_0, values = (var_2158_cast_fp16, var_2036_cast_fp16))[name = tensor("op_2420_cast_fp16")]; + tensor var_2421_to_fp16 = const()[name = tensor("op_2421_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2422_cast_fp16 = mul(x = var_2420_cast_fp16, y = var_2421_to_fp16)[name = tensor("op_2422_cast_fp16")]; + tensor var_2424_equation_0 = const()[name = tensor("op_2424_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2424_cast_fp16 = einsum(equation = var_2424_equation_0, values = (var_2158_cast_fp16, var_2040_cast_fp16))[name = tensor("op_2424_cast_fp16")]; + tensor var_2425_to_fp16 = const()[name = tensor("op_2425_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2426_cast_fp16 = mul(x = var_2424_cast_fp16, y = var_2425_to_fp16)[name = tensor("op_2426_cast_fp16")]; + tensor var_2428_equation_0 = const()[name = tensor("op_2428_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2428_cast_fp16 = einsum(equation = var_2428_equation_0, values = (var_2174_cast_fp16, var_2044_cast_fp16))[name = tensor("op_2428_cast_fp16")]; + tensor var_2429_to_fp16 = const()[name = tensor("op_2429_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2430_cast_fp16 = mul(x = var_2428_cast_fp16, y = var_2429_to_fp16)[name = tensor("op_2430_cast_fp16")]; + tensor var_2432_equation_0 = const()[name = tensor("op_2432_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2432_cast_fp16 = einsum(equation = var_2432_equation_0, values = (var_2174_cast_fp16, var_2048_cast_fp16))[name = tensor("op_2432_cast_fp16")]; + tensor var_2433_to_fp16 = const()[name = tensor("op_2433_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2434_cast_fp16 = mul(x = var_2432_cast_fp16, y = var_2433_to_fp16)[name = tensor("op_2434_cast_fp16")]; + tensor var_2436_equation_0 = const()[name = tensor("op_2436_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2436_cast_fp16 = einsum(equation = var_2436_equation_0, values = (var_2174_cast_fp16, var_2052_cast_fp16))[name = tensor("op_2436_cast_fp16")]; + tensor var_2437_to_fp16 = const()[name = tensor("op_2437_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2438_cast_fp16 = mul(x = var_2436_cast_fp16, y = var_2437_to_fp16)[name = tensor("op_2438_cast_fp16")]; + tensor var_2440_equation_0 = const()[name = tensor("op_2440_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2440_cast_fp16 = einsum(equation = var_2440_equation_0, values = (var_2174_cast_fp16, var_2056_cast_fp16))[name = tensor("op_2440_cast_fp16")]; + tensor var_2441_to_fp16 = const()[name = tensor("op_2441_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2442_cast_fp16 = mul(x = var_2440_cast_fp16, y = var_2441_to_fp16)[name = tensor("op_2442_cast_fp16")]; + tensor var_2444_equation_0 = const()[name = tensor("op_2444_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2444_cast_fp16 = einsum(equation = var_2444_equation_0, values = (var_2190_cast_fp16, var_2060_cast_fp16))[name = tensor("op_2444_cast_fp16")]; + tensor var_2445_to_fp16 = const()[name = tensor("op_2445_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2446_cast_fp16 = mul(x = var_2444_cast_fp16, y = var_2445_to_fp16)[name = tensor("op_2446_cast_fp16")]; + tensor var_2448_equation_0 = const()[name = tensor("op_2448_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2448_cast_fp16 = einsum(equation = var_2448_equation_0, values = (var_2190_cast_fp16, var_2064_cast_fp16))[name = tensor("op_2448_cast_fp16")]; + tensor var_2449_to_fp16 = const()[name = tensor("op_2449_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2450_cast_fp16 = mul(x = var_2448_cast_fp16, y = var_2449_to_fp16)[name = tensor("op_2450_cast_fp16")]; + tensor var_2452_equation_0 = const()[name = tensor("op_2452_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2452_cast_fp16 = einsum(equation = var_2452_equation_0, values = (var_2190_cast_fp16, var_2068_cast_fp16))[name = tensor("op_2452_cast_fp16")]; + tensor var_2453_to_fp16 = const()[name = tensor("op_2453_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2454_cast_fp16 = mul(x = var_2452_cast_fp16, y = var_2453_to_fp16)[name = tensor("op_2454_cast_fp16")]; + tensor var_2456_equation_0 = const()[name = tensor("op_2456_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2456_cast_fp16 = einsum(equation = var_2456_equation_0, values = (var_2190_cast_fp16, var_2072_cast_fp16))[name = tensor("op_2456_cast_fp16")]; + tensor var_2457_to_fp16 = const()[name = tensor("op_2457_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2458_cast_fp16 = mul(x = var_2456_cast_fp16, y = var_2457_to_fp16)[name = tensor("op_2458_cast_fp16")]; + tensor var_2460_equation_0 = const()[name = tensor("op_2460_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2460_cast_fp16 = einsum(equation = var_2460_equation_0, values = (var_2206_cast_fp16, var_2076_cast_fp16))[name = tensor("op_2460_cast_fp16")]; + tensor var_2461_to_fp16 = const()[name = tensor("op_2461_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2462_cast_fp16 = mul(x = var_2460_cast_fp16, y = var_2461_to_fp16)[name = tensor("op_2462_cast_fp16")]; + tensor var_2464_equation_0 = const()[name = tensor("op_2464_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2464_cast_fp16 = einsum(equation = var_2464_equation_0, values = (var_2206_cast_fp16, var_2080_cast_fp16))[name = tensor("op_2464_cast_fp16")]; + tensor var_2465_to_fp16 = const()[name = tensor("op_2465_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2466_cast_fp16 = mul(x = var_2464_cast_fp16, y = var_2465_to_fp16)[name = tensor("op_2466_cast_fp16")]; + tensor var_2468_equation_0 = const()[name = tensor("op_2468_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2468_cast_fp16 = einsum(equation = var_2468_equation_0, values = (var_2206_cast_fp16, var_2084_cast_fp16))[name = tensor("op_2468_cast_fp16")]; + tensor var_2469_to_fp16 = const()[name = tensor("op_2469_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2470_cast_fp16 = mul(x = var_2468_cast_fp16, y = var_2469_to_fp16)[name = tensor("op_2470_cast_fp16")]; + tensor var_2472_equation_0 = const()[name = tensor("op_2472_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2472_cast_fp16 = einsum(equation = var_2472_equation_0, values = (var_2206_cast_fp16, var_2088_cast_fp16))[name = tensor("op_2472_cast_fp16")]; + tensor var_2473_to_fp16 = const()[name = tensor("op_2473_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2474_cast_fp16 = mul(x = var_2472_cast_fp16, y = var_2473_to_fp16)[name = tensor("op_2474_cast_fp16")]; + tensor var_2476_equation_0 = const()[name = tensor("op_2476_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2476_cast_fp16 = einsum(equation = var_2476_equation_0, values = (var_2222_cast_fp16, var_2092_cast_fp16))[name = tensor("op_2476_cast_fp16")]; + tensor var_2477_to_fp16 = const()[name = tensor("op_2477_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2478_cast_fp16 = mul(x = var_2476_cast_fp16, y = var_2477_to_fp16)[name = tensor("op_2478_cast_fp16")]; + tensor var_2480_equation_0 = const()[name = tensor("op_2480_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2480_cast_fp16 = einsum(equation = var_2480_equation_0, values = (var_2222_cast_fp16, var_2096_cast_fp16))[name = tensor("op_2480_cast_fp16")]; + tensor var_2481_to_fp16 = const()[name = tensor("op_2481_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2482_cast_fp16 = mul(x = var_2480_cast_fp16, y = var_2481_to_fp16)[name = tensor("op_2482_cast_fp16")]; + tensor var_2484_equation_0 = const()[name = tensor("op_2484_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2484_cast_fp16 = einsum(equation = var_2484_equation_0, values = (var_2222_cast_fp16, var_2100_cast_fp16))[name = tensor("op_2484_cast_fp16")]; + tensor var_2485_to_fp16 = const()[name = tensor("op_2485_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2486_cast_fp16 = mul(x = var_2484_cast_fp16, y = var_2485_to_fp16)[name = tensor("op_2486_cast_fp16")]; + tensor var_2488_equation_0 = const()[name = tensor("op_2488_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2488_cast_fp16 = einsum(equation = var_2488_equation_0, values = (var_2222_cast_fp16, var_2104_cast_fp16))[name = tensor("op_2488_cast_fp16")]; + tensor var_2489_to_fp16 = const()[name = tensor("op_2489_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2490_cast_fp16 = mul(x = var_2488_cast_fp16, y = var_2489_to_fp16)[name = tensor("op_2490_cast_fp16")]; + tensor var_2492_equation_0 = const()[name = tensor("op_2492_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2492_cast_fp16 = einsum(equation = var_2492_equation_0, values = (var_2238_cast_fp16, var_2108_cast_fp16))[name = tensor("op_2492_cast_fp16")]; + tensor var_2493_to_fp16 = const()[name = tensor("op_2493_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2494_cast_fp16 = mul(x = var_2492_cast_fp16, y = var_2493_to_fp16)[name = tensor("op_2494_cast_fp16")]; + tensor var_2496_equation_0 = const()[name = tensor("op_2496_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2496_cast_fp16 = einsum(equation = var_2496_equation_0, values = (var_2238_cast_fp16, var_2112_cast_fp16))[name = tensor("op_2496_cast_fp16")]; + tensor var_2497_to_fp16 = const()[name = tensor("op_2497_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2498_cast_fp16 = mul(x = var_2496_cast_fp16, y = var_2497_to_fp16)[name = tensor("op_2498_cast_fp16")]; + tensor var_2500_equation_0 = const()[name = tensor("op_2500_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2500_cast_fp16 = einsum(equation = var_2500_equation_0, values = (var_2238_cast_fp16, var_2116_cast_fp16))[name = tensor("op_2500_cast_fp16")]; + tensor var_2501_to_fp16 = const()[name = tensor("op_2501_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2502_cast_fp16 = mul(x = var_2500_cast_fp16, y = var_2501_to_fp16)[name = tensor("op_2502_cast_fp16")]; + tensor var_2504_equation_0 = const()[name = tensor("op_2504_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2504_cast_fp16 = einsum(equation = var_2504_equation_0, values = (var_2238_cast_fp16, var_2120_cast_fp16))[name = tensor("op_2504_cast_fp16")]; + tensor var_2505_to_fp16 = const()[name = tensor("op_2505_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2506_cast_fp16 = mul(x = var_2504_cast_fp16, y = var_2505_to_fp16)[name = tensor("op_2506_cast_fp16")]; + tensor var_2508_equation_0 = const()[name = tensor("op_2508_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2508_cast_fp16 = einsum(equation = var_2508_equation_0, values = (var_2254_cast_fp16, var_2124_cast_fp16))[name = tensor("op_2508_cast_fp16")]; + tensor var_2509_to_fp16 = const()[name = tensor("op_2509_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2510_cast_fp16 = mul(x = var_2508_cast_fp16, y = var_2509_to_fp16)[name = tensor("op_2510_cast_fp16")]; + tensor var_2512_equation_0 = const()[name = tensor("op_2512_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2512_cast_fp16 = einsum(equation = var_2512_equation_0, values = (var_2254_cast_fp16, var_2128_cast_fp16))[name = tensor("op_2512_cast_fp16")]; + tensor var_2513_to_fp16 = const()[name = tensor("op_2513_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2514_cast_fp16 = mul(x = var_2512_cast_fp16, y = var_2513_to_fp16)[name = tensor("op_2514_cast_fp16")]; + tensor var_2516_equation_0 = const()[name = tensor("op_2516_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2516_cast_fp16 = einsum(equation = var_2516_equation_0, values = (var_2254_cast_fp16, var_2132_cast_fp16))[name = tensor("op_2516_cast_fp16")]; + tensor var_2517_to_fp16 = const()[name = tensor("op_2517_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2518_cast_fp16 = mul(x = var_2516_cast_fp16, y = var_2517_to_fp16)[name = tensor("op_2518_cast_fp16")]; + tensor var_2520_equation_0 = const()[name = tensor("op_2520_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2520_cast_fp16 = einsum(equation = var_2520_equation_0, values = (var_2254_cast_fp16, var_2136_cast_fp16))[name = tensor("op_2520_cast_fp16")]; + tensor var_2521_to_fp16 = const()[name = tensor("op_2521_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2522_cast_fp16 = mul(x = var_2520_cast_fp16, y = var_2521_to_fp16)[name = tensor("op_2522_cast_fp16")]; + tensor var_2524_equation_0 = const()[name = tensor("op_2524_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2524_cast_fp16 = einsum(equation = var_2524_equation_0, values = (var_2270_cast_fp16, var_2140_cast_fp16))[name = tensor("op_2524_cast_fp16")]; + tensor var_2525_to_fp16 = const()[name = tensor("op_2525_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2526_cast_fp16 = mul(x = var_2524_cast_fp16, y = var_2525_to_fp16)[name = tensor("op_2526_cast_fp16")]; + tensor var_2528_equation_0 = const()[name = tensor("op_2528_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2528_cast_fp16 = einsum(equation = var_2528_equation_0, values = (var_2270_cast_fp16, var_2144_cast_fp16))[name = tensor("op_2528_cast_fp16")]; + tensor var_2529_to_fp16 = const()[name = tensor("op_2529_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2530_cast_fp16 = mul(x = var_2528_cast_fp16, y = var_2529_to_fp16)[name = tensor("op_2530_cast_fp16")]; + tensor var_2532_equation_0 = const()[name = tensor("op_2532_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2532_cast_fp16 = einsum(equation = var_2532_equation_0, values = (var_2270_cast_fp16, var_2148_cast_fp16))[name = tensor("op_2532_cast_fp16")]; + tensor var_2533_to_fp16 = const()[name = tensor("op_2533_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2534_cast_fp16 = mul(x = var_2532_cast_fp16, y = var_2533_to_fp16)[name = tensor("op_2534_cast_fp16")]; + tensor var_2536_equation_0 = const()[name = tensor("op_2536_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2536_cast_fp16 = einsum(equation = var_2536_equation_0, values = (var_2270_cast_fp16, var_2152_cast_fp16))[name = tensor("op_2536_cast_fp16")]; + tensor var_2537_to_fp16 = const()[name = tensor("op_2537_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2538_cast_fp16 = mul(x = var_2536_cast_fp16, y = var_2537_to_fp16)[name = tensor("op_2538_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2414_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2418_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2422_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2426_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2430_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2434_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2438_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2442_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2446_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2450_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2454_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2458_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2462_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2466_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2470_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2474_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2478_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2482_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2486_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2490_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2494_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2498_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2502_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2506_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2510_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2514_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2518_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2522_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2526_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2530_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2534_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_191_cast_fp16 = add(x = var_2538_cast_fp16, y = mask)[name = tensor("aw_191_cast_fp16")]; + tensor var_2571_cast_fp16 = softmax(axis = var_1879, x = aw_129_cast_fp16)[name = tensor("op_2571_cast_fp16")]; + tensor var_2572_cast_fp16 = softmax(axis = var_1879, x = aw_131_cast_fp16)[name = tensor("op_2572_cast_fp16")]; + tensor var_2573_cast_fp16 = softmax(axis = var_1879, x = aw_133_cast_fp16)[name = tensor("op_2573_cast_fp16")]; + tensor var_2574_cast_fp16 = softmax(axis = var_1879, x = aw_135_cast_fp16)[name = tensor("op_2574_cast_fp16")]; + tensor var_2575_cast_fp16 = softmax(axis = var_1879, x = aw_137_cast_fp16)[name = tensor("op_2575_cast_fp16")]; + tensor var_2576_cast_fp16 = softmax(axis = var_1879, x = aw_139_cast_fp16)[name = tensor("op_2576_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1879, x = aw_141_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1879, x = aw_143_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1879, x = aw_145_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1879, x = aw_147_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1879, x = aw_149_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1879, x = aw_151_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1879, x = aw_153_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1879, x = aw_155_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1879, x = aw_157_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1879, x = aw_159_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1879, x = aw_161_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1879, x = aw_163_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1879, x = aw_165_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1879, x = aw_167_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1879, x = aw_169_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1879, x = aw_171_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1879, x = aw_173_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1879, x = aw_175_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1879, x = aw_177_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1879, x = aw_179_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1879, x = aw_181_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1879, x = aw_183_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1879, x = aw_185_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1879, x = aw_187_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1879, x = aw_189_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1879, x = aw_191_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2604_equation_0 = const()[name = tensor("op_2604_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2604_cast_fp16 = einsum(equation = var_2604_equation_0, values = (var_2284_cast_fp16, var_2571_cast_fp16))[name = tensor("op_2604_cast_fp16")]; + tensor var_2606_equation_0 = const()[name = tensor("op_2606_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2606_cast_fp16 = einsum(equation = var_2606_equation_0, values = (var_2284_cast_fp16, var_2572_cast_fp16))[name = tensor("op_2606_cast_fp16")]; + tensor var_2608_equation_0 = const()[name = tensor("op_2608_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2608_cast_fp16 = einsum(equation = var_2608_equation_0, values = (var_2284_cast_fp16, var_2573_cast_fp16))[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2284_cast_fp16, var_2574_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2300_cast_fp16, var_2575_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2300_cast_fp16, var_2576_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2300_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2300_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2316_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2316_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2316_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2316_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2332_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2332_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2332_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2332_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2348_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2348_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2348_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2348_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2364_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2364_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2364_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2364_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2380_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2380_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2380_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2380_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2396_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2396_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2396_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2396_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor x_55_interleave_0 = const()[name = tensor("x_55_interleave_0"), val = tensor(false)]; + tensor x_55_cast_fp16 = concat(axis = var_1879, interleave = x_55_interleave_0, values = (var_2604_cast_fp16, var_2606_cast_fp16, var_2608_cast_fp16, var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16))[name = tensor("x_55_cast_fp16")]; + tensor var_2671 = const()[name = tensor("op_2671"), val = tensor([1, 2048, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2671, x = x_55_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2674 = const()[name = tensor("op_2674"), val = tensor([1, 1])]; + tensor var_2676 = const()[name = tensor("op_2676"), val = tensor([1, 1])]; + tensor attention_output_5_pad_type_0 = const()[name = tensor("attention_output_5_pad_type_0"), val = tensor("custom")]; + tensor attention_output_5_pad_0 = const()[name = tensor("attention_output_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_2_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_2_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255875456)))]; + tensor attention_output_5_cast_fp16 = conv(dilations = var_2676, groups = var_1879, pad = attention_output_5_pad_0, pad_type = attention_output_5_pad_type_0, strides = var_2674, weight = blocks_2_attn_proj_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("attention_output_5_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = attention_output_5_cast_fp16, y = x_41_cast_fp16)[name = tensor("x_57_cast_fp16")]; + tensor x_eps_11_interleave_0 = const()[name = tensor("x_eps_11_interleave_0"), val = tensor(false)]; + tensor eps_chan_11_to_fp16 = const()[name = tensor("eps_chan_11_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264264128)))]; + tensor x_eps_11_cast_fp16 = concat(axis = var_1879, interleave = x_eps_11_interleave_0, values = (x_57_cast_fp16, eps_chan_11_to_fp16))[name = tensor("x_eps_11_cast_fp16")]; + tensor norm_x_11_axes_0 = const()[name = tensor("norm_x_11_axes_0"), val = tensor([1])]; + tensor norm_x_11_cast_fp16 = reduce_l2_norm(axes = norm_x_11_axes_0, keep_dims = var_1882, x = x_eps_11_cast_fp16)[name = tensor("norm_x_11_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_57_cast_fp16, y = norm_x_11_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2702_to_fp16 = const()[name = tensor("op_2702_to_fp16"), val = tensor(0x1.6ap+5)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2702_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264264320)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2713 = const()[name = tensor("op_2713"), val = tensor([1, 1])]; + tensor var_2715 = const()[name = tensor("op_2715"), val = tensor([1, 1])]; + tensor input_27_pad_type_0 = const()[name = tensor("input_27_pad_type_0"), val = tensor("custom")]; + tensor input_27_pad_0 = const()[name = tensor("input_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_2_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264268480)))]; + tensor input_27_cast_fp16 = conv(dilations = var_2715, groups = var_1879, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = var_2713, weight = blocks_2_mlp_fc_1_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2719 = const()[name = tensor("op_2719"), val = tensor([1, 1])]; + tensor var_2721 = const()[name = tensor("op_2721"), val = tensor([1, 1])]; + tensor x_fc_2_5_pad_type_0 = const()[name = tensor("x_fc_2_5_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_5_pad_0 = const()[name = tensor("x_fc_2_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_2_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297822976)))]; + tensor x_fc_2_5_cast_fp16 = conv(dilations = var_2721, groups = var_1879, pad = x_fc_2_5_pad_0, pad_type = x_fc_2_5_pad_type_0, strides = var_2719, weight = blocks_2_mlp_fc_2_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("x_fc_2_5_cast_fp16")]; + tensor var_2724_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2724_cast_fp16")]; + tensor input_29_cast_fp16 = mul(x = var_2724_cast_fp16, y = x_fc_2_5_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor var_2727 = const()[name = tensor("op_2727"), val = tensor([1, 1])]; + tensor var_2729 = const()[name = tensor("op_2729"), val = tensor([1, 1])]; + tensor var_2731_pad_type_0 = const()[name = tensor("op_2731_pad_type_0"), val = tensor("custom")]; + tensor var_2731_pad_0 = const()[name = tensor("op_2731_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_2_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(331377472)))]; + tensor var_2731_cast_fp16 = conv(dilations = var_2729, groups = var_1879, pad = var_2731_pad_0, pad_type = var_2731_pad_type_0, strides = var_2727, weight = blocks_2_mlp_proj_weight_to_fp16, x = input_29_cast_fp16)[name = tensor("op_2731_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = var_2731_cast_fp16, y = x_57_cast_fp16)[name = tensor("x_61_cast_fp16")]; + tensor var_2737 = const()[name = tensor("op_2737"), val = tensor(-1)]; + tensor var_2741 = const()[name = tensor("op_2741"), val = tensor(-2)]; + tensor var_2743 = const()[name = tensor("op_2743"), val = tensor(-3)]; + tensor var_2785 = const()[name = tensor("op_2785"), val = tensor(1)]; + tensor var_2788 = const()[name = tensor("op_2788"), val = tensor(true)]; + tensor x_eps_13_interleave_0 = const()[name = tensor("x_eps_13_interleave_0"), val = tensor(false)]; + tensor eps_chan_13_to_fp16 = const()[name = tensor("eps_chan_13_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364931968)))]; + tensor x_eps_13_cast_fp16 = concat(axis = var_2785, interleave = x_eps_13_interleave_0, values = (x_61_cast_fp16, eps_chan_13_to_fp16))[name = tensor("x_eps_13_cast_fp16")]; + tensor norm_x_13_axes_0 = const()[name = tensor("norm_x_13_axes_0"), val = tensor([1])]; + tensor norm_x_13_cast_fp16 = reduce_l2_norm(axes = norm_x_13_axes_0, keep_dims = var_2788, x = x_eps_13_cast_fp16)[name = tensor("norm_x_13_cast_fp16")]; + tensor x_normed_37_cast_fp16 = real_div(x = x_61_cast_fp16, y = norm_x_13_cast_fp16)[name = tensor("x_normed_37_cast_fp16")]; + tensor var_2811_to_fp16 = const()[name = tensor("op_2811_to_fp16"), val = tensor(0x1.6ap+5)]; + tensor x_normed_39_cast_fp16 = mul(x = x_normed_37_cast_fp16, y = var_2811_to_fp16)[name = tensor("x_normed_39_cast_fp16")]; + tensor blocks_3_norm_1_weight_to_fp16 = const()[name = tensor("blocks_3_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364932160)))]; + tensor x_65_cast_fp16 = mul(x = x_normed_39_cast_fp16, y = blocks_3_norm_1_weight_to_fp16)[name = tensor("x_65_cast_fp16")]; + tensor var_2835 = const()[name = tensor("op_2835"), val = tensor([1, 2048, 1, -1])]; + tensor input_31_cast_fp16 = reshape(shape = var_2835, x = x_65_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_2838 = const()[name = tensor("op_2838"), val = tensor([1, 1])]; + tensor var_2840 = const()[name = tensor("op_2840"), val = tensor([1, 1])]; + tensor q_25_pad_type_0 = const()[name = tensor("q_25_pad_type_0"), val = tensor("custom")]; + tensor q_25_pad_0 = const()[name = tensor("q_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_3_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_3_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364936320)))]; + tensor q_25_cast_fp16 = conv(dilations = var_2840, groups = var_2785, pad = q_25_pad_0, pad_type = q_25_pad_type_0, strides = var_2838, weight = blocks_3_attn_q_proj_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("q_25_cast_fp16")]; + tensor var_2844 = const()[name = tensor("op_2844"), val = tensor([1, 1])]; + tensor var_2846 = const()[name = tensor("op_2846"), val = tensor([1, 1])]; + tensor k_37_pad_type_0 = const()[name = tensor("k_37_pad_type_0"), val = tensor("custom")]; + tensor k_37_pad_0 = const()[name = tensor("k_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_3_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_3_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373324992)))]; + tensor k_37_cast_fp16 = conv(dilations = var_2846, groups = var_2785, pad = k_37_pad_0, pad_type = k_37_pad_type_0, strides = var_2844, weight = blocks_3_attn_k_proj_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("k_37_cast_fp16")]; + tensor var_2850 = const()[name = tensor("op_2850"), val = tensor([1, 1])]; + tensor var_2852 = const()[name = tensor("op_2852"), val = tensor([1, 1])]; + tensor v_31_pad_type_0 = const()[name = tensor("v_31_pad_type_0"), val = tensor("custom")]; + tensor v_31_pad_0 = const()[name = tensor("v_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_3_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_3_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375422208)))]; + tensor v_31_cast_fp16 = conv(dilations = var_2852, groups = var_2785, pad = v_31_pad_0, pad_type = v_31_pad_type_0, strides = var_2850, weight = blocks_3_attn_v_proj_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("v_31_cast_fp16")]; + tensor var_2855 = const()[name = tensor("op_2855"), val = tensor([1, 32, 64, 64])]; + tensor q_27_cast_fp16 = reshape(shape = var_2855, x = q_25_cast_fp16)[name = tensor("q_27_cast_fp16")]; + tensor var_2857 = const()[name = tensor("op_2857"), val = tensor([1, -1, 64, 64])]; + tensor k_39_cast_fp16 = reshape(shape = var_2857, x = k_37_cast_fp16)[name = tensor("k_39_cast_fp16")]; + tensor var_2871_begin_0 = const()[name = tensor("op_2871_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2871_end_0 = const()[name = tensor("op_2871_end_0"), val = tensor([1, 32, 32, 64])]; + tensor var_2871_end_mask_0 = const()[name = tensor("op_2871_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2871_cast_fp16 = slice_by_index(begin = var_2871_begin_0, end = var_2871_end_0, end_mask = var_2871_end_mask_0, x = q_27_cast_fp16)[name = tensor("op_2871_cast_fp16")]; + tensor var_2877_begin_0 = const()[name = tensor("op_2877_begin_0"), val = tensor([0, 0, 32, 0])]; + tensor var_2877_end_0 = const()[name = tensor("op_2877_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2877_end_mask_0 = const()[name = tensor("op_2877_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2877_cast_fp16 = slice_by_index(begin = var_2877_begin_0, end = var_2877_end_0, end_mask = var_2877_end_mask_0, x = q_27_cast_fp16)[name = tensor("op_2877_cast_fp16")]; + tensor const_70_promoted_to_fp16 = const()[name = tensor("const_70_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2879_cast_fp16 = mul(x = var_2877_cast_fp16, y = const_70_promoted_to_fp16)[name = tensor("op_2879_cast_fp16")]; + tensor rotated_13_interleave_0 = const()[name = tensor("rotated_13_interleave_0"), val = tensor(false)]; + tensor rotated_13_cast_fp16 = concat(axis = var_2741, interleave = rotated_13_interleave_0, values = (var_2879_cast_fp16, var_2871_cast_fp16))[name = tensor("rotated_13_cast_fp16")]; + tensor var_2882_cast_fp16 = mul(x = q_27_cast_fp16, y = cos)[name = tensor("op_2882_cast_fp16")]; + tensor var_2883_cast_fp16 = mul(x = rotated_13_cast_fp16, y = sin)[name = tensor("op_2883_cast_fp16")]; + tensor roped_13_cast_fp16 = add(x = var_2882_cast_fp16, y = var_2883_cast_fp16)[name = tensor("roped_13_cast_fp16")]; + tensor var_2896_begin_0 = const()[name = tensor("op_2896_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2896_end_0 = const()[name = tensor("op_2896_end_0"), val = tensor([1, 8, 32, 64])]; + tensor var_2896_end_mask_0 = const()[name = tensor("op_2896_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2896_cast_fp16 = slice_by_index(begin = var_2896_begin_0, end = var_2896_end_0, end_mask = var_2896_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_2896_cast_fp16")]; + tensor var_2902_begin_0 = const()[name = tensor("op_2902_begin_0"), val = tensor([0, 0, 32, 0])]; + tensor var_2902_end_0 = const()[name = tensor("op_2902_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_2902_end_mask_0 = const()[name = tensor("op_2902_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2902_cast_fp16 = slice_by_index(begin = var_2902_begin_0, end = var_2902_end_0, end_mask = var_2902_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_2902_cast_fp16")]; + tensor const_72_promoted_to_fp16 = const()[name = tensor("const_72_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2904_cast_fp16 = mul(x = var_2902_cast_fp16, y = const_72_promoted_to_fp16)[name = tensor("op_2904_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_2741, interleave = rotated_interleave_0, values = (var_2904_cast_fp16, var_2896_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2907_cast_fp16 = mul(x = k_39_cast_fp16, y = cos)[name = tensor("op_2907_cast_fp16")]; + tensor var_2908_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2908_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2907_cast_fp16, y = var_2908_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2911 = const()[name = tensor("op_2911"), val = tensor([1, -1, 1, 64])]; + tensor k_43_cast_fp16 = reshape(shape = var_2911, x = roped_cast_fp16)[name = tensor("k_43_cast_fp16")]; + tensor var_2913 = const()[name = tensor("op_2913"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_3 = reshape(shape = var_2913, x = v_31_cast_fp16)[name = tensor("new_v_cache_3_type_fp32_cast_fp16")]; + tensor k_45_perm_0 = const()[name = tensor("k_45_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_3 = transpose(perm = k_45_perm_0, x = k_43_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_2743, interleave = k_interleave_0, values = (k_cache_3, new_k_cache_3))[name = tensor("k_cast_fp16")]; + tensor v_37_interleave_0 = const()[name = tensor("v_37_interleave_0"), val = tensor(false)]; + tensor v_37_cast_fp16 = concat(axis = var_2737, interleave = v_37_interleave_0, values = (v_cache_3, new_v_cache_3))[name = tensor("v_37_cast_fp16")]; + tensor var_2929 = const()[name = tensor("op_2929"), val = tensor([1, 2048, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2929, x = roped_13_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2934_begin_0 = const()[name = tensor("op_2934_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2934_end_0 = const()[name = tensor("op_2934_end_0"), val = tensor([1, 64, 1, 64])]; + tensor var_2934_end_mask_0 = const()[name = tensor("op_2934_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2934_cast_fp16 = slice_by_index(begin = var_2934_begin_0, end = var_2934_end_0, end_mask = var_2934_end_mask_0, x = q_cast_fp16)[name = tensor("op_2934_cast_fp16")]; + tensor var_2938_begin_0 = const()[name = tensor("op_2938_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_2938_end_0 = const()[name = tensor("op_2938_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2938_end_mask_0 = const()[name = tensor("op_2938_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2938_cast_fp16 = slice_by_index(begin = var_2938_begin_0, end = var_2938_end_0, end_mask = var_2938_end_mask_0, x = q_cast_fp16)[name = tensor("op_2938_cast_fp16")]; + tensor var_2942_begin_0 = const()[name = tensor("op_2942_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2942_end_0 = const()[name = tensor("op_2942_end_0"), val = tensor([1, 192, 1, 64])]; + tensor var_2942_end_mask_0 = const()[name = tensor("op_2942_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2942_cast_fp16 = slice_by_index(begin = var_2942_begin_0, end = var_2942_end_0, end_mask = var_2942_end_mask_0, x = q_cast_fp16)[name = tensor("op_2942_cast_fp16")]; + tensor var_2946_begin_0 = const()[name = tensor("op_2946_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_2946_end_0 = const()[name = tensor("op_2946_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2946_end_mask_0 = const()[name = tensor("op_2946_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2946_cast_fp16 = slice_by_index(begin = var_2946_begin_0, end = var_2946_end_0, end_mask = var_2946_end_mask_0, x = q_cast_fp16)[name = tensor("op_2946_cast_fp16")]; + tensor var_2950_begin_0 = const()[name = tensor("op_2950_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2950_end_0 = const()[name = tensor("op_2950_end_0"), val = tensor([1, 320, 1, 64])]; + tensor var_2950_end_mask_0 = const()[name = tensor("op_2950_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2950_cast_fp16 = slice_by_index(begin = var_2950_begin_0, end = var_2950_end_0, end_mask = var_2950_end_mask_0, x = q_cast_fp16)[name = tensor("op_2950_cast_fp16")]; + tensor var_2954_begin_0 = const()[name = tensor("op_2954_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_2954_end_0 = const()[name = tensor("op_2954_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2954_end_mask_0 = const()[name = tensor("op_2954_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2954_cast_fp16 = slice_by_index(begin = var_2954_begin_0, end = var_2954_end_0, end_mask = var_2954_end_mask_0, x = q_cast_fp16)[name = tensor("op_2954_cast_fp16")]; + tensor var_2958_begin_0 = const()[name = tensor("op_2958_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2958_end_0 = const()[name = tensor("op_2958_end_0"), val = tensor([1, 448, 1, 64])]; + tensor var_2958_end_mask_0 = const()[name = tensor("op_2958_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2958_cast_fp16 = slice_by_index(begin = var_2958_begin_0, end = var_2958_end_0, end_mask = var_2958_end_mask_0, x = q_cast_fp16)[name = tensor("op_2958_cast_fp16")]; + tensor var_2962_begin_0 = const()[name = tensor("op_2962_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_2962_end_0 = const()[name = tensor("op_2962_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2962_end_mask_0 = const()[name = tensor("op_2962_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2962_cast_fp16 = slice_by_index(begin = var_2962_begin_0, end = var_2962_end_0, end_mask = var_2962_end_mask_0, x = q_cast_fp16)[name = tensor("op_2962_cast_fp16")]; + tensor var_2966_begin_0 = const()[name = tensor("op_2966_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2966_end_0 = const()[name = tensor("op_2966_end_0"), val = tensor([1, 576, 1, 64])]; + tensor var_2966_end_mask_0 = const()[name = tensor("op_2966_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2966_cast_fp16 = slice_by_index(begin = var_2966_begin_0, end = var_2966_end_0, end_mask = var_2966_end_mask_0, x = q_cast_fp16)[name = tensor("op_2966_cast_fp16")]; + tensor var_2970_begin_0 = const()[name = tensor("op_2970_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_2970_end_0 = const()[name = tensor("op_2970_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2970_end_mask_0 = const()[name = tensor("op_2970_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2970_cast_fp16 = slice_by_index(begin = var_2970_begin_0, end = var_2970_end_0, end_mask = var_2970_end_mask_0, x = q_cast_fp16)[name = tensor("op_2970_cast_fp16")]; + tensor var_2974_begin_0 = const()[name = tensor("op_2974_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2974_end_0 = const()[name = tensor("op_2974_end_0"), val = tensor([1, 704, 1, 64])]; + tensor var_2974_end_mask_0 = const()[name = tensor("op_2974_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2974_cast_fp16 = slice_by_index(begin = var_2974_begin_0, end = var_2974_end_0, end_mask = var_2974_end_mask_0, x = q_cast_fp16)[name = tensor("op_2974_cast_fp16")]; + tensor var_2978_begin_0 = const()[name = tensor("op_2978_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_2978_end_0 = const()[name = tensor("op_2978_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2978_end_mask_0 = const()[name = tensor("op_2978_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2978_cast_fp16 = slice_by_index(begin = var_2978_begin_0, end = var_2978_end_0, end_mask = var_2978_end_mask_0, x = q_cast_fp16)[name = tensor("op_2978_cast_fp16")]; + tensor var_2982_begin_0 = const()[name = tensor("op_2982_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2982_end_0 = const()[name = tensor("op_2982_end_0"), val = tensor([1, 832, 1, 64])]; + tensor var_2982_end_mask_0 = const()[name = tensor("op_2982_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2982_cast_fp16 = slice_by_index(begin = var_2982_begin_0, end = var_2982_end_0, end_mask = var_2982_end_mask_0, x = q_cast_fp16)[name = tensor("op_2982_cast_fp16")]; + tensor var_2986_begin_0 = const()[name = tensor("op_2986_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_2986_end_0 = const()[name = tensor("op_2986_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2986_end_mask_0 = const()[name = tensor("op_2986_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2986_cast_fp16 = slice_by_index(begin = var_2986_begin_0, end = var_2986_end_0, end_mask = var_2986_end_mask_0, x = q_cast_fp16)[name = tensor("op_2986_cast_fp16")]; + tensor var_2990_begin_0 = const()[name = tensor("op_2990_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2990_end_0 = const()[name = tensor("op_2990_end_0"), val = tensor([1, 960, 1, 64])]; + tensor var_2990_end_mask_0 = const()[name = tensor("op_2990_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2990_cast_fp16 = slice_by_index(begin = var_2990_begin_0, end = var_2990_end_0, end_mask = var_2990_end_mask_0, x = q_cast_fp16)[name = tensor("op_2990_cast_fp16")]; + tensor var_2994_begin_0 = const()[name = tensor("op_2994_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_2994_end_0 = const()[name = tensor("op_2994_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2994_end_mask_0 = const()[name = tensor("op_2994_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2994_cast_fp16 = slice_by_index(begin = var_2994_begin_0, end = var_2994_end_0, end_mask = var_2994_end_mask_0, x = q_cast_fp16)[name = tensor("op_2994_cast_fp16")]; + tensor var_2998_begin_0 = const()[name = tensor("op_2998_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2998_end_0 = const()[name = tensor("op_2998_end_0"), val = tensor([1, 1088, 1, 64])]; + tensor var_2998_end_mask_0 = const()[name = tensor("op_2998_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2998_cast_fp16 = slice_by_index(begin = var_2998_begin_0, end = var_2998_end_0, end_mask = var_2998_end_mask_0, x = q_cast_fp16)[name = tensor("op_2998_cast_fp16")]; + tensor var_3002_begin_0 = const()[name = tensor("op_3002_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_3002_end_0 = const()[name = tensor("op_3002_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_3002_end_mask_0 = const()[name = tensor("op_3002_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3002_cast_fp16 = slice_by_index(begin = var_3002_begin_0, end = var_3002_end_0, end_mask = var_3002_end_mask_0, x = q_cast_fp16)[name = tensor("op_3002_cast_fp16")]; + tensor var_3006_begin_0 = const()[name = tensor("op_3006_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_3006_end_0 = const()[name = tensor("op_3006_end_0"), val = tensor([1, 1216, 1, 64])]; + tensor var_3006_end_mask_0 = const()[name = tensor("op_3006_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3006_cast_fp16 = slice_by_index(begin = var_3006_begin_0, end = var_3006_end_0, end_mask = var_3006_end_mask_0, x = q_cast_fp16)[name = tensor("op_3006_cast_fp16")]; + tensor var_3010_begin_0 = const()[name = tensor("op_3010_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_3010_end_0 = const()[name = tensor("op_3010_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_3010_end_mask_0 = const()[name = tensor("op_3010_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3010_cast_fp16 = slice_by_index(begin = var_3010_begin_0, end = var_3010_end_0, end_mask = var_3010_end_mask_0, x = q_cast_fp16)[name = tensor("op_3010_cast_fp16")]; + tensor var_3014_begin_0 = const()[name = tensor("op_3014_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_3014_end_0 = const()[name = tensor("op_3014_end_0"), val = tensor([1, 1344, 1, 64])]; + tensor var_3014_end_mask_0 = const()[name = tensor("op_3014_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3014_cast_fp16 = slice_by_index(begin = var_3014_begin_0, end = var_3014_end_0, end_mask = var_3014_end_mask_0, x = q_cast_fp16)[name = tensor("op_3014_cast_fp16")]; + tensor var_3018_begin_0 = const()[name = tensor("op_3018_begin_0"), val = tensor([0, 1344, 0, 0])]; + tensor var_3018_end_0 = const()[name = tensor("op_3018_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_3018_end_mask_0 = const()[name = tensor("op_3018_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3018_cast_fp16 = slice_by_index(begin = var_3018_begin_0, end = var_3018_end_0, end_mask = var_3018_end_mask_0, x = q_cast_fp16)[name = tensor("op_3018_cast_fp16")]; + tensor var_3022_begin_0 = const()[name = tensor("op_3022_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_3022_end_0 = const()[name = tensor("op_3022_end_0"), val = tensor([1, 1472, 1, 64])]; + tensor var_3022_end_mask_0 = const()[name = tensor("op_3022_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3022_cast_fp16 = slice_by_index(begin = var_3022_begin_0, end = var_3022_end_0, end_mask = var_3022_end_mask_0, x = q_cast_fp16)[name = tensor("op_3022_cast_fp16")]; + tensor var_3026_begin_0 = const()[name = tensor("op_3026_begin_0"), val = tensor([0, 1472, 0, 0])]; + tensor var_3026_end_0 = const()[name = tensor("op_3026_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_3026_end_mask_0 = const()[name = tensor("op_3026_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3026_cast_fp16 = slice_by_index(begin = var_3026_begin_0, end = var_3026_end_0, end_mask = var_3026_end_mask_0, x = q_cast_fp16)[name = tensor("op_3026_cast_fp16")]; + tensor var_3030_begin_0 = const()[name = tensor("op_3030_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_3030_end_0 = const()[name = tensor("op_3030_end_0"), val = tensor([1, 1600, 1, 64])]; + tensor var_3030_end_mask_0 = const()[name = tensor("op_3030_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3030_cast_fp16 = slice_by_index(begin = var_3030_begin_0, end = var_3030_end_0, end_mask = var_3030_end_mask_0, x = q_cast_fp16)[name = tensor("op_3030_cast_fp16")]; + tensor var_3034_begin_0 = const()[name = tensor("op_3034_begin_0"), val = tensor([0, 1600, 0, 0])]; + tensor var_3034_end_0 = const()[name = tensor("op_3034_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_3034_end_mask_0 = const()[name = tensor("op_3034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3034_cast_fp16 = slice_by_index(begin = var_3034_begin_0, end = var_3034_end_0, end_mask = var_3034_end_mask_0, x = q_cast_fp16)[name = tensor("op_3034_cast_fp16")]; + tensor var_3038_begin_0 = const()[name = tensor("op_3038_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_3038_end_0 = const()[name = tensor("op_3038_end_0"), val = tensor([1, 1728, 1, 64])]; + tensor var_3038_end_mask_0 = const()[name = tensor("op_3038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3038_cast_fp16 = slice_by_index(begin = var_3038_begin_0, end = var_3038_end_0, end_mask = var_3038_end_mask_0, x = q_cast_fp16)[name = tensor("op_3038_cast_fp16")]; + tensor var_3042_begin_0 = const()[name = tensor("op_3042_begin_0"), val = tensor([0, 1728, 0, 0])]; + tensor var_3042_end_0 = const()[name = tensor("op_3042_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_3042_end_mask_0 = const()[name = tensor("op_3042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3042_cast_fp16 = slice_by_index(begin = var_3042_begin_0, end = var_3042_end_0, end_mask = var_3042_end_mask_0, x = q_cast_fp16)[name = tensor("op_3042_cast_fp16")]; + tensor var_3046_begin_0 = const()[name = tensor("op_3046_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_3046_end_0 = const()[name = tensor("op_3046_end_0"), val = tensor([1, 1856, 1, 64])]; + tensor var_3046_end_mask_0 = const()[name = tensor("op_3046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3046_cast_fp16 = slice_by_index(begin = var_3046_begin_0, end = var_3046_end_0, end_mask = var_3046_end_mask_0, x = q_cast_fp16)[name = tensor("op_3046_cast_fp16")]; + tensor var_3050_begin_0 = const()[name = tensor("op_3050_begin_0"), val = tensor([0, 1856, 0, 0])]; + tensor var_3050_end_0 = const()[name = tensor("op_3050_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_3050_end_mask_0 = const()[name = tensor("op_3050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3050_cast_fp16 = slice_by_index(begin = var_3050_begin_0, end = var_3050_end_0, end_mask = var_3050_end_mask_0, x = q_cast_fp16)[name = tensor("op_3050_cast_fp16")]; + tensor var_3054_begin_0 = const()[name = tensor("op_3054_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_3054_end_0 = const()[name = tensor("op_3054_end_0"), val = tensor([1, 1984, 1, 64])]; + tensor var_3054_end_mask_0 = const()[name = tensor("op_3054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3054_cast_fp16 = slice_by_index(begin = var_3054_begin_0, end = var_3054_end_0, end_mask = var_3054_end_mask_0, x = q_cast_fp16)[name = tensor("op_3054_cast_fp16")]; + tensor var_3058_begin_0 = const()[name = tensor("op_3058_begin_0"), val = tensor([0, 1984, 0, 0])]; + tensor var_3058_end_0 = const()[name = tensor("op_3058_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_3058_end_mask_0 = const()[name = tensor("op_3058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3058_cast_fp16 = slice_by_index(begin = var_3058_begin_0, end = var_3058_end_0, end_mask = var_3058_end_mask_0, x = q_cast_fp16)[name = tensor("op_3058_cast_fp16")]; + tensor var_3064_begin_0 = const()[name = tensor("op_3064_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3064_end_0 = const()[name = tensor("op_3064_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_3064_end_mask_0 = const()[name = tensor("op_3064_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3064_cast_fp16 = slice_by_index(begin = var_3064_begin_0, end = var_3064_end_0, end_mask = var_3064_end_mask_0, x = k_cast_fp16)[name = tensor("op_3064_cast_fp16")]; + tensor var_3080_begin_0 = const()[name = tensor("op_3080_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_3080_end_0 = const()[name = tensor("op_3080_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_3080_end_mask_0 = const()[name = tensor("op_3080_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3080_cast_fp16 = slice_by_index(begin = var_3080_begin_0, end = var_3080_end_0, end_mask = var_3080_end_mask_0, x = k_cast_fp16)[name = tensor("op_3080_cast_fp16")]; + tensor var_3096_begin_0 = const()[name = tensor("op_3096_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_3096_end_0 = const()[name = tensor("op_3096_end_0"), val = tensor([1, 512, 1, 192])]; + tensor var_3096_end_mask_0 = const()[name = tensor("op_3096_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3096_cast_fp16 = slice_by_index(begin = var_3096_begin_0, end = var_3096_end_0, end_mask = var_3096_end_mask_0, x = k_cast_fp16)[name = tensor("op_3096_cast_fp16")]; + tensor var_3112_begin_0 = const()[name = tensor("op_3112_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_3112_end_0 = const()[name = tensor("op_3112_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_3112_end_mask_0 = const()[name = tensor("op_3112_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3112_cast_fp16 = slice_by_index(begin = var_3112_begin_0, end = var_3112_end_0, end_mask = var_3112_end_mask_0, x = k_cast_fp16)[name = tensor("op_3112_cast_fp16")]; + tensor var_3128_begin_0 = const()[name = tensor("op_3128_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_3128_end_0 = const()[name = tensor("op_3128_end_0"), val = tensor([1, 512, 1, 320])]; + tensor var_3128_end_mask_0 = const()[name = tensor("op_3128_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3128_cast_fp16 = slice_by_index(begin = var_3128_begin_0, end = var_3128_end_0, end_mask = var_3128_end_mask_0, x = k_cast_fp16)[name = tensor("op_3128_cast_fp16")]; + tensor var_3144_begin_0 = const()[name = tensor("op_3144_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_3144_end_0 = const()[name = tensor("op_3144_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_3144_end_mask_0 = const()[name = tensor("op_3144_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3144_cast_fp16 = slice_by_index(begin = var_3144_begin_0, end = var_3144_end_0, end_mask = var_3144_end_mask_0, x = k_cast_fp16)[name = tensor("op_3144_cast_fp16")]; + tensor var_3160_begin_0 = const()[name = tensor("op_3160_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_3160_end_0 = const()[name = tensor("op_3160_end_0"), val = tensor([1, 512, 1, 448])]; + tensor var_3160_end_mask_0 = const()[name = tensor("op_3160_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3160_cast_fp16 = slice_by_index(begin = var_3160_begin_0, end = var_3160_end_0, end_mask = var_3160_end_mask_0, x = k_cast_fp16)[name = tensor("op_3160_cast_fp16")]; + tensor var_3176_begin_0 = const()[name = tensor("op_3176_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_3176_end_0 = const()[name = tensor("op_3176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_3176_end_mask_0 = const()[name = tensor("op_3176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3176_cast_fp16 = slice_by_index(begin = var_3176_begin_0, end = var_3176_end_0, end_mask = var_3176_end_mask_0, x = k_cast_fp16)[name = tensor("op_3176_cast_fp16")]; + tensor var_3190_begin_0 = const()[name = tensor("op_3190_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3190_end_0 = const()[name = tensor("op_3190_end_0"), val = tensor([1, 64, 1, 512])]; + tensor var_3190_end_mask_0 = const()[name = tensor("op_3190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3190_cast_fp16 = slice_by_index(begin = var_3190_begin_0, end = var_3190_end_0, end_mask = var_3190_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_3190_cast_fp16")]; + tensor var_3206_begin_0 = const()[name = tensor("op_3206_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_3206_end_0 = const()[name = tensor("op_3206_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_3206_end_mask_0 = const()[name = tensor("op_3206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3206_cast_fp16 = slice_by_index(begin = var_3206_begin_0, end = var_3206_end_0, end_mask = var_3206_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_3206_cast_fp16")]; + tensor var_3222_begin_0 = const()[name = tensor("op_3222_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_3222_end_0 = const()[name = tensor("op_3222_end_0"), val = tensor([1, 192, 1, 512])]; + tensor var_3222_end_mask_0 = const()[name = tensor("op_3222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3222_cast_fp16 = slice_by_index(begin = var_3222_begin_0, end = var_3222_end_0, end_mask = var_3222_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_3222_cast_fp16")]; + tensor var_3238_begin_0 = const()[name = tensor("op_3238_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_3238_end_0 = const()[name = tensor("op_3238_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_3238_end_mask_0 = const()[name = tensor("op_3238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3238_cast_fp16 = slice_by_index(begin = var_3238_begin_0, end = var_3238_end_0, end_mask = var_3238_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_3238_cast_fp16")]; + tensor var_3254_begin_0 = const()[name = tensor("op_3254_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_3254_end_0 = const()[name = tensor("op_3254_end_0"), val = tensor([1, 320, 1, 512])]; + tensor var_3254_end_mask_0 = const()[name = tensor("op_3254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3254_cast_fp16 = slice_by_index(begin = var_3254_begin_0, end = var_3254_end_0, end_mask = var_3254_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_3254_cast_fp16")]; + tensor var_3270_begin_0 = const()[name = tensor("op_3270_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_3270_end_0 = const()[name = tensor("op_3270_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_3270_end_mask_0 = const()[name = tensor("op_3270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3270_cast_fp16 = slice_by_index(begin = var_3270_begin_0, end = var_3270_end_0, end_mask = var_3270_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_3270_cast_fp16")]; + tensor var_3286_begin_0 = const()[name = tensor("op_3286_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_3286_end_0 = const()[name = tensor("op_3286_end_0"), val = tensor([1, 448, 1, 512])]; + tensor var_3286_end_mask_0 = const()[name = tensor("op_3286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3286_cast_fp16 = slice_by_index(begin = var_3286_begin_0, end = var_3286_end_0, end_mask = var_3286_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_3286_cast_fp16")]; + tensor var_3302_begin_0 = const()[name = tensor("op_3302_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_3302_end_0 = const()[name = tensor("op_3302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_3302_end_mask_0 = const()[name = tensor("op_3302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3302_cast_fp16 = slice_by_index(begin = var_3302_begin_0, end = var_3302_end_0, end_mask = var_3302_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_3302_cast_fp16")]; + tensor var_3318_equation_0 = const()[name = tensor("op_3318_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3318_cast_fp16 = einsum(equation = var_3318_equation_0, values = (var_3064_cast_fp16, var_2934_cast_fp16))[name = tensor("op_3318_cast_fp16")]; + tensor var_3319_to_fp16 = const()[name = tensor("op_3319_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3320_cast_fp16 = mul(x = var_3318_cast_fp16, y = var_3319_to_fp16)[name = tensor("op_3320_cast_fp16")]; + tensor var_3322_equation_0 = const()[name = tensor("op_3322_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3322_cast_fp16 = einsum(equation = var_3322_equation_0, values = (var_3064_cast_fp16, var_2938_cast_fp16))[name = tensor("op_3322_cast_fp16")]; + tensor var_3323_to_fp16 = const()[name = tensor("op_3323_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3324_cast_fp16 = mul(x = var_3322_cast_fp16, y = var_3323_to_fp16)[name = tensor("op_3324_cast_fp16")]; + tensor var_3326_equation_0 = const()[name = tensor("op_3326_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3326_cast_fp16 = einsum(equation = var_3326_equation_0, values = (var_3064_cast_fp16, var_2942_cast_fp16))[name = tensor("op_3326_cast_fp16")]; + tensor var_3327_to_fp16 = const()[name = tensor("op_3327_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3328_cast_fp16 = mul(x = var_3326_cast_fp16, y = var_3327_to_fp16)[name = tensor("op_3328_cast_fp16")]; + tensor var_3330_equation_0 = const()[name = tensor("op_3330_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3330_cast_fp16 = einsum(equation = var_3330_equation_0, values = (var_3064_cast_fp16, var_2946_cast_fp16))[name = tensor("op_3330_cast_fp16")]; + tensor var_3331_to_fp16 = const()[name = tensor("op_3331_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3332_cast_fp16 = mul(x = var_3330_cast_fp16, y = var_3331_to_fp16)[name = tensor("op_3332_cast_fp16")]; + tensor var_3334_equation_0 = const()[name = tensor("op_3334_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3334_cast_fp16 = einsum(equation = var_3334_equation_0, values = (var_3080_cast_fp16, var_2950_cast_fp16))[name = tensor("op_3334_cast_fp16")]; + tensor var_3335_to_fp16 = const()[name = tensor("op_3335_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3336_cast_fp16 = mul(x = var_3334_cast_fp16, y = var_3335_to_fp16)[name = tensor("op_3336_cast_fp16")]; + tensor var_3338_equation_0 = const()[name = tensor("op_3338_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3338_cast_fp16 = einsum(equation = var_3338_equation_0, values = (var_3080_cast_fp16, var_2954_cast_fp16))[name = tensor("op_3338_cast_fp16")]; + tensor var_3339_to_fp16 = const()[name = tensor("op_3339_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3340_cast_fp16 = mul(x = var_3338_cast_fp16, y = var_3339_to_fp16)[name = tensor("op_3340_cast_fp16")]; + tensor var_3342_equation_0 = const()[name = tensor("op_3342_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3342_cast_fp16 = einsum(equation = var_3342_equation_0, values = (var_3080_cast_fp16, var_2958_cast_fp16))[name = tensor("op_3342_cast_fp16")]; + tensor var_3343_to_fp16 = const()[name = tensor("op_3343_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3344_cast_fp16 = mul(x = var_3342_cast_fp16, y = var_3343_to_fp16)[name = tensor("op_3344_cast_fp16")]; + tensor var_3346_equation_0 = const()[name = tensor("op_3346_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3346_cast_fp16 = einsum(equation = var_3346_equation_0, values = (var_3080_cast_fp16, var_2962_cast_fp16))[name = tensor("op_3346_cast_fp16")]; + tensor var_3347_to_fp16 = const()[name = tensor("op_3347_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3348_cast_fp16 = mul(x = var_3346_cast_fp16, y = var_3347_to_fp16)[name = tensor("op_3348_cast_fp16")]; + tensor var_3350_equation_0 = const()[name = tensor("op_3350_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3350_cast_fp16 = einsum(equation = var_3350_equation_0, values = (var_3096_cast_fp16, var_2966_cast_fp16))[name = tensor("op_3350_cast_fp16")]; + tensor var_3351_to_fp16 = const()[name = tensor("op_3351_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3352_cast_fp16 = mul(x = var_3350_cast_fp16, y = var_3351_to_fp16)[name = tensor("op_3352_cast_fp16")]; + tensor var_3354_equation_0 = const()[name = tensor("op_3354_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3354_cast_fp16 = einsum(equation = var_3354_equation_0, values = (var_3096_cast_fp16, var_2970_cast_fp16))[name = tensor("op_3354_cast_fp16")]; + tensor var_3355_to_fp16 = const()[name = tensor("op_3355_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3356_cast_fp16 = mul(x = var_3354_cast_fp16, y = var_3355_to_fp16)[name = tensor("op_3356_cast_fp16")]; + tensor var_3358_equation_0 = const()[name = tensor("op_3358_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3358_cast_fp16 = einsum(equation = var_3358_equation_0, values = (var_3096_cast_fp16, var_2974_cast_fp16))[name = tensor("op_3358_cast_fp16")]; + tensor var_3359_to_fp16 = const()[name = tensor("op_3359_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3360_cast_fp16 = mul(x = var_3358_cast_fp16, y = var_3359_to_fp16)[name = tensor("op_3360_cast_fp16")]; + tensor var_3362_equation_0 = const()[name = tensor("op_3362_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3362_cast_fp16 = einsum(equation = var_3362_equation_0, values = (var_3096_cast_fp16, var_2978_cast_fp16))[name = tensor("op_3362_cast_fp16")]; + tensor var_3363_to_fp16 = const()[name = tensor("op_3363_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3364_cast_fp16 = mul(x = var_3362_cast_fp16, y = var_3363_to_fp16)[name = tensor("op_3364_cast_fp16")]; + tensor var_3366_equation_0 = const()[name = tensor("op_3366_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3366_cast_fp16 = einsum(equation = var_3366_equation_0, values = (var_3112_cast_fp16, var_2982_cast_fp16))[name = tensor("op_3366_cast_fp16")]; + tensor var_3367_to_fp16 = const()[name = tensor("op_3367_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3368_cast_fp16 = mul(x = var_3366_cast_fp16, y = var_3367_to_fp16)[name = tensor("op_3368_cast_fp16")]; + tensor var_3370_equation_0 = const()[name = tensor("op_3370_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3370_cast_fp16 = einsum(equation = var_3370_equation_0, values = (var_3112_cast_fp16, var_2986_cast_fp16))[name = tensor("op_3370_cast_fp16")]; + tensor var_3371_to_fp16 = const()[name = tensor("op_3371_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3372_cast_fp16 = mul(x = var_3370_cast_fp16, y = var_3371_to_fp16)[name = tensor("op_3372_cast_fp16")]; + tensor var_3374_equation_0 = const()[name = tensor("op_3374_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3374_cast_fp16 = einsum(equation = var_3374_equation_0, values = (var_3112_cast_fp16, var_2990_cast_fp16))[name = tensor("op_3374_cast_fp16")]; + tensor var_3375_to_fp16 = const()[name = tensor("op_3375_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3376_cast_fp16 = mul(x = var_3374_cast_fp16, y = var_3375_to_fp16)[name = tensor("op_3376_cast_fp16")]; + tensor var_3378_equation_0 = const()[name = tensor("op_3378_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3378_cast_fp16 = einsum(equation = var_3378_equation_0, values = (var_3112_cast_fp16, var_2994_cast_fp16))[name = tensor("op_3378_cast_fp16")]; + tensor var_3379_to_fp16 = const()[name = tensor("op_3379_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3380_cast_fp16 = mul(x = var_3378_cast_fp16, y = var_3379_to_fp16)[name = tensor("op_3380_cast_fp16")]; + tensor var_3382_equation_0 = const()[name = tensor("op_3382_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3382_cast_fp16 = einsum(equation = var_3382_equation_0, values = (var_3128_cast_fp16, var_2998_cast_fp16))[name = tensor("op_3382_cast_fp16")]; + tensor var_3383_to_fp16 = const()[name = tensor("op_3383_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3384_cast_fp16 = mul(x = var_3382_cast_fp16, y = var_3383_to_fp16)[name = tensor("op_3384_cast_fp16")]; + tensor var_3386_equation_0 = const()[name = tensor("op_3386_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3386_cast_fp16 = einsum(equation = var_3386_equation_0, values = (var_3128_cast_fp16, var_3002_cast_fp16))[name = tensor("op_3386_cast_fp16")]; + tensor var_3387_to_fp16 = const()[name = tensor("op_3387_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3388_cast_fp16 = mul(x = var_3386_cast_fp16, y = var_3387_to_fp16)[name = tensor("op_3388_cast_fp16")]; + tensor var_3390_equation_0 = const()[name = tensor("op_3390_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3390_cast_fp16 = einsum(equation = var_3390_equation_0, values = (var_3128_cast_fp16, var_3006_cast_fp16))[name = tensor("op_3390_cast_fp16")]; + tensor var_3391_to_fp16 = const()[name = tensor("op_3391_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3392_cast_fp16 = mul(x = var_3390_cast_fp16, y = var_3391_to_fp16)[name = tensor("op_3392_cast_fp16")]; + tensor var_3394_equation_0 = const()[name = tensor("op_3394_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3394_cast_fp16 = einsum(equation = var_3394_equation_0, values = (var_3128_cast_fp16, var_3010_cast_fp16))[name = tensor("op_3394_cast_fp16")]; + tensor var_3395_to_fp16 = const()[name = tensor("op_3395_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3396_cast_fp16 = mul(x = var_3394_cast_fp16, y = var_3395_to_fp16)[name = tensor("op_3396_cast_fp16")]; + tensor var_3398_equation_0 = const()[name = tensor("op_3398_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3398_cast_fp16 = einsum(equation = var_3398_equation_0, values = (var_3144_cast_fp16, var_3014_cast_fp16))[name = tensor("op_3398_cast_fp16")]; + tensor var_3399_to_fp16 = const()[name = tensor("op_3399_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3400_cast_fp16 = mul(x = var_3398_cast_fp16, y = var_3399_to_fp16)[name = tensor("op_3400_cast_fp16")]; + tensor var_3402_equation_0 = const()[name = tensor("op_3402_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3402_cast_fp16 = einsum(equation = var_3402_equation_0, values = (var_3144_cast_fp16, var_3018_cast_fp16))[name = tensor("op_3402_cast_fp16")]; + tensor var_3403_to_fp16 = const()[name = tensor("op_3403_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3404_cast_fp16 = mul(x = var_3402_cast_fp16, y = var_3403_to_fp16)[name = tensor("op_3404_cast_fp16")]; + tensor var_3406_equation_0 = const()[name = tensor("op_3406_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3406_cast_fp16 = einsum(equation = var_3406_equation_0, values = (var_3144_cast_fp16, var_3022_cast_fp16))[name = tensor("op_3406_cast_fp16")]; + tensor var_3407_to_fp16 = const()[name = tensor("op_3407_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3408_cast_fp16 = mul(x = var_3406_cast_fp16, y = var_3407_to_fp16)[name = tensor("op_3408_cast_fp16")]; + tensor var_3410_equation_0 = const()[name = tensor("op_3410_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3410_cast_fp16 = einsum(equation = var_3410_equation_0, values = (var_3144_cast_fp16, var_3026_cast_fp16))[name = tensor("op_3410_cast_fp16")]; + tensor var_3411_to_fp16 = const()[name = tensor("op_3411_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3412_cast_fp16 = mul(x = var_3410_cast_fp16, y = var_3411_to_fp16)[name = tensor("op_3412_cast_fp16")]; + tensor var_3414_equation_0 = const()[name = tensor("op_3414_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3414_cast_fp16 = einsum(equation = var_3414_equation_0, values = (var_3160_cast_fp16, var_3030_cast_fp16))[name = tensor("op_3414_cast_fp16")]; + tensor var_3415_to_fp16 = const()[name = tensor("op_3415_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3416_cast_fp16 = mul(x = var_3414_cast_fp16, y = var_3415_to_fp16)[name = tensor("op_3416_cast_fp16")]; + tensor var_3418_equation_0 = const()[name = tensor("op_3418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3418_cast_fp16 = einsum(equation = var_3418_equation_0, values = (var_3160_cast_fp16, var_3034_cast_fp16))[name = tensor("op_3418_cast_fp16")]; + tensor var_3419_to_fp16 = const()[name = tensor("op_3419_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3420_cast_fp16 = mul(x = var_3418_cast_fp16, y = var_3419_to_fp16)[name = tensor("op_3420_cast_fp16")]; + tensor var_3422_equation_0 = const()[name = tensor("op_3422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3422_cast_fp16 = einsum(equation = var_3422_equation_0, values = (var_3160_cast_fp16, var_3038_cast_fp16))[name = tensor("op_3422_cast_fp16")]; + tensor var_3423_to_fp16 = const()[name = tensor("op_3423_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3424_cast_fp16 = mul(x = var_3422_cast_fp16, y = var_3423_to_fp16)[name = tensor("op_3424_cast_fp16")]; + tensor var_3426_equation_0 = const()[name = tensor("op_3426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3426_cast_fp16 = einsum(equation = var_3426_equation_0, values = (var_3160_cast_fp16, var_3042_cast_fp16))[name = tensor("op_3426_cast_fp16")]; + tensor var_3427_to_fp16 = const()[name = tensor("op_3427_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3428_cast_fp16 = mul(x = var_3426_cast_fp16, y = var_3427_to_fp16)[name = tensor("op_3428_cast_fp16")]; + tensor var_3430_equation_0 = const()[name = tensor("op_3430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3430_cast_fp16 = einsum(equation = var_3430_equation_0, values = (var_3176_cast_fp16, var_3046_cast_fp16))[name = tensor("op_3430_cast_fp16")]; + tensor var_3431_to_fp16 = const()[name = tensor("op_3431_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3432_cast_fp16 = mul(x = var_3430_cast_fp16, y = var_3431_to_fp16)[name = tensor("op_3432_cast_fp16")]; + tensor var_3434_equation_0 = const()[name = tensor("op_3434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3434_cast_fp16 = einsum(equation = var_3434_equation_0, values = (var_3176_cast_fp16, var_3050_cast_fp16))[name = tensor("op_3434_cast_fp16")]; + tensor var_3435_to_fp16 = const()[name = tensor("op_3435_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3436_cast_fp16 = mul(x = var_3434_cast_fp16, y = var_3435_to_fp16)[name = tensor("op_3436_cast_fp16")]; + tensor var_3438_equation_0 = const()[name = tensor("op_3438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3438_cast_fp16 = einsum(equation = var_3438_equation_0, values = (var_3176_cast_fp16, var_3054_cast_fp16))[name = tensor("op_3438_cast_fp16")]; + tensor var_3439_to_fp16 = const()[name = tensor("op_3439_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3440_cast_fp16 = mul(x = var_3438_cast_fp16, y = var_3439_to_fp16)[name = tensor("op_3440_cast_fp16")]; + tensor var_3442_equation_0 = const()[name = tensor("op_3442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_3442_cast_fp16 = einsum(equation = var_3442_equation_0, values = (var_3176_cast_fp16, var_3058_cast_fp16))[name = tensor("op_3442_cast_fp16")]; + tensor var_3443_to_fp16 = const()[name = tensor("op_3443_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3444_cast_fp16 = mul(x = var_3442_cast_fp16, y = var_3443_to_fp16)[name = tensor("op_3444_cast_fp16")]; + tensor aw_193_cast_fp16 = add(x = var_3320_cast_fp16, y = mask)[name = tensor("aw_193_cast_fp16")]; + tensor aw_195_cast_fp16 = add(x = var_3324_cast_fp16, y = mask)[name = tensor("aw_195_cast_fp16")]; + tensor aw_197_cast_fp16 = add(x = var_3328_cast_fp16, y = mask)[name = tensor("aw_197_cast_fp16")]; + tensor aw_199_cast_fp16 = add(x = var_3332_cast_fp16, y = mask)[name = tensor("aw_199_cast_fp16")]; + tensor aw_201_cast_fp16 = add(x = var_3336_cast_fp16, y = mask)[name = tensor("aw_201_cast_fp16")]; + tensor aw_203_cast_fp16 = add(x = var_3340_cast_fp16, y = mask)[name = tensor("aw_203_cast_fp16")]; + tensor aw_205_cast_fp16 = add(x = var_3344_cast_fp16, y = mask)[name = tensor("aw_205_cast_fp16")]; + tensor aw_207_cast_fp16 = add(x = var_3348_cast_fp16, y = mask)[name = tensor("aw_207_cast_fp16")]; + tensor aw_209_cast_fp16 = add(x = var_3352_cast_fp16, y = mask)[name = tensor("aw_209_cast_fp16")]; + tensor aw_211_cast_fp16 = add(x = var_3356_cast_fp16, y = mask)[name = tensor("aw_211_cast_fp16")]; + tensor aw_213_cast_fp16 = add(x = var_3360_cast_fp16, y = mask)[name = tensor("aw_213_cast_fp16")]; + tensor aw_215_cast_fp16 = add(x = var_3364_cast_fp16, y = mask)[name = tensor("aw_215_cast_fp16")]; + tensor aw_217_cast_fp16 = add(x = var_3368_cast_fp16, y = mask)[name = tensor("aw_217_cast_fp16")]; + tensor aw_219_cast_fp16 = add(x = var_3372_cast_fp16, y = mask)[name = tensor("aw_219_cast_fp16")]; + tensor aw_221_cast_fp16 = add(x = var_3376_cast_fp16, y = mask)[name = tensor("aw_221_cast_fp16")]; + tensor aw_223_cast_fp16 = add(x = var_3380_cast_fp16, y = mask)[name = tensor("aw_223_cast_fp16")]; + tensor aw_225_cast_fp16 = add(x = var_3384_cast_fp16, y = mask)[name = tensor("aw_225_cast_fp16")]; + tensor aw_227_cast_fp16 = add(x = var_3388_cast_fp16, y = mask)[name = tensor("aw_227_cast_fp16")]; + tensor aw_229_cast_fp16 = add(x = var_3392_cast_fp16, y = mask)[name = tensor("aw_229_cast_fp16")]; + tensor aw_231_cast_fp16 = add(x = var_3396_cast_fp16, y = mask)[name = tensor("aw_231_cast_fp16")]; + tensor aw_233_cast_fp16 = add(x = var_3400_cast_fp16, y = mask)[name = tensor("aw_233_cast_fp16")]; + tensor aw_235_cast_fp16 = add(x = var_3404_cast_fp16, y = mask)[name = tensor("aw_235_cast_fp16")]; + tensor aw_237_cast_fp16 = add(x = var_3408_cast_fp16, y = mask)[name = tensor("aw_237_cast_fp16")]; + tensor aw_239_cast_fp16 = add(x = var_3412_cast_fp16, y = mask)[name = tensor("aw_239_cast_fp16")]; + tensor aw_241_cast_fp16 = add(x = var_3416_cast_fp16, y = mask)[name = tensor("aw_241_cast_fp16")]; + tensor aw_243_cast_fp16 = add(x = var_3420_cast_fp16, y = mask)[name = tensor("aw_243_cast_fp16")]; + tensor aw_245_cast_fp16 = add(x = var_3424_cast_fp16, y = mask)[name = tensor("aw_245_cast_fp16")]; + tensor aw_247_cast_fp16 = add(x = var_3428_cast_fp16, y = mask)[name = tensor("aw_247_cast_fp16")]; + tensor aw_249_cast_fp16 = add(x = var_3432_cast_fp16, y = mask)[name = tensor("aw_249_cast_fp16")]; + tensor aw_251_cast_fp16 = add(x = var_3436_cast_fp16, y = mask)[name = tensor("aw_251_cast_fp16")]; + tensor aw_253_cast_fp16 = add(x = var_3440_cast_fp16, y = mask)[name = tensor("aw_253_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_3444_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_3477_cast_fp16 = softmax(axis = var_2785, x = aw_193_cast_fp16)[name = tensor("op_3477_cast_fp16")]; + tensor var_3478_cast_fp16 = softmax(axis = var_2785, x = aw_195_cast_fp16)[name = tensor("op_3478_cast_fp16")]; + tensor var_3479_cast_fp16 = softmax(axis = var_2785, x = aw_197_cast_fp16)[name = tensor("op_3479_cast_fp16")]; + tensor var_3480_cast_fp16 = softmax(axis = var_2785, x = aw_199_cast_fp16)[name = tensor("op_3480_cast_fp16")]; + tensor var_3481_cast_fp16 = softmax(axis = var_2785, x = aw_201_cast_fp16)[name = tensor("op_3481_cast_fp16")]; + tensor var_3482_cast_fp16 = softmax(axis = var_2785, x = aw_203_cast_fp16)[name = tensor("op_3482_cast_fp16")]; + tensor var_3483_cast_fp16 = softmax(axis = var_2785, x = aw_205_cast_fp16)[name = tensor("op_3483_cast_fp16")]; + tensor var_3484_cast_fp16 = softmax(axis = var_2785, x = aw_207_cast_fp16)[name = tensor("op_3484_cast_fp16")]; + tensor var_3485_cast_fp16 = softmax(axis = var_2785, x = aw_209_cast_fp16)[name = tensor("op_3485_cast_fp16")]; + tensor var_3486_cast_fp16 = softmax(axis = var_2785, x = aw_211_cast_fp16)[name = tensor("op_3486_cast_fp16")]; + tensor var_3487_cast_fp16 = softmax(axis = var_2785, x = aw_213_cast_fp16)[name = tensor("op_3487_cast_fp16")]; + tensor var_3488_cast_fp16 = softmax(axis = var_2785, x = aw_215_cast_fp16)[name = tensor("op_3488_cast_fp16")]; + tensor var_3489_cast_fp16 = softmax(axis = var_2785, x = aw_217_cast_fp16)[name = tensor("op_3489_cast_fp16")]; + tensor var_3490_cast_fp16 = softmax(axis = var_2785, x = aw_219_cast_fp16)[name = tensor("op_3490_cast_fp16")]; + tensor var_3491_cast_fp16 = softmax(axis = var_2785, x = aw_221_cast_fp16)[name = tensor("op_3491_cast_fp16")]; + tensor var_3492_cast_fp16 = softmax(axis = var_2785, x = aw_223_cast_fp16)[name = tensor("op_3492_cast_fp16")]; + tensor var_3493_cast_fp16 = softmax(axis = var_2785, x = aw_225_cast_fp16)[name = tensor("op_3493_cast_fp16")]; + tensor var_3494_cast_fp16 = softmax(axis = var_2785, x = aw_227_cast_fp16)[name = tensor("op_3494_cast_fp16")]; + tensor var_3495_cast_fp16 = softmax(axis = var_2785, x = aw_229_cast_fp16)[name = tensor("op_3495_cast_fp16")]; + tensor var_3496_cast_fp16 = softmax(axis = var_2785, x = aw_231_cast_fp16)[name = tensor("op_3496_cast_fp16")]; + tensor var_3497_cast_fp16 = softmax(axis = var_2785, x = aw_233_cast_fp16)[name = tensor("op_3497_cast_fp16")]; + tensor var_3498_cast_fp16 = softmax(axis = var_2785, x = aw_235_cast_fp16)[name = tensor("op_3498_cast_fp16")]; + tensor var_3499_cast_fp16 = softmax(axis = var_2785, x = aw_237_cast_fp16)[name = tensor("op_3499_cast_fp16")]; + tensor var_3500_cast_fp16 = softmax(axis = var_2785, x = aw_239_cast_fp16)[name = tensor("op_3500_cast_fp16")]; + tensor var_3501_cast_fp16 = softmax(axis = var_2785, x = aw_241_cast_fp16)[name = tensor("op_3501_cast_fp16")]; + tensor var_3502_cast_fp16 = softmax(axis = var_2785, x = aw_243_cast_fp16)[name = tensor("op_3502_cast_fp16")]; + tensor var_3503_cast_fp16 = softmax(axis = var_2785, x = aw_245_cast_fp16)[name = tensor("op_3503_cast_fp16")]; + tensor var_3504_cast_fp16 = softmax(axis = var_2785, x = aw_247_cast_fp16)[name = tensor("op_3504_cast_fp16")]; + tensor var_3505_cast_fp16 = softmax(axis = var_2785, x = aw_249_cast_fp16)[name = tensor("op_3505_cast_fp16")]; + tensor var_3506_cast_fp16 = softmax(axis = var_2785, x = aw_251_cast_fp16)[name = tensor("op_3506_cast_fp16")]; + tensor var_3507_cast_fp16 = softmax(axis = var_2785, x = aw_253_cast_fp16)[name = tensor("op_3507_cast_fp16")]; + tensor var_3508_cast_fp16 = softmax(axis = var_2785, x = aw_cast_fp16)[name = tensor("op_3508_cast_fp16")]; + tensor var_3510_equation_0 = const()[name = tensor("op_3510_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3510_cast_fp16 = einsum(equation = var_3510_equation_0, values = (var_3190_cast_fp16, var_3477_cast_fp16))[name = tensor("op_3510_cast_fp16")]; + tensor var_3512_equation_0 = const()[name = tensor("op_3512_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3512_cast_fp16 = einsum(equation = var_3512_equation_0, values = (var_3190_cast_fp16, var_3478_cast_fp16))[name = tensor("op_3512_cast_fp16")]; + tensor var_3514_equation_0 = const()[name = tensor("op_3514_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3514_cast_fp16 = einsum(equation = var_3514_equation_0, values = (var_3190_cast_fp16, var_3479_cast_fp16))[name = tensor("op_3514_cast_fp16")]; + tensor var_3516_equation_0 = const()[name = tensor("op_3516_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3516_cast_fp16 = einsum(equation = var_3516_equation_0, values = (var_3190_cast_fp16, var_3480_cast_fp16))[name = tensor("op_3516_cast_fp16")]; + tensor var_3518_equation_0 = const()[name = tensor("op_3518_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3518_cast_fp16 = einsum(equation = var_3518_equation_0, values = (var_3206_cast_fp16, var_3481_cast_fp16))[name = tensor("op_3518_cast_fp16")]; + tensor var_3520_equation_0 = const()[name = tensor("op_3520_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3520_cast_fp16 = einsum(equation = var_3520_equation_0, values = (var_3206_cast_fp16, var_3482_cast_fp16))[name = tensor("op_3520_cast_fp16")]; + tensor var_3522_equation_0 = const()[name = tensor("op_3522_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3522_cast_fp16 = einsum(equation = var_3522_equation_0, values = (var_3206_cast_fp16, var_3483_cast_fp16))[name = tensor("op_3522_cast_fp16")]; + tensor var_3524_equation_0 = const()[name = tensor("op_3524_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3524_cast_fp16 = einsum(equation = var_3524_equation_0, values = (var_3206_cast_fp16, var_3484_cast_fp16))[name = tensor("op_3524_cast_fp16")]; + tensor var_3526_equation_0 = const()[name = tensor("op_3526_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3526_cast_fp16 = einsum(equation = var_3526_equation_0, values = (var_3222_cast_fp16, var_3485_cast_fp16))[name = tensor("op_3526_cast_fp16")]; + tensor var_3528_equation_0 = const()[name = tensor("op_3528_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3528_cast_fp16 = einsum(equation = var_3528_equation_0, values = (var_3222_cast_fp16, var_3486_cast_fp16))[name = tensor("op_3528_cast_fp16")]; + tensor var_3530_equation_0 = const()[name = tensor("op_3530_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3530_cast_fp16 = einsum(equation = var_3530_equation_0, values = (var_3222_cast_fp16, var_3487_cast_fp16))[name = tensor("op_3530_cast_fp16")]; + tensor var_3532_equation_0 = const()[name = tensor("op_3532_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3532_cast_fp16 = einsum(equation = var_3532_equation_0, values = (var_3222_cast_fp16, var_3488_cast_fp16))[name = tensor("op_3532_cast_fp16")]; + tensor var_3534_equation_0 = const()[name = tensor("op_3534_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3534_cast_fp16 = einsum(equation = var_3534_equation_0, values = (var_3238_cast_fp16, var_3489_cast_fp16))[name = tensor("op_3534_cast_fp16")]; + tensor var_3536_equation_0 = const()[name = tensor("op_3536_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3536_cast_fp16 = einsum(equation = var_3536_equation_0, values = (var_3238_cast_fp16, var_3490_cast_fp16))[name = tensor("op_3536_cast_fp16")]; + tensor var_3538_equation_0 = const()[name = tensor("op_3538_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3538_cast_fp16 = einsum(equation = var_3538_equation_0, values = (var_3238_cast_fp16, var_3491_cast_fp16))[name = tensor("op_3538_cast_fp16")]; + tensor var_3540_equation_0 = const()[name = tensor("op_3540_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3540_cast_fp16 = einsum(equation = var_3540_equation_0, values = (var_3238_cast_fp16, var_3492_cast_fp16))[name = tensor("op_3540_cast_fp16")]; + tensor var_3542_equation_0 = const()[name = tensor("op_3542_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3542_cast_fp16 = einsum(equation = var_3542_equation_0, values = (var_3254_cast_fp16, var_3493_cast_fp16))[name = tensor("op_3542_cast_fp16")]; + tensor var_3544_equation_0 = const()[name = tensor("op_3544_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3544_cast_fp16 = einsum(equation = var_3544_equation_0, values = (var_3254_cast_fp16, var_3494_cast_fp16))[name = tensor("op_3544_cast_fp16")]; + tensor var_3546_equation_0 = const()[name = tensor("op_3546_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3546_cast_fp16 = einsum(equation = var_3546_equation_0, values = (var_3254_cast_fp16, var_3495_cast_fp16))[name = tensor("op_3546_cast_fp16")]; + tensor var_3548_equation_0 = const()[name = tensor("op_3548_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3548_cast_fp16 = einsum(equation = var_3548_equation_0, values = (var_3254_cast_fp16, var_3496_cast_fp16))[name = tensor("op_3548_cast_fp16")]; + tensor var_3550_equation_0 = const()[name = tensor("op_3550_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3550_cast_fp16 = einsum(equation = var_3550_equation_0, values = (var_3270_cast_fp16, var_3497_cast_fp16))[name = tensor("op_3550_cast_fp16")]; + tensor var_3552_equation_0 = const()[name = tensor("op_3552_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3552_cast_fp16 = einsum(equation = var_3552_equation_0, values = (var_3270_cast_fp16, var_3498_cast_fp16))[name = tensor("op_3552_cast_fp16")]; + tensor var_3554_equation_0 = const()[name = tensor("op_3554_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3554_cast_fp16 = einsum(equation = var_3554_equation_0, values = (var_3270_cast_fp16, var_3499_cast_fp16))[name = tensor("op_3554_cast_fp16")]; + tensor var_3556_equation_0 = const()[name = tensor("op_3556_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3556_cast_fp16 = einsum(equation = var_3556_equation_0, values = (var_3270_cast_fp16, var_3500_cast_fp16))[name = tensor("op_3556_cast_fp16")]; + tensor var_3558_equation_0 = const()[name = tensor("op_3558_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3558_cast_fp16 = einsum(equation = var_3558_equation_0, values = (var_3286_cast_fp16, var_3501_cast_fp16))[name = tensor("op_3558_cast_fp16")]; + tensor var_3560_equation_0 = const()[name = tensor("op_3560_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3560_cast_fp16 = einsum(equation = var_3560_equation_0, values = (var_3286_cast_fp16, var_3502_cast_fp16))[name = tensor("op_3560_cast_fp16")]; + tensor var_3562_equation_0 = const()[name = tensor("op_3562_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3562_cast_fp16 = einsum(equation = var_3562_equation_0, values = (var_3286_cast_fp16, var_3503_cast_fp16))[name = tensor("op_3562_cast_fp16")]; + tensor var_3564_equation_0 = const()[name = tensor("op_3564_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3564_cast_fp16 = einsum(equation = var_3564_equation_0, values = (var_3286_cast_fp16, var_3504_cast_fp16))[name = tensor("op_3564_cast_fp16")]; + tensor var_3566_equation_0 = const()[name = tensor("op_3566_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3566_cast_fp16 = einsum(equation = var_3566_equation_0, values = (var_3302_cast_fp16, var_3505_cast_fp16))[name = tensor("op_3566_cast_fp16")]; + tensor var_3568_equation_0 = const()[name = tensor("op_3568_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3568_cast_fp16 = einsum(equation = var_3568_equation_0, values = (var_3302_cast_fp16, var_3506_cast_fp16))[name = tensor("op_3568_cast_fp16")]; + tensor var_3570_equation_0 = const()[name = tensor("op_3570_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3570_cast_fp16 = einsum(equation = var_3570_equation_0, values = (var_3302_cast_fp16, var_3507_cast_fp16))[name = tensor("op_3570_cast_fp16")]; + tensor var_3572_equation_0 = const()[name = tensor("op_3572_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3572_cast_fp16 = einsum(equation = var_3572_equation_0, values = (var_3302_cast_fp16, var_3508_cast_fp16))[name = tensor("op_3572_cast_fp16")]; + tensor x_75_interleave_0 = const()[name = tensor("x_75_interleave_0"), val = tensor(false)]; + tensor x_75_cast_fp16 = concat(axis = var_2785, interleave = x_75_interleave_0, values = (var_3510_cast_fp16, var_3512_cast_fp16, var_3514_cast_fp16, var_3516_cast_fp16, var_3518_cast_fp16, var_3520_cast_fp16, var_3522_cast_fp16, var_3524_cast_fp16, var_3526_cast_fp16, var_3528_cast_fp16, var_3530_cast_fp16, var_3532_cast_fp16, var_3534_cast_fp16, var_3536_cast_fp16, var_3538_cast_fp16, var_3540_cast_fp16, var_3542_cast_fp16, var_3544_cast_fp16, var_3546_cast_fp16, var_3548_cast_fp16, var_3550_cast_fp16, var_3552_cast_fp16, var_3554_cast_fp16, var_3556_cast_fp16, var_3558_cast_fp16, var_3560_cast_fp16, var_3562_cast_fp16, var_3564_cast_fp16, var_3566_cast_fp16, var_3568_cast_fp16, var_3570_cast_fp16, var_3572_cast_fp16))[name = tensor("x_75_cast_fp16")]; + tensor var_3577 = const()[name = tensor("op_3577"), val = tensor([1, 2048, -1, 8])]; + tensor input_33_cast_fp16 = reshape(shape = var_3577, x = x_75_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor var_3580 = const()[name = tensor("op_3580"), val = tensor([1, 1])]; + tensor var_3582 = const()[name = tensor("op_3582"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_3_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_3_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377519424)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_3582, groups = var_2785, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_3580, weight = blocks_3_attn_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attention_output_cast_fp16, y = x_61_cast_fp16)[name = tensor("x_77_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385908096)))]; + tensor x_eps_cast_fp16 = concat(axis = var_2785, interleave = x_eps_interleave_0, values = (x_77_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_2788, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_43_cast_fp16 = real_div(x = x_77_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_43_cast_fp16")]; + tensor var_3608_to_fp16 = const()[name = tensor("op_3608_to_fp16"), val = tensor(0x1.6ap+5)]; + tensor x_normed_45_cast_fp16 = mul(x = x_normed_43_cast_fp16, y = var_3608_to_fp16)[name = tensor("x_normed_45_cast_fp16")]; + tensor blocks_3_norm_2_weight_to_fp16 = const()[name = tensor("blocks_3_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385908288)))]; + tensor input_35_cast_fp16 = mul(x = x_normed_45_cast_fp16, y = blocks_3_norm_2_weight_to_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_3619 = const()[name = tensor("op_3619"), val = tensor([1, 1])]; + tensor var_3621 = const()[name = tensor("op_3621"), val = tensor([1, 1])]; + tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("custom")]; + tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_3_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_3_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385912448)))]; + tensor input_37_cast_fp16 = conv(dilations = var_3621, groups = var_2785, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = var_3619, weight = blocks_3_mlp_fc_1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor var_3625 = const()[name = tensor("op_3625"), val = tensor([1, 1])]; + tensor var_3627 = const()[name = tensor("op_3627"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_3_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_3_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419466944)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_3627, groups = var_2785, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_3625, weight = blocks_3_mlp_fc_2_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_3630_cast_fp16 = silu(x = input_37_cast_fp16)[name = tensor("op_3630_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_3630_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_3633 = const()[name = tensor("op_3633"), val = tensor([1, 1])]; + tensor var_3635 = const()[name = tensor("op_3635"), val = tensor([1, 1])]; + tensor var_3637_pad_type_0 = const()[name = tensor("op_3637_pad_type_0"), val = tensor("custom")]; + tensor var_3637_pad_0 = const()[name = tensor("op_3637_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_3_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_3_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453021440)))]; + tensor var_3637_cast_fp16 = conv(dilations = var_3635, groups = var_2785, pad = var_3637_pad_0, pad_type = var_3637_pad_type_0, strides = var_3633, weight = blocks_3_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_3637_cast_fp16")]; + tensor new_x = add(x = var_3637_cast_fp16, y = x_77_cast_fp16)[name = tensor("op_3638_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1, new_k_cache_2, new_v_cache_2, new_k_cache_3, new_v_cache_3); +} \ No newline at end of file