diff --git "a/SmolLM2-360M-Instruct-4bit.mlmodelc/model.mil" "b/SmolLM2-360M-Instruct-4bit.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/SmolLM2-360M-Instruct-4bit.mlmodelc/model.mil" @@ -0,0 +1,6642 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}})] +{ + func main(tensor causal_mask, tensor input_ids, state> key_cache, state> value_cache) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"causal_mask", [1, 1, 1, 1]}, {"input_ids", [1, 1]}}), ("RangeDims", {{"causal_mask", [[1, 1], [1, 1], [1, 2048], [1, 2048]]}, {"input_ids", [[1, 1], [1, 2048]]}})))] { + tensor var_7_shape_cast_fp16 = shape(x = causal_mask)[name = string("op_7_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_7_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_7_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(3)]; + tensor var_7_shape_cast_fp16_to_int16 = cast(dtype = var_7_shape_cast_fp16_to_int16_dtype_0, x = var_7_shape_cast_fp16)[name = string("cast_264")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_7_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor var_10_shape = shape(x = input_ids)[name = string("op_10_shape")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_10_shape_to_uint16_dtype_0 = const()[name = string("op_10_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_10_shape_to_uint16 = cast(dtype = var_10_shape_to_uint16_dtype_0, x = var_10_shape)[name = string("cast_263")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_10_shape_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_261")]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_262")]; + int32 past_seen_tokens = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("past_seen_tokens")]; + int32 var_85 = const()[name = string("op_85"), val = int32(-1)]; + int32 var_89 = const()[name = string("op_89"), val = int32(3)]; + int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)]; + int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)]; + bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)]; + tensor model_model_embed_tokens_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23593088))))[name = string("model_model_embed_tokens_weight_to_fp16_quantized")]; + tensor inputs_embeds_cast_fp16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = model_model_embed_tokens_weight_to_fp16_quantized)[name = string("inputs_embeds_cast_fp16")]; + tensor var_193_shape_cast_fp16 = shape(x = inputs_embeds_cast_fp16)[name = string("op_193_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_193_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_193_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_193_shape_cast_fp16_to_uint16 = cast(dtype = var_193_shape_cast_fp16_to_uint16_dtype_0, x = var_193_shape_cast_fp16)[name = string("cast_260")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_193_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_259")]; + int32 var_195 = add(x = past_seen_tokens, y = gather_2_cast_uint16_to_int32)[name = string("op_195")]; + int32 const_0 = const()[name = string("const_0"), val = int32(1)]; + tensor cache_position = range_1d(end = var_195, start = past_seen_tokens, step = const_0)[name = string("cache_position")]; + tensor position_ids_axes_0 = const()[name = string("position_ids_axes_0"), val = tensor([0])]; + tensor position_ids = expand_dims(axes = position_ids_axes_0, x = cache_position)[name = string("position_ids")]; + tensor var_208_axes_0 = const()[name = string("op_208_axes_0"), val = tensor([1])]; + tensor var_208 = expand_dims(axes = var_208_axes_0, x = position_ids)[name = string("op_208")]; + bool var_213_transpose_x_0 = const()[name = string("op_213_transpose_x_0"), val = bool(false)]; + bool var_213_transpose_y_0 = const()[name = string("op_213_transpose_y_0"), val = bool(false)]; + tensor const_2_to_fp16 = const()[name = string("const_2_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26542272)))]; + string cast_2_to_fp16_dtype_0 = const()[name = string("cast_2_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_208_to_fp16 = cast(dtype = cast_2_to_fp16_dtype_0, x = var_208)[name = string("cast_258")]; + tensor var_213_cast_fp16 = matmul(transpose_x = var_213_transpose_x_0, transpose_y = var_213_transpose_y_0, x = const_2_to_fp16, y = var_208_to_fp16)[name = string("op_213_cast_fp16")]; + tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; + bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; + tensor freqs_cast_fp16 = transpose(perm = freqs_perm_0, x = var_213_cast_fp16)[name = string("transpose_128")]; + tensor emb_cast_fp16 = concat(axis = var_85, interleave = emb_interleave_0, values = (freqs_cast_fp16, freqs_cast_fp16))[name = string("emb_cast_fp16")]; + tensor cos_1_cast_fp16 = cos(x = emb_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor sin_1_cast_fp16 = sin(x = emb_cast_fp16)[name = string("sin_1_cast_fp16")]; + fp16 var_80_promoted_to_fp16 = const()[name = string("op_80_promoted_to_fp16"), val = fp16(0x1p+1)]; + tensor var_234_cast_fp16 = pow(x = inputs_embeds_cast_fp16, y = var_80_promoted_to_fp16)[name = string("op_234_cast_fp16")]; + tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; + bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; + tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_234_cast_fp16)[name = string("variance_1_cast_fp16")]; + fp16 var_237_to_fp16 = const()[name = string("op_237_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_238_cast_fp16 = add(x = variance_1_cast_fp16, y = var_237_to_fp16)[name = string("op_238_cast_fp16")]; + fp32 var_239_epsilon_0 = const()[name = string("op_239_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_239_cast_fp16 = rsqrt(epsilon = var_239_epsilon_0, x = var_238_cast_fp16)[name = string("op_239_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = mul(x = inputs_embeds_cast_fp16, y = var_239_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26542400)))]; + tensor hidden_states_7_cast_fp16 = mul(x = model_model_layers_0_input_layernorm_weight_to_fp16, y = hidden_states_3_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor var_250_shape_cast_fp16 = shape(x = hidden_states_7_cast_fp16)[name = string("op_250_shape_cast_fp16")]; + int32 gather_4 = const()[name = string("gather_4"), val = int32(1)]; + int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; + int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; + bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; + string var_250_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_250_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; + tensor var_250_shape_cast_fp16_to_uint16 = cast(dtype = var_250_shape_cast_fp16_to_uint16_dtype_0, x = var_250_shape_cast_fp16)[name = string("cast_257")]; + uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_250_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; + string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26544384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27005248))))[name = string("model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27062912)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27064896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27218560))))[name = string("model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27237824)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27238528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27392192))))[name = string("model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor concat_0x = const()[name = string("concat_0x"), val = tensor([1, -1, 15, 64])]; + tensor var_259_cast_fp16 = reshape(shape = concat_0x, x = linear_0_cast_fp16)[name = string("op_259_cast_fp16")]; + tensor q_1_perm_0 = const()[name = string("q_1_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_1x = const()[name = string("concat_1x"), val = tensor([1, -1, 5, 64])]; + tensor var_262_cast_fp16 = reshape(shape = concat_1x, x = linear_1_cast_fp16)[name = string("op_262_cast_fp16")]; + tensor k_1_perm_0 = const()[name = string("k_1_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_2x = const()[name = string("concat_2x"), val = tensor([1, -1, 5, 64])]; + tensor var_265_cast_fp16 = reshape(shape = concat_2x, x = linear_2_cast_fp16)[name = string("op_265_cast_fp16")]; + tensor v_state_1_perm_0 = const()[name = string("v_state_1_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor cos_7_axes_0 = const()[name = string("cos_7_axes_0"), val = tensor([1])]; + tensor cos_7_cast_fp16 = expand_dims(axes = cos_7_axes_0, x = cos_1_cast_fp16)[name = string("cos_7_cast_fp16")]; + tensor sin_7_axes_0 = const()[name = string("sin_7_axes_0"), val = tensor([1])]; + tensor sin_7_cast_fp16 = expand_dims(axes = sin_7_axes_0, x = sin_1_cast_fp16)[name = string("sin_7_cast_fp16")]; + tensor q_1_cast_fp16 = transpose(perm = q_1_perm_0, x = var_259_cast_fp16)[name = string("transpose_127")]; + tensor var_269_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_7_cast_fp16)[name = string("op_269_cast_fp16")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; + fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_280_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_280_cast_fp16")]; + bool var_282_interleave_0 = const()[name = string("op_282_interleave_0"), val = bool(false)]; + tensor var_282_cast_fp16 = concat(axis = var_85, interleave = var_282_interleave_0, values = (var_280_cast_fp16, x1_1_cast_fp16))[name = string("op_282_cast_fp16")]; + tensor var_283_cast_fp16 = mul(x = var_282_cast_fp16, y = sin_7_cast_fp16)[name = string("op_283_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_269_cast_fp16, y = var_283_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = var_262_cast_fp16)[name = string("transpose_126")]; + tensor var_285_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_7_cast_fp16)[name = string("op_285_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_296_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_296_cast_fp16")]; + bool var_298_interleave_0 = const()[name = string("op_298_interleave_0"), val = bool(false)]; + tensor var_298_cast_fp16 = concat(axis = var_85, interleave = var_298_interleave_0, values = (var_296_cast_fp16, x1_3_cast_fp16))[name = string("op_298_cast_fp16")]; + tensor var_299_cast_fp16 = mul(x = var_298_cast_fp16, y = sin_7_cast_fp16)[name = string("op_299_cast_fp16")]; + tensor k_state_1_cast_fp16 = add(x = var_285_cast_fp16, y = var_299_cast_fp16)[name = string("k_state_1_cast_fp16")]; + tensor var_301_shape = shape(x = cache_position)[name = string("op_301_shape")]; + int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; + int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; + bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; + string var_301_shape_to_uint16_dtype_0 = const()[name = string("op_301_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(0)]; + tensor var_301_shape_to_uint16 = cast(dtype = var_301_shape_to_uint16_dtype_0, x = var_301_shape)[name = string("cast_256")]; + uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_301_shape_to_uint16)[name = string("gather_10_cast_uint16")]; + string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_255")]; + int32 end_1 = add(x = past_seen_tokens, y = gather_10_cast_uint16_to_int32)[name = string("end_1")]; + tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_2_axes_0 = const()[name = string("expand_dims_2_axes_0"), val = tensor([0])]; + tensor expand_dims_2 = expand_dims(axes = expand_dims_2_axes_0, x = past_seen_tokens)[name = string("expand_dims_2")]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([5])]; + tensor expand_dims_5_axes_0 = const()[name = string("expand_dims_5_axes_0"), val = tensor([0])]; + tensor expand_dims_5 = expand_dims(axes = expand_dims_5_axes_0, x = end_1)[name = string("expand_dims_5")]; + tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; + int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; + bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; + tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, expand_dims_0, expand_dims_1, expand_dims_2, expand_dims_3))[name = string("concat_5")]; + tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; + tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; + tensor concat_6_values4_0 = const()[name = string("concat_6_values4_0"), val = tensor([0])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_4, expand_dims_5, concat_6_values4_0))[name = string("concat_6")]; + tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = k_state_1_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_64")]; + tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; + tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_1_cast_fp16 = transpose(perm = v_state_1_perm_0, x = var_265_cast_fp16)[name = string("transpose_125")]; + tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = v_state_1_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_65")]; + tensor var_322_begin_0 = const()[name = string("op_322_begin_0"), val = tensor([0, 0, 0, 0, 0])]; + tensor var_322_end_0 = const()[name = string("op_322_end_0"), val = tensor([1, 1, 5, 2048, 64])]; + tensor var_322_end_mask_0 = const()[name = string("op_322_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_322_squeeze_mask_0 = const()[name = string("op_322_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, squeeze_mask = var_322_squeeze_mask_0, x = coreml_update_state_64)[name = string("op_322_cast_fp16")]; + int32 concat_11_values0_0 = const()[name = string("concat_11_values0_0"), val = int32(1)]; + int32 concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = int32(5)]; + int32 concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = int32(64)]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (concat_11_values0_0, concat_11_values1_0, end_1, concat_11_values3_0))[name = string("concat_11")]; + tensor var_325_begin_0 = const()[name = string("op_325_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_325_end_mask_0 = const()[name = string("op_325_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = concat_11, end_mask = var_325_end_mask_0, x = var_322_cast_fp16)[name = string("op_325_cast_fp16")]; + tensor var_327_begin_0 = const()[name = string("op_327_begin_0"), val = tensor([0, 0, 0, 0, 0])]; + tensor var_327_end_0 = const()[name = string("op_327_end_0"), val = tensor([1, 1, 5, 2048, 64])]; + tensor var_327_end_mask_0 = const()[name = string("op_327_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_327_squeeze_mask_0 = const()[name = string("op_327_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_327_cast_fp16 = slice_by_index(begin = var_327_begin_0, end = var_327_end_0, end_mask = var_327_end_mask_0, squeeze_mask = var_327_squeeze_mask_0, x = coreml_update_state_65)[name = string("op_327_cast_fp16")]; + tensor var_330_begin_0 = const()[name = string("op_330_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_330_end_mask_0 = const()[name = string("op_330_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = concat_11, end_mask = var_330_end_mask_0, x = var_327_cast_fp16)[name = string("op_330_cast_fp16")]; + tensor var_332_shape_cast_fp16 = shape(x = var_325_cast_fp16)[name = string("op_332_shape_cast_fp16")]; + int32 gather_13 = const()[name = string("gather_13"), val = int32(1)]; + int32 gather_14 = const()[name = string("gather_14"), val = int32(5)]; + int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; + int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; + bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; + string var_332_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_332_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(2)]; + tensor var_332_shape_cast_fp16_to_uint16 = cast(dtype = var_332_shape_cast_fp16_to_uint16_dtype_0, x = var_332_shape_cast_fp16)[name = string("cast_254")]; + uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_332_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")]; + string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_16 = const()[name = string("gather_16"), val = int32(64)]; + tensor var_339_axes_0 = const()[name = string("op_339_axes_0"), val = tensor([2])]; + tensor var_339_cast_fp16 = expand_dims(axes = var_339_axes_0, x = var_325_cast_fp16)[name = string("op_339_cast_fp16")]; + tensor shape_17_cast_fp16 = shape(x = var_339_cast_fp16)[name = string("shape_17_cast_fp16")]; + int32 concat_13_axis_0 = const()[name = string("concat_13_axis_0"), val = int32(0)]; + bool concat_13_interleave_0 = const()[name = string("concat_13_interleave_0"), val = bool(false)]; + int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_253")]; + tensor concat_13 = concat(axis = concat_13_axis_0, interleave = concat_13_interleave_0, values = (gather_13, gather_14, var_89, gather_15_cast_uint16_to_int32, gather_16))[name = string("concat_13")]; + tensor real_div_0 = real_div(x = concat_13, y = shape_17_cast_fp16)[name = string("real_div_0")]; + tensor hidden_states_11_cast_fp16 = tile(reps = real_div_0, x = var_339_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_3_cast_fp16 = reshape(shape = concat_14x, x = hidden_states_11_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor var_349_shape_cast_fp16 = shape(x = var_330_cast_fp16)[name = string("op_349_shape_cast_fp16")]; + int32 gather_17 = const()[name = string("gather_17"), val = int32(1)]; + int32 gather_18 = const()[name = string("gather_18"), val = int32(5)]; + int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)]; + int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)]; + bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)]; + string var_349_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_349_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(2)]; + tensor var_349_shape_cast_fp16_to_uint16 = cast(dtype = var_349_shape_cast_fp16_to_uint16_dtype_0, x = var_349_shape_cast_fp16)[name = string("cast_252")]; + uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_349_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")]; + string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_20 = const()[name = string("gather_20"), val = int32(64)]; + tensor var_356_axes_0 = const()[name = string("op_356_axes_0"), val = tensor([2])]; + tensor var_356_cast_fp16 = expand_dims(axes = var_356_axes_0, x = var_330_cast_fp16)[name = string("op_356_cast_fp16")]; + tensor shape_22_cast_fp16 = shape(x = var_356_cast_fp16)[name = string("shape_22_cast_fp16")]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_251")]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_17, gather_18, var_89, gather_19_cast_uint16_to_int32, gather_20))[name = string("concat_15")]; + tensor real_div_1 = real_div(x = concat_15, y = shape_22_cast_fp16)[name = string("real_div_1")]; + tensor hidden_states_15_cast_fp16 = tile(reps = real_div_1, x = var_356_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor concat_16x = const()[name = string("concat_16x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_3_cast_fp16 = reshape(shape = concat_16x, x = hidden_states_15_cast_fp16)[name = string("value_states_3_cast_fp16")]; + tensor var_366_shape_cast_fp16 = shape(x = key_states_3_cast_fp16)[name = string("op_366_shape_cast_fp16")]; + int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)]; + int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)]; + bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)]; + string var_366_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_366_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(2)]; + tensor var_366_shape_cast_fp16_to_uint16 = cast(dtype = var_366_shape_cast_fp16_to_uint16_dtype_0, x = var_366_shape_cast_fp16)[name = string("cast_250")]; + uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_366_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")]; + string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_17_values0_0 = const()[name = string("concat_17_values0_0"), val = int32(1)]; + int32 concat_17_values1_0 = const()[name = string("concat_17_values1_0"), val = int32(1)]; + int32 concat_17_values2_0 = const()[name = string("concat_17_values2_0"), val = int32(0)]; + int32 concat_17_axis_0 = const()[name = string("concat_17_axis_0"), val = int32(0)]; + bool concat_17_interleave_0 = const()[name = string("concat_17_interleave_0"), val = bool(false)]; + int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_249")]; + tensor concat_17 = concat(axis = concat_17_axis_0, interleave = concat_17_interleave_0, values = (concat_17_values0_0, concat_17_values1_0, concat_17_values2_0, gather_21_cast_uint16_to_int32))[name = string("concat_17")]; + tensor causal_mask_3_begin_0 = const()[name = string("causal_mask_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_3_end_mask_0 = const()[name = string("causal_mask_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_3_cast_fp16 = slice_by_index(begin = causal_mask_3_begin_0, end = concat_17, end_mask = causal_mask_3_end_mask_0, x = causal_mask)[name = string("causal_mask_3_cast_fp16")]; + tensor attn_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_3_cast_fp16, key = key_states_3_cast_fp16, query = query_states_3_cast_fp16, value = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_372_perm_0 = const()[name = string("op_372_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_248")]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (gather_4, gather_5_cast_uint16_to_int32, var_85))[name = string("concat_18")]; + tensor var_372_cast_fp16 = transpose(perm = var_372_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_124")]; + tensor input_1_cast_fp16 = reshape(shape = concat_18, x = var_372_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27411456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27872320))))[name = string("model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized, x = input_1_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = add(x = inputs_embeds_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + fp16 var_80_promoted_1_to_fp16 = const()[name = string("op_80_promoted_1_to_fp16"), val = fp16(0x1p+1)]; + tensor var_381_cast_fp16 = pow(x = hidden_states_19_cast_fp16, y = var_80_promoted_1_to_fp16)[name = string("op_381_cast_fp16")]; + tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; + bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; + tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_381_cast_fp16)[name = string("variance_3_cast_fp16")]; + fp16 var_384_to_fp16 = const()[name = string("op_384_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_385_cast_fp16 = add(x = variance_3_cast_fp16, y = var_384_to_fp16)[name = string("op_385_cast_fp16")]; + fp32 var_386_epsilon_0 = const()[name = string("op_386_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_386_cast_fp16 = rsqrt(epsilon = var_386_epsilon_0, x = var_385_cast_fp16)[name = string("op_386_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = mul(x = hidden_states_19_cast_fp16, y = var_386_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27929984)))]; + tensor input_3_cast_fp16 = mul(x = model_model_layers_0_post_attention_layernorm_weight_to_fp16, y = hidden_states_23_cast_fp16)[name = string("input_3_cast_fp16")]; + tensor model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27931968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29160832))))[name = string("model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29314496)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized, x = input_3_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor var_398_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_398_cast_fp16")]; + tensor model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29319680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30548544))))[name = string("model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized, x = input_3_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_398_cast_fp16, y = linear_5_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30702208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31931072))))[name = string("model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + fp16 var_80_promoted_2_to_fp16 = const()[name = string("op_80_promoted_2_to_fp16"), val = fp16(0x1p+1)]; + tensor var_411_cast_fp16 = pow(x = hidden_states_29_cast_fp16, y = var_80_promoted_2_to_fp16)[name = string("op_411_cast_fp16")]; + tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; + bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; + tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_411_cast_fp16)[name = string("variance_5_cast_fp16")]; + fp16 var_414_to_fp16 = const()[name = string("op_414_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_415_cast_fp16 = add(x = variance_5_cast_fp16, y = var_414_to_fp16)[name = string("op_415_cast_fp16")]; + fp32 var_416_epsilon_0 = const()[name = string("op_416_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_416_cast_fp16 = rsqrt(epsilon = var_416_epsilon_0, x = var_415_cast_fp16)[name = string("op_416_cast_fp16")]; + tensor hidden_states_33_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = var_416_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32084736)))]; + tensor hidden_states_37_cast_fp16 = mul(x = model_model_layers_1_input_layernorm_weight_to_fp16, y = hidden_states_33_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor var_427_shape_cast_fp16 = shape(x = hidden_states_37_cast_fp16)[name = string("op_427_shape_cast_fp16")]; + int32 gather_22 = const()[name = string("gather_22"), val = int32(1)]; + int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)]; + int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)]; + bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)]; + string var_427_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_427_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)]; + tensor var_427_shape_cast_fp16_to_uint16 = cast(dtype = var_427_shape_cast_fp16_to_uint16_dtype_0, x = var_427_shape_cast_fp16)[name = string("cast_247")]; + uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_427_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")]; + string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32086720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32547584))))[name = string("model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32605248))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32758912))))[name = string("model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32778176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32931840))))[name = string("model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor concat_19x = const()[name = string("concat_19x"), val = tensor([1, -1, 15, 64])]; + tensor var_436_cast_fp16 = reshape(shape = concat_19x, x = linear_7_cast_fp16)[name = string("op_436_cast_fp16")]; + tensor q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_20x = const()[name = string("concat_20x"), val = tensor([1, -1, 5, 64])]; + tensor var_439_cast_fp16 = reshape(shape = concat_20x, x = linear_8_cast_fp16)[name = string("op_439_cast_fp16")]; + tensor k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_21x = const()[name = string("concat_21x"), val = tensor([1, -1, 5, 64])]; + tensor var_442_cast_fp16 = reshape(shape = concat_21x, x = linear_9_cast_fp16)[name = string("op_442_cast_fp16")]; + tensor v_state_3_perm_0 = const()[name = string("v_state_3_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_3_cast_fp16 = transpose(perm = q_3_perm_0, x = var_436_cast_fp16)[name = string("transpose_123")]; + tensor var_446_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_7_cast_fp16)[name = string("op_446_cast_fp16")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; + fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_457_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_5_promoted_to_fp16)[name = string("op_457_cast_fp16")]; + bool var_459_interleave_0 = const()[name = string("op_459_interleave_0"), val = bool(false)]; + tensor var_459_cast_fp16 = concat(axis = var_85, interleave = var_459_interleave_0, values = (var_457_cast_fp16, x1_5_cast_fp16))[name = string("op_459_cast_fp16")]; + tensor var_460_cast_fp16 = mul(x = var_459_cast_fp16, y = sin_7_cast_fp16)[name = string("op_460_cast_fp16")]; + tensor query_states_7_cast_fp16 = add(x = var_446_cast_fp16, y = var_460_cast_fp16)[name = string("query_states_7_cast_fp16")]; + tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = var_439_cast_fp16)[name = string("transpose_122")]; + tensor var_462_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_7_cast_fp16)[name = string("op_462_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; + fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_473_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_473_cast_fp16")]; + bool var_475_interleave_0 = const()[name = string("op_475_interleave_0"), val = bool(false)]; + tensor var_475_cast_fp16 = concat(axis = var_85, interleave = var_475_interleave_0, values = (var_473_cast_fp16, x1_7_cast_fp16))[name = string("op_475_cast_fp16")]; + tensor var_476_cast_fp16 = mul(x = var_475_cast_fp16, y = sin_7_cast_fp16)[name = string("op_476_cast_fp16")]; + tensor k_state_3_cast_fp16 = add(x = var_462_cast_fp16, y = var_476_cast_fp16)[name = string("k_state_3_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor([1])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, expand_dims_12, expand_dims_13, expand_dims_2, expand_dims_15))[name = string("concat_24")]; + tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_24, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = k_state_3_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_66")]; + tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_3_cast_fp16 = transpose(perm = v_state_3_perm_0, x = var_442_cast_fp16)[name = string("transpose_121")]; + tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_24, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = v_state_3_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_67")]; + tensor var_499_begin_0 = const()[name = string("op_499_begin_0"), val = tensor([1, 0, 0, 0, 0])]; + tensor var_499_end_0 = const()[name = string("op_499_end_0"), val = tensor([2, 1, 5, 2048, 64])]; + tensor var_499_end_mask_0 = const()[name = string("op_499_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_499_squeeze_mask_0 = const()[name = string("op_499_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_499_cast_fp16 = slice_by_index(begin = var_499_begin_0, end = var_499_end_0, end_mask = var_499_end_mask_0, squeeze_mask = var_499_squeeze_mask_0, x = coreml_update_state_66)[name = string("op_499_cast_fp16")]; + tensor var_502_begin_0 = const()[name = string("op_502_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_end_mask_0 = const()[name = string("op_502_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = concat_11, end_mask = var_502_end_mask_0, x = var_499_cast_fp16)[name = string("op_502_cast_fp16")]; + tensor var_504_begin_0 = const()[name = string("op_504_begin_0"), val = tensor([1, 0, 0, 0, 0])]; + tensor var_504_end_0 = const()[name = string("op_504_end_0"), val = tensor([2, 1, 5, 2048, 64])]; + tensor var_504_end_mask_0 = const()[name = string("op_504_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_504_squeeze_mask_0 = const()[name = string("op_504_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_504_cast_fp16 = slice_by_index(begin = var_504_begin_0, end = var_504_end_0, end_mask = var_504_end_mask_0, squeeze_mask = var_504_squeeze_mask_0, x = coreml_update_state_67)[name = string("op_504_cast_fp16")]; + tensor var_507_begin_0 = const()[name = string("op_507_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_507_end_mask_0 = const()[name = string("op_507_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_507_cast_fp16 = slice_by_index(begin = var_507_begin_0, end = concat_11, end_mask = var_507_end_mask_0, x = var_504_cast_fp16)[name = string("op_507_cast_fp16")]; + tensor var_509_shape_cast_fp16 = shape(x = var_502_cast_fp16)[name = string("op_509_shape_cast_fp16")]; + int32 gather_31 = const()[name = string("gather_31"), val = int32(1)]; + int32 gather_32 = const()[name = string("gather_32"), val = int32(5)]; + int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)]; + int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)]; + bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)]; + string var_509_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_509_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(2)]; + tensor var_509_shape_cast_fp16_to_uint16 = cast(dtype = var_509_shape_cast_fp16_to_uint16_dtype_0, x = var_509_shape_cast_fp16)[name = string("cast_246")]; + uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_509_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")]; + string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_34 = const()[name = string("gather_34"), val = int32(64)]; + tensor var_516_axes_0 = const()[name = string("op_516_axes_0"), val = tensor([2])]; + tensor var_516_cast_fp16 = expand_dims(axes = var_516_axes_0, x = var_502_cast_fp16)[name = string("op_516_cast_fp16")]; + tensor shape_37_cast_fp16 = shape(x = var_516_cast_fp16)[name = string("shape_37_cast_fp16")]; + int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; + bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; + int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_245")]; + tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (gather_31, gather_32, var_89, gather_33_cast_uint16_to_int32, gather_34))[name = string("concat_32")]; + tensor real_div_2 = real_div(x = concat_32, y = shape_37_cast_fp16)[name = string("real_div_2")]; + tensor hidden_states_41_cast_fp16 = tile(reps = real_div_2, x = var_516_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor concat_33x = const()[name = string("concat_33x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_7_cast_fp16 = reshape(shape = concat_33x, x = hidden_states_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor var_526_shape_cast_fp16 = shape(x = var_507_cast_fp16)[name = string("op_526_shape_cast_fp16")]; + int32 gather_35 = const()[name = string("gather_35"), val = int32(1)]; + int32 gather_36 = const()[name = string("gather_36"), val = int32(5)]; + int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)]; + int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)]; + bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)]; + string var_526_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_526_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(2)]; + tensor var_526_shape_cast_fp16_to_uint16 = cast(dtype = var_526_shape_cast_fp16_to_uint16_dtype_0, x = var_526_shape_cast_fp16)[name = string("cast_244")]; + uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_526_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")]; + string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_38 = const()[name = string("gather_38"), val = int32(64)]; + tensor var_533_axes_0 = const()[name = string("op_533_axes_0"), val = tensor([2])]; + tensor var_533_cast_fp16 = expand_dims(axes = var_533_axes_0, x = var_507_cast_fp16)[name = string("op_533_cast_fp16")]; + tensor shape_42_cast_fp16 = shape(x = var_533_cast_fp16)[name = string("shape_42_cast_fp16")]; + int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; + bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; + int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_243")]; + tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (gather_35, gather_36, var_89, gather_37_cast_uint16_to_int32, gather_38))[name = string("concat_34")]; + tensor real_div_3 = real_div(x = concat_34, y = shape_42_cast_fp16)[name = string("real_div_3")]; + tensor hidden_states_45_cast_fp16 = tile(reps = real_div_3, x = var_533_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_7_cast_fp16 = reshape(shape = concat_35x, x = hidden_states_45_cast_fp16)[name = string("value_states_7_cast_fp16")]; + tensor var_543_shape_cast_fp16 = shape(x = key_states_7_cast_fp16)[name = string("op_543_shape_cast_fp16")]; + int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)]; + int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)]; + bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)]; + string var_543_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_543_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(2)]; + tensor var_543_shape_cast_fp16_to_uint16 = cast(dtype = var_543_shape_cast_fp16_to_uint16_dtype_0, x = var_543_shape_cast_fp16)[name = string("cast_242")]; + uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_543_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")]; + string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = int32(1)]; + int32 concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = int32(1)]; + int32 concat_36_values2_0 = const()[name = string("concat_36_values2_0"), val = int32(0)]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + int32 gather_39_cast_uint16_to_int32 = cast(dtype = gather_39_cast_uint16_to_int32_dtype_0, x = gather_39_cast_uint16)[name = string("cast_241")]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, concat_36_values2_0, gather_39_cast_uint16_to_int32))[name = string("concat_36")]; + tensor causal_mask_5_begin_0 = const()[name = string("causal_mask_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_5_end_mask_0 = const()[name = string("causal_mask_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_5_cast_fp16 = slice_by_index(begin = causal_mask_5_begin_0, end = concat_36, end_mask = causal_mask_5_end_mask_0, x = causal_mask)[name = string("causal_mask_5_cast_fp16")]; + tensor attn_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_5_cast_fp16, key = key_states_7_cast_fp16, query = query_states_7_cast_fp16, value = value_states_7_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_549_perm_0 = const()[name = string("op_549_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; + bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; + int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_240")]; + tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_22, gather_23_cast_uint16_to_int32, var_85))[name = string("concat_37")]; + tensor var_549_cast_fp16 = transpose(perm = var_549_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_120")]; + tensor input_9_cast_fp16 = reshape(shape = concat_37, x = var_549_cast_fp16)[name = string("input_9_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32951104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33411968))))[name = string("model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized, x = input_9_cast_fp16)[name = string("linear_10_cast_fp16")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = linear_10_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; + fp16 var_80_promoted_3_to_fp16 = const()[name = string("op_80_promoted_3_to_fp16"), val = fp16(0x1p+1)]; + tensor var_558_cast_fp16 = pow(x = hidden_states_49_cast_fp16, y = var_80_promoted_3_to_fp16)[name = string("op_558_cast_fp16")]; + tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; + bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; + tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_558_cast_fp16)[name = string("variance_7_cast_fp16")]; + fp16 var_561_to_fp16 = const()[name = string("op_561_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_562_cast_fp16 = add(x = variance_7_cast_fp16, y = var_561_to_fp16)[name = string("op_562_cast_fp16")]; + fp32 var_563_epsilon_0 = const()[name = string("op_563_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_563_cast_fp16 = rsqrt(epsilon = var_563_epsilon_0, x = var_562_cast_fp16)[name = string("op_563_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = var_563_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33469632)))]; + tensor input_11_cast_fp16 = mul(x = model_model_layers_1_post_attention_layernorm_weight_to_fp16, y = hidden_states_53_cast_fp16)[name = string("input_11_cast_fp16")]; + tensor model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33471616))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34700480))))[name = string("model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor var_575_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_575_cast_fp16")]; + tensor model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34854144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36083008))))[name = string("model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_575_cast_fp16, y = linear_12_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36236672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37465536))))[name = string("model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_13_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized, x = input_15_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor hidden_states_59_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_13_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; + fp16 var_80_promoted_4_to_fp16 = const()[name = string("op_80_promoted_4_to_fp16"), val = fp16(0x1p+1)]; + tensor var_588_cast_fp16 = pow(x = hidden_states_59_cast_fp16, y = var_80_promoted_4_to_fp16)[name = string("op_588_cast_fp16")]; + tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; + bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; + tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_588_cast_fp16)[name = string("variance_9_cast_fp16")]; + fp16 var_591_to_fp16 = const()[name = string("op_591_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_592_cast_fp16 = add(x = variance_9_cast_fp16, y = var_591_to_fp16)[name = string("op_592_cast_fp16")]; + fp32 var_593_epsilon_0 = const()[name = string("op_593_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_593_cast_fp16 = rsqrt(epsilon = var_593_epsilon_0, x = var_592_cast_fp16)[name = string("op_593_cast_fp16")]; + tensor hidden_states_63_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = var_593_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37619200)))]; + tensor hidden_states_67_cast_fp16 = mul(x = model_model_layers_2_input_layernorm_weight_to_fp16, y = hidden_states_63_cast_fp16)[name = string("hidden_states_67_cast_fp16")]; + tensor var_604_shape_cast_fp16 = shape(x = hidden_states_67_cast_fp16)[name = string("op_604_shape_cast_fp16")]; + int32 gather_40 = const()[name = string("gather_40"), val = int32(1)]; + int32 gather_41_axis_0 = const()[name = string("gather_41_axis_0"), val = int32(0)]; + int32 gather_41_batch_dims_0 = const()[name = string("gather_41_batch_dims_0"), val = int32(0)]; + bool gather_41_validate_indices_0 = const()[name = string("gather_41_validate_indices_0"), val = bool(false)]; + string var_604_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_604_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_41_to_uint16 = const()[name = string("select_41_to_uint16"), val = uint16(1)]; + tensor var_604_shape_cast_fp16_to_uint16 = cast(dtype = var_604_shape_cast_fp16_to_uint16_dtype_0, x = var_604_shape_cast_fp16)[name = string("cast_239")]; + uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_604_shape_cast_fp16_to_uint16)[name = string("gather_41_cast_uint16")]; + string gather_41_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_41_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37621184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38082048))))[name = string("model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_14_cast_fp16")]; + tensor model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38139712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38293376))))[name = string("model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38312640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38466304))))[name = string("model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_16_cast_fp16")]; + tensor concat_38x = const()[name = string("concat_38x"), val = tensor([1, -1, 15, 64])]; + tensor var_613_cast_fp16 = reshape(shape = concat_38x, x = linear_14_cast_fp16)[name = string("op_613_cast_fp16")]; + tensor q_5_perm_0 = const()[name = string("q_5_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 5, 64])]; + tensor var_616_cast_fp16 = reshape(shape = concat_39x, x = linear_15_cast_fp16)[name = string("op_616_cast_fp16")]; + tensor k_5_perm_0 = const()[name = string("k_5_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_40x = const()[name = string("concat_40x"), val = tensor([1, -1, 5, 64])]; + tensor var_619_cast_fp16 = reshape(shape = concat_40x, x = linear_16_cast_fp16)[name = string("op_619_cast_fp16")]; + tensor v_state_5_perm_0 = const()[name = string("v_state_5_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_5_cast_fp16 = transpose(perm = q_5_perm_0, x = var_613_cast_fp16)[name = string("transpose_119")]; + tensor var_623_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_7_cast_fp16)[name = string("op_623_cast_fp16")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_634_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_7_promoted_to_fp16)[name = string("op_634_cast_fp16")]; + bool var_636_interleave_0 = const()[name = string("op_636_interleave_0"), val = bool(false)]; + tensor var_636_cast_fp16 = concat(axis = var_85, interleave = var_636_interleave_0, values = (var_634_cast_fp16, x1_9_cast_fp16))[name = string("op_636_cast_fp16")]; + tensor var_637_cast_fp16 = mul(x = var_636_cast_fp16, y = sin_7_cast_fp16)[name = string("op_637_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_623_cast_fp16, y = var_637_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = var_616_cast_fp16)[name = string("transpose_118")]; + tensor var_639_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_7_cast_fp16)[name = string("op_639_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_650_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_650_cast_fp16")]; + bool var_652_interleave_0 = const()[name = string("op_652_interleave_0"), val = bool(false)]; + tensor var_652_cast_fp16 = concat(axis = var_85, interleave = var_652_interleave_0, values = (var_650_cast_fp16, x1_11_cast_fp16))[name = string("op_652_cast_fp16")]; + tensor var_653_cast_fp16 = mul(x = var_652_cast_fp16, y = sin_7_cast_fp16)[name = string("op_653_cast_fp16")]; + tensor k_state_5_cast_fp16 = add(x = var_639_cast_fp16, y = var_653_cast_fp16)[name = string("k_state_5_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([0])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor concat_43_values0_0 = const()[name = string("concat_43_values0_0"), val = tensor([2])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (concat_43_values0_0, expand_dims_24, expand_dims_25, expand_dims_2, expand_dims_27))[name = string("concat_43")]; + tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = k_state_5_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_68")]; + tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_5_cast_fp16 = transpose(perm = v_state_5_perm_0, x = var_619_cast_fp16)[name = string("transpose_117")]; + tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = v_state_5_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_69")]; + tensor var_676_begin_0 = const()[name = string("op_676_begin_0"), val = tensor([2, 0, 0, 0, 0])]; + tensor var_676_end_0 = const()[name = string("op_676_end_0"), val = tensor([3, 1, 5, 2048, 64])]; + tensor var_676_end_mask_0 = const()[name = string("op_676_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_676_squeeze_mask_0 = const()[name = string("op_676_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_676_cast_fp16 = slice_by_index(begin = var_676_begin_0, end = var_676_end_0, end_mask = var_676_end_mask_0, squeeze_mask = var_676_squeeze_mask_0, x = coreml_update_state_68)[name = string("op_676_cast_fp16")]; + tensor var_679_begin_0 = const()[name = string("op_679_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_679_end_mask_0 = const()[name = string("op_679_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_679_cast_fp16 = slice_by_index(begin = var_679_begin_0, end = concat_11, end_mask = var_679_end_mask_0, x = var_676_cast_fp16)[name = string("op_679_cast_fp16")]; + tensor var_681_begin_0 = const()[name = string("op_681_begin_0"), val = tensor([2, 0, 0, 0, 0])]; + tensor var_681_end_0 = const()[name = string("op_681_end_0"), val = tensor([3, 1, 5, 2048, 64])]; + tensor var_681_end_mask_0 = const()[name = string("op_681_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_681_squeeze_mask_0 = const()[name = string("op_681_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_681_cast_fp16 = slice_by_index(begin = var_681_begin_0, end = var_681_end_0, end_mask = var_681_end_mask_0, squeeze_mask = var_681_squeeze_mask_0, x = coreml_update_state_69)[name = string("op_681_cast_fp16")]; + tensor var_684_begin_0 = const()[name = string("op_684_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_684_end_mask_0 = const()[name = string("op_684_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_684_cast_fp16 = slice_by_index(begin = var_684_begin_0, end = concat_11, end_mask = var_684_end_mask_0, x = var_681_cast_fp16)[name = string("op_684_cast_fp16")]; + tensor var_686_shape_cast_fp16 = shape(x = var_679_cast_fp16)[name = string("op_686_shape_cast_fp16")]; + int32 gather_49 = const()[name = string("gather_49"), val = int32(1)]; + int32 gather_50 = const()[name = string("gather_50"), val = int32(5)]; + int32 gather_51_axis_0 = const()[name = string("gather_51_axis_0"), val = int32(0)]; + int32 gather_51_batch_dims_0 = const()[name = string("gather_51_batch_dims_0"), val = int32(0)]; + bool gather_51_validate_indices_0 = const()[name = string("gather_51_validate_indices_0"), val = bool(false)]; + string var_686_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_686_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_51_to_uint16 = const()[name = string("select_51_to_uint16"), val = uint16(2)]; + tensor var_686_shape_cast_fp16_to_uint16 = cast(dtype = var_686_shape_cast_fp16_to_uint16_dtype_0, x = var_686_shape_cast_fp16)[name = string("cast_238")]; + uint16 gather_51_cast_uint16 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51_to_uint16, validate_indices = gather_51_validate_indices_0, x = var_686_shape_cast_fp16_to_uint16)[name = string("gather_51_cast_uint16")]; + string gather_51_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_51_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_52 = const()[name = string("gather_52"), val = int32(64)]; + tensor var_693_axes_0 = const()[name = string("op_693_axes_0"), val = tensor([2])]; + tensor var_693_cast_fp16 = expand_dims(axes = var_693_axes_0, x = var_679_cast_fp16)[name = string("op_693_cast_fp16")]; + tensor shape_57_cast_fp16 = shape(x = var_693_cast_fp16)[name = string("shape_57_cast_fp16")]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + int32 gather_51_cast_uint16_to_int32 = cast(dtype = gather_51_cast_uint16_to_int32_dtype_0, x = gather_51_cast_uint16)[name = string("cast_237")]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (gather_49, gather_50, var_89, gather_51_cast_uint16_to_int32, gather_52))[name = string("concat_51")]; + tensor real_div_4 = real_div(x = concat_51, y = shape_57_cast_fp16)[name = string("real_div_4")]; + tensor hidden_states_71_cast_fp16 = tile(reps = real_div_4, x = var_693_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor concat_52x = const()[name = string("concat_52x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_11_cast_fp16 = reshape(shape = concat_52x, x = hidden_states_71_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor var_703_shape_cast_fp16 = shape(x = var_684_cast_fp16)[name = string("op_703_shape_cast_fp16")]; + int32 gather_53 = const()[name = string("gather_53"), val = int32(1)]; + int32 gather_54 = const()[name = string("gather_54"), val = int32(5)]; + int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)]; + int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)]; + bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)]; + string var_703_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_703_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_55_to_uint16 = const()[name = string("select_55_to_uint16"), val = uint16(2)]; + tensor var_703_shape_cast_fp16_to_uint16 = cast(dtype = var_703_shape_cast_fp16_to_uint16_dtype_0, x = var_703_shape_cast_fp16)[name = string("cast_236")]; + uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_703_shape_cast_fp16_to_uint16)[name = string("gather_55_cast_uint16")]; + string gather_55_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_55_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_56 = const()[name = string("gather_56"), val = int32(64)]; + tensor var_710_axes_0 = const()[name = string("op_710_axes_0"), val = tensor([2])]; + tensor var_710_cast_fp16 = expand_dims(axes = var_710_axes_0, x = var_684_cast_fp16)[name = string("op_710_cast_fp16")]; + tensor shape_62_cast_fp16 = shape(x = var_710_cast_fp16)[name = string("shape_62_cast_fp16")]; + int32 concat_53_axis_0 = const()[name = string("concat_53_axis_0"), val = int32(0)]; + bool concat_53_interleave_0 = const()[name = string("concat_53_interleave_0"), val = bool(false)]; + int32 gather_55_cast_uint16_to_int32 = cast(dtype = gather_55_cast_uint16_to_int32_dtype_0, x = gather_55_cast_uint16)[name = string("cast_235")]; + tensor concat_53 = concat(axis = concat_53_axis_0, interleave = concat_53_interleave_0, values = (gather_53, gather_54, var_89, gather_55_cast_uint16_to_int32, gather_56))[name = string("concat_53")]; + tensor real_div_5 = real_div(x = concat_53, y = shape_62_cast_fp16)[name = string("real_div_5")]; + tensor hidden_states_75_cast_fp16 = tile(reps = real_div_5, x = var_710_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; + tensor concat_54x = const()[name = string("concat_54x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_11_cast_fp16 = reshape(shape = concat_54x, x = hidden_states_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; + tensor var_720_shape_cast_fp16 = shape(x = key_states_11_cast_fp16)[name = string("op_720_shape_cast_fp16")]; + int32 gather_57_axis_0 = const()[name = string("gather_57_axis_0"), val = int32(0)]; + int32 gather_57_batch_dims_0 = const()[name = string("gather_57_batch_dims_0"), val = int32(0)]; + bool gather_57_validate_indices_0 = const()[name = string("gather_57_validate_indices_0"), val = bool(false)]; + string var_720_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_720_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_57_to_uint16 = const()[name = string("select_57_to_uint16"), val = uint16(2)]; + tensor var_720_shape_cast_fp16_to_uint16 = cast(dtype = var_720_shape_cast_fp16_to_uint16_dtype_0, x = var_720_shape_cast_fp16)[name = string("cast_234")]; + uint16 gather_57_cast_uint16 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57_to_uint16, validate_indices = gather_57_validate_indices_0, x = var_720_shape_cast_fp16_to_uint16)[name = string("gather_57_cast_uint16")]; + string gather_57_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_57_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_55_values0_0 = const()[name = string("concat_55_values0_0"), val = int32(1)]; + int32 concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = int32(1)]; + int32 concat_55_values2_0 = const()[name = string("concat_55_values2_0"), val = int32(0)]; + int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; + bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; + int32 gather_57_cast_uint16_to_int32 = cast(dtype = gather_57_cast_uint16_to_int32_dtype_0, x = gather_57_cast_uint16)[name = string("cast_233")]; + tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (concat_55_values0_0, concat_55_values1_0, concat_55_values2_0, gather_57_cast_uint16_to_int32))[name = string("concat_55")]; + tensor causal_mask_7_begin_0 = const()[name = string("causal_mask_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_7_end_mask_0 = const()[name = string("causal_mask_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_7_cast_fp16 = slice_by_index(begin = causal_mask_7_begin_0, end = concat_55, end_mask = causal_mask_7_end_mask_0, x = causal_mask)[name = string("causal_mask_7_cast_fp16")]; + tensor attn_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_7_cast_fp16, key = key_states_11_cast_fp16, query = query_states_11_cast_fp16, value = value_states_11_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_726_perm_0 = const()[name = string("op_726_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; + bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; + int32 gather_41_cast_uint16_to_int32 = cast(dtype = gather_41_cast_uint16_to_int32_dtype_0, x = gather_41_cast_uint16)[name = string("cast_232")]; + tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (gather_40, gather_41_cast_uint16_to_int32, var_85))[name = string("concat_56")]; + tensor var_726_cast_fp16 = transpose(perm = var_726_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_116")]; + tensor input_17_cast_fp16 = reshape(shape = concat_56, x = var_726_cast_fp16)[name = string("input_17_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38485568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38946432))))[name = string("model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_17_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized, x = input_17_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor hidden_states_79_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = linear_17_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; + fp16 var_80_promoted_5_to_fp16 = const()[name = string("op_80_promoted_5_to_fp16"), val = fp16(0x1p+1)]; + tensor var_735_cast_fp16 = pow(x = hidden_states_79_cast_fp16, y = var_80_promoted_5_to_fp16)[name = string("op_735_cast_fp16")]; + tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([-1])]; + bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; + tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_735_cast_fp16)[name = string("variance_11_cast_fp16")]; + fp16 var_738_to_fp16 = const()[name = string("op_738_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_739_cast_fp16 = add(x = variance_11_cast_fp16, y = var_738_to_fp16)[name = string("op_739_cast_fp16")]; + fp32 var_740_epsilon_0 = const()[name = string("op_740_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_740_cast_fp16 = rsqrt(epsilon = var_740_epsilon_0, x = var_739_cast_fp16)[name = string("op_740_cast_fp16")]; + tensor hidden_states_83_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = var_740_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39004096)))]; + tensor input_19_cast_fp16 = mul(x = model_model_layers_2_post_attention_layernorm_weight_to_fp16, y = hidden_states_83_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39006080))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40234944))))[name = string("model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_752_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_752_cast_fp16")]; + tensor model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40388608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41617472))))[name = string("model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor input_23_cast_fp16 = mul(x = var_752_cast_fp16, y = linear_19_cast_fp16)[name = string("input_23_cast_fp16")]; + tensor model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41771136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43000000))))[name = string("model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized, x = input_23_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor hidden_states_89_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = linear_20_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; + fp16 var_80_promoted_6_to_fp16 = const()[name = string("op_80_promoted_6_to_fp16"), val = fp16(0x1p+1)]; + tensor var_765_cast_fp16 = pow(x = hidden_states_89_cast_fp16, y = var_80_promoted_6_to_fp16)[name = string("op_765_cast_fp16")]; + tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([-1])]; + bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; + tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_765_cast_fp16)[name = string("variance_13_cast_fp16")]; + fp16 var_768_to_fp16 = const()[name = string("op_768_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_769_cast_fp16 = add(x = variance_13_cast_fp16, y = var_768_to_fp16)[name = string("op_769_cast_fp16")]; + fp32 var_770_epsilon_0 = const()[name = string("op_770_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_770_cast_fp16 = rsqrt(epsilon = var_770_epsilon_0, x = var_769_cast_fp16)[name = string("op_770_cast_fp16")]; + tensor hidden_states_93_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = var_770_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43153664)))]; + tensor hidden_states_97_cast_fp16 = mul(x = model_model_layers_3_input_layernorm_weight_to_fp16, y = hidden_states_93_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; + tensor var_781_shape_cast_fp16 = shape(x = hidden_states_97_cast_fp16)[name = string("op_781_shape_cast_fp16")]; + int32 gather_58 = const()[name = string("gather_58"), val = int32(1)]; + int32 gather_59_axis_0 = const()[name = string("gather_59_axis_0"), val = int32(0)]; + int32 gather_59_batch_dims_0 = const()[name = string("gather_59_batch_dims_0"), val = int32(0)]; + bool gather_59_validate_indices_0 = const()[name = string("gather_59_validate_indices_0"), val = bool(false)]; + string var_781_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_781_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_59_to_uint16 = const()[name = string("select_59_to_uint16"), val = uint16(1)]; + tensor var_781_shape_cast_fp16_to_uint16 = cast(dtype = var_781_shape_cast_fp16_to_uint16_dtype_0, x = var_781_shape_cast_fp16)[name = string("cast_231")]; + uint16 gather_59_cast_uint16 = gather(axis = gather_59_axis_0, batch_dims = gather_59_batch_dims_0, indices = select_59_to_uint16, validate_indices = gather_59_validate_indices_0, x = var_781_shape_cast_fp16_to_uint16)[name = string("gather_59_cast_uint16")]; + string gather_59_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_59_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43155648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43616512))))[name = string("model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43674176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43827840))))[name = string("model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_22_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_22_cast_fp16")]; + tensor model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43847104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44000768))))[name = string("model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 15, 64])]; + tensor var_790_cast_fp16 = reshape(shape = concat_57x, x = linear_21_cast_fp16)[name = string("op_790_cast_fp16")]; + tensor q_7_perm_0 = const()[name = string("q_7_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 5, 64])]; + tensor var_793_cast_fp16 = reshape(shape = concat_58x, x = linear_22_cast_fp16)[name = string("op_793_cast_fp16")]; + tensor k_7_perm_0 = const()[name = string("k_7_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_59x = const()[name = string("concat_59x"), val = tensor([1, -1, 5, 64])]; + tensor var_796_cast_fp16 = reshape(shape = concat_59x, x = linear_23_cast_fp16)[name = string("op_796_cast_fp16")]; + tensor v_state_7_perm_0 = const()[name = string("v_state_7_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_7_cast_fp16 = transpose(perm = q_7_perm_0, x = var_790_cast_fp16)[name = string("transpose_115")]; + tensor var_800_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_7_cast_fp16)[name = string("op_800_cast_fp16")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; + fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_811_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_811_cast_fp16")]; + bool var_813_interleave_0 = const()[name = string("op_813_interleave_0"), val = bool(false)]; + tensor var_813_cast_fp16 = concat(axis = var_85, interleave = var_813_interleave_0, values = (var_811_cast_fp16, x1_13_cast_fp16))[name = string("op_813_cast_fp16")]; + tensor var_814_cast_fp16 = mul(x = var_813_cast_fp16, y = sin_7_cast_fp16)[name = string("op_814_cast_fp16")]; + tensor query_states_15_cast_fp16 = add(x = var_800_cast_fp16, y = var_814_cast_fp16)[name = string("query_states_15_cast_fp16")]; + tensor k_7_cast_fp16 = transpose(perm = k_7_perm_0, x = var_793_cast_fp16)[name = string("transpose_114")]; + tensor var_816_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_7_cast_fp16)[name = string("op_816_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; + fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_827_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_827_cast_fp16")]; + bool var_829_interleave_0 = const()[name = string("op_829_interleave_0"), val = bool(false)]; + tensor var_829_cast_fp16 = concat(axis = var_85, interleave = var_829_interleave_0, values = (var_827_cast_fp16, x1_15_cast_fp16))[name = string("op_829_cast_fp16")]; + tensor var_830_cast_fp16 = mul(x = var_829_cast_fp16, y = sin_7_cast_fp16)[name = string("op_830_cast_fp16")]; + tensor k_state_7_cast_fp16 = add(x = var_816_cast_fp16, y = var_830_cast_fp16)[name = string("k_state_7_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([0])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor concat_62_values0_0 = const()[name = string("concat_62_values0_0"), val = tensor([3])]; + int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; + bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; + tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (concat_62_values0_0, expand_dims_36, expand_dims_37, expand_dims_2, expand_dims_39))[name = string("concat_62")]; + tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_62, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = k_state_7_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_70")]; + tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_7_cast_fp16 = transpose(perm = v_state_7_perm_0, x = var_796_cast_fp16)[name = string("transpose_113")]; + tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_62, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = v_state_7_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_71")]; + tensor var_853_begin_0 = const()[name = string("op_853_begin_0"), val = tensor([3, 0, 0, 0, 0])]; + tensor var_853_end_0 = const()[name = string("op_853_end_0"), val = tensor([4, 1, 5, 2048, 64])]; + tensor var_853_end_mask_0 = const()[name = string("op_853_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_853_squeeze_mask_0 = const()[name = string("op_853_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_853_cast_fp16 = slice_by_index(begin = var_853_begin_0, end = var_853_end_0, end_mask = var_853_end_mask_0, squeeze_mask = var_853_squeeze_mask_0, x = coreml_update_state_70)[name = string("op_853_cast_fp16")]; + tensor var_856_begin_0 = const()[name = string("op_856_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_856_end_mask_0 = const()[name = string("op_856_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_856_cast_fp16 = slice_by_index(begin = var_856_begin_0, end = concat_11, end_mask = var_856_end_mask_0, x = var_853_cast_fp16)[name = string("op_856_cast_fp16")]; + tensor var_858_begin_0 = const()[name = string("op_858_begin_0"), val = tensor([3, 0, 0, 0, 0])]; + tensor var_858_end_0 = const()[name = string("op_858_end_0"), val = tensor([4, 1, 5, 2048, 64])]; + tensor var_858_end_mask_0 = const()[name = string("op_858_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_858_squeeze_mask_0 = const()[name = string("op_858_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_858_cast_fp16 = slice_by_index(begin = var_858_begin_0, end = var_858_end_0, end_mask = var_858_end_mask_0, squeeze_mask = var_858_squeeze_mask_0, x = coreml_update_state_71)[name = string("op_858_cast_fp16")]; + tensor var_861_begin_0 = const()[name = string("op_861_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_end_mask_0 = const()[name = string("op_861_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_861_cast_fp16 = slice_by_index(begin = var_861_begin_0, end = concat_11, end_mask = var_861_end_mask_0, x = var_858_cast_fp16)[name = string("op_861_cast_fp16")]; + tensor var_863_shape_cast_fp16 = shape(x = var_856_cast_fp16)[name = string("op_863_shape_cast_fp16")]; + int32 gather_67 = const()[name = string("gather_67"), val = int32(1)]; + int32 gather_68 = const()[name = string("gather_68"), val = int32(5)]; + int32 gather_69_axis_0 = const()[name = string("gather_69_axis_0"), val = int32(0)]; + int32 gather_69_batch_dims_0 = const()[name = string("gather_69_batch_dims_0"), val = int32(0)]; + bool gather_69_validate_indices_0 = const()[name = string("gather_69_validate_indices_0"), val = bool(false)]; + string var_863_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_863_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_69_to_uint16 = const()[name = string("select_69_to_uint16"), val = uint16(2)]; + tensor var_863_shape_cast_fp16_to_uint16 = cast(dtype = var_863_shape_cast_fp16_to_uint16_dtype_0, x = var_863_shape_cast_fp16)[name = string("cast_230")]; + uint16 gather_69_cast_uint16 = gather(axis = gather_69_axis_0, batch_dims = gather_69_batch_dims_0, indices = select_69_to_uint16, validate_indices = gather_69_validate_indices_0, x = var_863_shape_cast_fp16_to_uint16)[name = string("gather_69_cast_uint16")]; + string gather_69_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_69_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_70 = const()[name = string("gather_70"), val = int32(64)]; + tensor var_870_axes_0 = const()[name = string("op_870_axes_0"), val = tensor([2])]; + tensor var_870_cast_fp16 = expand_dims(axes = var_870_axes_0, x = var_856_cast_fp16)[name = string("op_870_cast_fp16")]; + tensor shape_77_cast_fp16 = shape(x = var_870_cast_fp16)[name = string("shape_77_cast_fp16")]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + int32 gather_69_cast_uint16_to_int32 = cast(dtype = gather_69_cast_uint16_to_int32_dtype_0, x = gather_69_cast_uint16)[name = string("cast_229")]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (gather_67, gather_68, var_89, gather_69_cast_uint16_to_int32, gather_70))[name = string("concat_70")]; + tensor real_div_6 = real_div(x = concat_70, y = shape_77_cast_fp16)[name = string("real_div_6")]; + tensor hidden_states_101_cast_fp16 = tile(reps = real_div_6, x = var_870_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + tensor concat_71x = const()[name = string("concat_71x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_15_cast_fp16 = reshape(shape = concat_71x, x = hidden_states_101_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor var_880_shape_cast_fp16 = shape(x = var_861_cast_fp16)[name = string("op_880_shape_cast_fp16")]; + int32 gather_71 = const()[name = string("gather_71"), val = int32(1)]; + int32 gather_72 = const()[name = string("gather_72"), val = int32(5)]; + int32 gather_73_axis_0 = const()[name = string("gather_73_axis_0"), val = int32(0)]; + int32 gather_73_batch_dims_0 = const()[name = string("gather_73_batch_dims_0"), val = int32(0)]; + bool gather_73_validate_indices_0 = const()[name = string("gather_73_validate_indices_0"), val = bool(false)]; + string var_880_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_880_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_73_to_uint16 = const()[name = string("select_73_to_uint16"), val = uint16(2)]; + tensor var_880_shape_cast_fp16_to_uint16 = cast(dtype = var_880_shape_cast_fp16_to_uint16_dtype_0, x = var_880_shape_cast_fp16)[name = string("cast_228")]; + uint16 gather_73_cast_uint16 = gather(axis = gather_73_axis_0, batch_dims = gather_73_batch_dims_0, indices = select_73_to_uint16, validate_indices = gather_73_validate_indices_0, x = var_880_shape_cast_fp16_to_uint16)[name = string("gather_73_cast_uint16")]; + string gather_73_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_73_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_74 = const()[name = string("gather_74"), val = int32(64)]; + tensor var_887_axes_0 = const()[name = string("op_887_axes_0"), val = tensor([2])]; + tensor var_887_cast_fp16 = expand_dims(axes = var_887_axes_0, x = var_861_cast_fp16)[name = string("op_887_cast_fp16")]; + tensor shape_82_cast_fp16 = shape(x = var_887_cast_fp16)[name = string("shape_82_cast_fp16")]; + int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)]; + bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)]; + int32 gather_73_cast_uint16_to_int32 = cast(dtype = gather_73_cast_uint16_to_int32_dtype_0, x = gather_73_cast_uint16)[name = string("cast_227")]; + tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (gather_71, gather_72, var_89, gather_73_cast_uint16_to_int32, gather_74))[name = string("concat_72")]; + tensor real_div_7 = real_div(x = concat_72, y = shape_82_cast_fp16)[name = string("real_div_7")]; + tensor hidden_states_105_cast_fp16 = tile(reps = real_div_7, x = var_887_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; + tensor concat_73x = const()[name = string("concat_73x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_15_cast_fp16 = reshape(shape = concat_73x, x = hidden_states_105_cast_fp16)[name = string("value_states_15_cast_fp16")]; + tensor var_897_shape_cast_fp16 = shape(x = key_states_15_cast_fp16)[name = string("op_897_shape_cast_fp16")]; + int32 gather_75_axis_0 = const()[name = string("gather_75_axis_0"), val = int32(0)]; + int32 gather_75_batch_dims_0 = const()[name = string("gather_75_batch_dims_0"), val = int32(0)]; + bool gather_75_validate_indices_0 = const()[name = string("gather_75_validate_indices_0"), val = bool(false)]; + string var_897_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_897_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_75_to_uint16 = const()[name = string("select_75_to_uint16"), val = uint16(2)]; + tensor var_897_shape_cast_fp16_to_uint16 = cast(dtype = var_897_shape_cast_fp16_to_uint16_dtype_0, x = var_897_shape_cast_fp16)[name = string("cast_226")]; + uint16 gather_75_cast_uint16 = gather(axis = gather_75_axis_0, batch_dims = gather_75_batch_dims_0, indices = select_75_to_uint16, validate_indices = gather_75_validate_indices_0, x = var_897_shape_cast_fp16_to_uint16)[name = string("gather_75_cast_uint16")]; + string gather_75_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_75_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_74_values0_0 = const()[name = string("concat_74_values0_0"), val = int32(1)]; + int32 concat_74_values1_0 = const()[name = string("concat_74_values1_0"), val = int32(1)]; + int32 concat_74_values2_0 = const()[name = string("concat_74_values2_0"), val = int32(0)]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + int32 gather_75_cast_uint16_to_int32 = cast(dtype = gather_75_cast_uint16_to_int32_dtype_0, x = gather_75_cast_uint16)[name = string("cast_225")]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (concat_74_values0_0, concat_74_values1_0, concat_74_values2_0, gather_75_cast_uint16_to_int32))[name = string("concat_74")]; + tensor causal_mask_9_begin_0 = const()[name = string("causal_mask_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_9_end_mask_0 = const()[name = string("causal_mask_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_9_cast_fp16 = slice_by_index(begin = causal_mask_9_begin_0, end = concat_74, end_mask = causal_mask_9_end_mask_0, x = causal_mask)[name = string("causal_mask_9_cast_fp16")]; + tensor attn_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_9_cast_fp16, key = key_states_15_cast_fp16, query = query_states_15_cast_fp16, value = value_states_15_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_903_perm_0 = const()[name = string("op_903_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + int32 gather_59_cast_uint16_to_int32 = cast(dtype = gather_59_cast_uint16_to_int32_dtype_0, x = gather_59_cast_uint16)[name = string("cast_224")]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (gather_58, gather_59_cast_uint16_to_int32, var_85))[name = string("concat_75")]; + tensor var_903_cast_fp16 = transpose(perm = var_903_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_112")]; + tensor input_25_cast_fp16 = reshape(shape = concat_75, x = var_903_cast_fp16)[name = string("input_25_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44020032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44480896))))[name = string("model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor hidden_states_109_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = linear_24_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; + fp16 var_80_promoted_7_to_fp16 = const()[name = string("op_80_promoted_7_to_fp16"), val = fp16(0x1p+1)]; + tensor var_912_cast_fp16 = pow(x = hidden_states_109_cast_fp16, y = var_80_promoted_7_to_fp16)[name = string("op_912_cast_fp16")]; + tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; + bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; + tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_912_cast_fp16)[name = string("variance_15_cast_fp16")]; + fp16 var_915_to_fp16 = const()[name = string("op_915_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_916_cast_fp16 = add(x = variance_15_cast_fp16, y = var_915_to_fp16)[name = string("op_916_cast_fp16")]; + fp32 var_917_epsilon_0 = const()[name = string("op_917_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_917_cast_fp16 = rsqrt(epsilon = var_917_epsilon_0, x = var_916_cast_fp16)[name = string("op_917_cast_fp16")]; + tensor hidden_states_113_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = var_917_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44538560)))]; + tensor input_27_cast_fp16 = mul(x = model_model_layers_3_post_attention_layernorm_weight_to_fp16, y = hidden_states_113_cast_fp16)[name = string("input_27_cast_fp16")]; + tensor model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44540544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45769408))))[name = string("model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_929_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_929_cast_fp16")]; + tensor model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45923072))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47151936))))[name = string("model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor input_31_cast_fp16 = mul(x = var_929_cast_fp16, y = linear_26_cast_fp16)[name = string("input_31_cast_fp16")]; + tensor model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47305600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48534464))))[name = string("model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_27_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized, x = input_31_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor hidden_states_119_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = linear_27_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; + fp16 var_80_promoted_8_to_fp16 = const()[name = string("op_80_promoted_8_to_fp16"), val = fp16(0x1p+1)]; + tensor var_942_cast_fp16 = pow(x = hidden_states_119_cast_fp16, y = var_80_promoted_8_to_fp16)[name = string("op_942_cast_fp16")]; + tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; + bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; + tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_942_cast_fp16)[name = string("variance_17_cast_fp16")]; + fp16 var_945_to_fp16 = const()[name = string("op_945_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_946_cast_fp16 = add(x = variance_17_cast_fp16, y = var_945_to_fp16)[name = string("op_946_cast_fp16")]; + fp32 var_947_epsilon_0 = const()[name = string("op_947_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_947_cast_fp16 = rsqrt(epsilon = var_947_epsilon_0, x = var_946_cast_fp16)[name = string("op_947_cast_fp16")]; + tensor hidden_states_123_cast_fp16 = mul(x = hidden_states_119_cast_fp16, y = var_947_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; + tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48688128)))]; + tensor hidden_states_127_cast_fp16 = mul(x = model_model_layers_4_input_layernorm_weight_to_fp16, y = hidden_states_123_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; + tensor var_958_shape_cast_fp16 = shape(x = hidden_states_127_cast_fp16)[name = string("op_958_shape_cast_fp16")]; + int32 gather_76 = const()[name = string("gather_76"), val = int32(1)]; + int32 gather_77_axis_0 = const()[name = string("gather_77_axis_0"), val = int32(0)]; + int32 gather_77_batch_dims_0 = const()[name = string("gather_77_batch_dims_0"), val = int32(0)]; + bool gather_77_validate_indices_0 = const()[name = string("gather_77_validate_indices_0"), val = bool(false)]; + string var_958_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_958_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_77_to_uint16 = const()[name = string("select_77_to_uint16"), val = uint16(1)]; + tensor var_958_shape_cast_fp16_to_uint16 = cast(dtype = var_958_shape_cast_fp16_to_uint16_dtype_0, x = var_958_shape_cast_fp16)[name = string("cast_223")]; + uint16 gather_77_cast_uint16 = gather(axis = gather_77_axis_0, batch_dims = gather_77_batch_dims_0, indices = select_77_to_uint16, validate_indices = gather_77_validate_indices_0, x = var_958_shape_cast_fp16_to_uint16)[name = string("gather_77_cast_uint16")]; + string gather_77_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_77_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48690112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49150976))))[name = string("model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_28_cast_fp16")]; + tensor model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49208640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49362304))))[name = string("model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49381568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49535232))))[name = string("model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_30_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_30_cast_fp16")]; + tensor concat_76x = const()[name = string("concat_76x"), val = tensor([1, -1, 15, 64])]; + tensor var_967_cast_fp16 = reshape(shape = concat_76x, x = linear_28_cast_fp16)[name = string("op_967_cast_fp16")]; + tensor q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_77x = const()[name = string("concat_77x"), val = tensor([1, -1, 5, 64])]; + tensor var_970_cast_fp16 = reshape(shape = concat_77x, x = linear_29_cast_fp16)[name = string("op_970_cast_fp16")]; + tensor k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 5, 64])]; + tensor var_973_cast_fp16 = reshape(shape = concat_78x, x = linear_30_cast_fp16)[name = string("op_973_cast_fp16")]; + tensor v_state_9_perm_0 = const()[name = string("v_state_9_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_9_cast_fp16 = transpose(perm = q_9_perm_0, x = var_967_cast_fp16)[name = string("transpose_111")]; + tensor var_977_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_7_cast_fp16)[name = string("op_977_cast_fp16")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_988_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_988_cast_fp16")]; + bool var_990_interleave_0 = const()[name = string("op_990_interleave_0"), val = bool(false)]; + tensor var_990_cast_fp16 = concat(axis = var_85, interleave = var_990_interleave_0, values = (var_988_cast_fp16, x1_17_cast_fp16))[name = string("op_990_cast_fp16")]; + tensor var_991_cast_fp16 = mul(x = var_990_cast_fp16, y = sin_7_cast_fp16)[name = string("op_991_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_977_cast_fp16, y = var_991_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = var_970_cast_fp16)[name = string("transpose_110")]; + tensor var_993_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_7_cast_fp16)[name = string("op_993_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1004_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1004_cast_fp16")]; + bool var_1006_interleave_0 = const()[name = string("op_1006_interleave_0"), val = bool(false)]; + tensor var_1006_cast_fp16 = concat(axis = var_85, interleave = var_1006_interleave_0, values = (var_1004_cast_fp16, x1_19_cast_fp16))[name = string("op_1006_cast_fp16")]; + tensor var_1007_cast_fp16 = mul(x = var_1006_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1007_cast_fp16")]; + tensor k_state_9_cast_fp16 = add(x = var_993_cast_fp16, y = var_1007_cast_fp16)[name = string("k_state_9_cast_fp16")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor concat_81_values0_0 = const()[name = string("concat_81_values0_0"), val = tensor([4])]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (concat_81_values0_0, expand_dims_48, expand_dims_49, expand_dims_2, expand_dims_51))[name = string("concat_81")]; + tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_81, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = k_state_9_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_72")]; + tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_9_cast_fp16 = transpose(perm = v_state_9_perm_0, x = var_973_cast_fp16)[name = string("transpose_109")]; + tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_81, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = v_state_9_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_73")]; + tensor var_1030_begin_0 = const()[name = string("op_1030_begin_0"), val = tensor([4, 0, 0, 0, 0])]; + tensor var_1030_end_0 = const()[name = string("op_1030_end_0"), val = tensor([5, 1, 5, 2048, 64])]; + tensor var_1030_end_mask_0 = const()[name = string("op_1030_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1030_squeeze_mask_0 = const()[name = string("op_1030_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, squeeze_mask = var_1030_squeeze_mask_0, x = coreml_update_state_72)[name = string("op_1030_cast_fp16")]; + tensor var_1033_begin_0 = const()[name = string("op_1033_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_end_mask_0 = const()[name = string("op_1033_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1033_cast_fp16 = slice_by_index(begin = var_1033_begin_0, end = concat_11, end_mask = var_1033_end_mask_0, x = var_1030_cast_fp16)[name = string("op_1033_cast_fp16")]; + tensor var_1035_begin_0 = const()[name = string("op_1035_begin_0"), val = tensor([4, 0, 0, 0, 0])]; + tensor var_1035_end_0 = const()[name = string("op_1035_end_0"), val = tensor([5, 1, 5, 2048, 64])]; + tensor var_1035_end_mask_0 = const()[name = string("op_1035_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1035_squeeze_mask_0 = const()[name = string("op_1035_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1035_cast_fp16 = slice_by_index(begin = var_1035_begin_0, end = var_1035_end_0, end_mask = var_1035_end_mask_0, squeeze_mask = var_1035_squeeze_mask_0, x = coreml_update_state_73)[name = string("op_1035_cast_fp16")]; + tensor var_1038_begin_0 = const()[name = string("op_1038_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1038_end_mask_0 = const()[name = string("op_1038_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1038_cast_fp16 = slice_by_index(begin = var_1038_begin_0, end = concat_11, end_mask = var_1038_end_mask_0, x = var_1035_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1040_shape_cast_fp16 = shape(x = var_1033_cast_fp16)[name = string("op_1040_shape_cast_fp16")]; + int32 gather_85 = const()[name = string("gather_85"), val = int32(1)]; + int32 gather_86 = const()[name = string("gather_86"), val = int32(5)]; + int32 gather_87_axis_0 = const()[name = string("gather_87_axis_0"), val = int32(0)]; + int32 gather_87_batch_dims_0 = const()[name = string("gather_87_batch_dims_0"), val = int32(0)]; + bool gather_87_validate_indices_0 = const()[name = string("gather_87_validate_indices_0"), val = bool(false)]; + string var_1040_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1040_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_87_to_uint16 = const()[name = string("select_87_to_uint16"), val = uint16(2)]; + tensor var_1040_shape_cast_fp16_to_uint16 = cast(dtype = var_1040_shape_cast_fp16_to_uint16_dtype_0, x = var_1040_shape_cast_fp16)[name = string("cast_222")]; + uint16 gather_87_cast_uint16 = gather(axis = gather_87_axis_0, batch_dims = gather_87_batch_dims_0, indices = select_87_to_uint16, validate_indices = gather_87_validate_indices_0, x = var_1040_shape_cast_fp16_to_uint16)[name = string("gather_87_cast_uint16")]; + string gather_87_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_87_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_88 = const()[name = string("gather_88"), val = int32(64)]; + tensor var_1047_axes_0 = const()[name = string("op_1047_axes_0"), val = tensor([2])]; + tensor var_1047_cast_fp16 = expand_dims(axes = var_1047_axes_0, x = var_1033_cast_fp16)[name = string("op_1047_cast_fp16")]; + tensor shape_97_cast_fp16 = shape(x = var_1047_cast_fp16)[name = string("shape_97_cast_fp16")]; + int32 concat_89_axis_0 = const()[name = string("concat_89_axis_0"), val = int32(0)]; + bool concat_89_interleave_0 = const()[name = string("concat_89_interleave_0"), val = bool(false)]; + int32 gather_87_cast_uint16_to_int32 = cast(dtype = gather_87_cast_uint16_to_int32_dtype_0, x = gather_87_cast_uint16)[name = string("cast_221")]; + tensor concat_89 = concat(axis = concat_89_axis_0, interleave = concat_89_interleave_0, values = (gather_85, gather_86, var_89, gather_87_cast_uint16_to_int32, gather_88))[name = string("concat_89")]; + tensor real_div_8 = real_div(x = concat_89, y = shape_97_cast_fp16)[name = string("real_div_8")]; + tensor hidden_states_131_cast_fp16 = tile(reps = real_div_8, x = var_1047_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; + tensor concat_90x = const()[name = string("concat_90x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_19_cast_fp16 = reshape(shape = concat_90x, x = hidden_states_131_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor var_1057_shape_cast_fp16 = shape(x = var_1038_cast_fp16)[name = string("op_1057_shape_cast_fp16")]; + int32 gather_89 = const()[name = string("gather_89"), val = int32(1)]; + int32 gather_90 = const()[name = string("gather_90"), val = int32(5)]; + int32 gather_91_axis_0 = const()[name = string("gather_91_axis_0"), val = int32(0)]; + int32 gather_91_batch_dims_0 = const()[name = string("gather_91_batch_dims_0"), val = int32(0)]; + bool gather_91_validate_indices_0 = const()[name = string("gather_91_validate_indices_0"), val = bool(false)]; + string var_1057_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1057_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_91_to_uint16 = const()[name = string("select_91_to_uint16"), val = uint16(2)]; + tensor var_1057_shape_cast_fp16_to_uint16 = cast(dtype = var_1057_shape_cast_fp16_to_uint16_dtype_0, x = var_1057_shape_cast_fp16)[name = string("cast_220")]; + uint16 gather_91_cast_uint16 = gather(axis = gather_91_axis_0, batch_dims = gather_91_batch_dims_0, indices = select_91_to_uint16, validate_indices = gather_91_validate_indices_0, x = var_1057_shape_cast_fp16_to_uint16)[name = string("gather_91_cast_uint16")]; + string gather_91_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_91_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_92 = const()[name = string("gather_92"), val = int32(64)]; + tensor var_1064_axes_0 = const()[name = string("op_1064_axes_0"), val = tensor([2])]; + tensor var_1064_cast_fp16 = expand_dims(axes = var_1064_axes_0, x = var_1038_cast_fp16)[name = string("op_1064_cast_fp16")]; + tensor shape_102_cast_fp16 = shape(x = var_1064_cast_fp16)[name = string("shape_102_cast_fp16")]; + int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; + bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; + int32 gather_91_cast_uint16_to_int32 = cast(dtype = gather_91_cast_uint16_to_int32_dtype_0, x = gather_91_cast_uint16)[name = string("cast_219")]; + tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (gather_89, gather_90, var_89, gather_91_cast_uint16_to_int32, gather_92))[name = string("concat_91")]; + tensor real_div_9 = real_div(x = concat_91, y = shape_102_cast_fp16)[name = string("real_div_9")]; + tensor hidden_states_135_cast_fp16 = tile(reps = real_div_9, x = var_1064_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; + tensor concat_92x = const()[name = string("concat_92x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_19_cast_fp16 = reshape(shape = concat_92x, x = hidden_states_135_cast_fp16)[name = string("value_states_19_cast_fp16")]; + tensor var_1074_shape_cast_fp16 = shape(x = key_states_19_cast_fp16)[name = string("op_1074_shape_cast_fp16")]; + int32 gather_93_axis_0 = const()[name = string("gather_93_axis_0"), val = int32(0)]; + int32 gather_93_batch_dims_0 = const()[name = string("gather_93_batch_dims_0"), val = int32(0)]; + bool gather_93_validate_indices_0 = const()[name = string("gather_93_validate_indices_0"), val = bool(false)]; + string var_1074_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1074_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_93_to_uint16 = const()[name = string("select_93_to_uint16"), val = uint16(2)]; + tensor var_1074_shape_cast_fp16_to_uint16 = cast(dtype = var_1074_shape_cast_fp16_to_uint16_dtype_0, x = var_1074_shape_cast_fp16)[name = string("cast_218")]; + uint16 gather_93_cast_uint16 = gather(axis = gather_93_axis_0, batch_dims = gather_93_batch_dims_0, indices = select_93_to_uint16, validate_indices = gather_93_validate_indices_0, x = var_1074_shape_cast_fp16_to_uint16)[name = string("gather_93_cast_uint16")]; + string gather_93_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_93_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = int32(1)]; + int32 concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = int32(1)]; + int32 concat_93_values2_0 = const()[name = string("concat_93_values2_0"), val = int32(0)]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + int32 gather_93_cast_uint16_to_int32 = cast(dtype = gather_93_cast_uint16_to_int32_dtype_0, x = gather_93_cast_uint16)[name = string("cast_217")]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, concat_93_values2_0, gather_93_cast_uint16_to_int32))[name = string("concat_93")]; + tensor causal_mask_11_begin_0 = const()[name = string("causal_mask_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_11_end_mask_0 = const()[name = string("causal_mask_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_11_cast_fp16 = slice_by_index(begin = causal_mask_11_begin_0, end = concat_93, end_mask = causal_mask_11_end_mask_0, x = causal_mask)[name = string("causal_mask_11_cast_fp16")]; + tensor attn_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_11_cast_fp16, key = key_states_19_cast_fp16, query = query_states_19_cast_fp16, value = value_states_19_cast_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor var_1080_perm_0 = const()[name = string("op_1080_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_94_axis_0 = const()[name = string("concat_94_axis_0"), val = int32(0)]; + bool concat_94_interleave_0 = const()[name = string("concat_94_interleave_0"), val = bool(false)]; + int32 gather_77_cast_uint16_to_int32 = cast(dtype = gather_77_cast_uint16_to_int32_dtype_0, x = gather_77_cast_uint16)[name = string("cast_216")]; + tensor concat_94 = concat(axis = concat_94_axis_0, interleave = concat_94_interleave_0, values = (gather_76, gather_77_cast_uint16_to_int32, var_85))[name = string("concat_94")]; + tensor var_1080_cast_fp16 = transpose(perm = var_1080_perm_0, x = attn_output_17_cast_fp16)[name = string("transpose_108")]; + tensor input_33_cast_fp16 = reshape(shape = concat_94, x = var_1080_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49554496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50015360))))[name = string("model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_31_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor hidden_states_139_cast_fp16 = add(x = hidden_states_119_cast_fp16, y = linear_31_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; + fp16 var_80_promoted_9_to_fp16 = const()[name = string("op_80_promoted_9_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1089_cast_fp16 = pow(x = hidden_states_139_cast_fp16, y = var_80_promoted_9_to_fp16)[name = string("op_1089_cast_fp16")]; + tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([-1])]; + bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; + tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_1089_cast_fp16)[name = string("variance_19_cast_fp16")]; + fp16 var_1092_to_fp16 = const()[name = string("op_1092_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1093_cast_fp16 = add(x = variance_19_cast_fp16, y = var_1092_to_fp16)[name = string("op_1093_cast_fp16")]; + fp32 var_1094_epsilon_0 = const()[name = string("op_1094_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1094_cast_fp16 = rsqrt(epsilon = var_1094_epsilon_0, x = var_1093_cast_fp16)[name = string("op_1094_cast_fp16")]; + tensor hidden_states_143_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = var_1094_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; + tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50073024)))]; + tensor input_35_cast_fp16 = mul(x = model_model_layers_4_post_attention_layernorm_weight_to_fp16, y = hidden_states_143_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50075008))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51303872))))[name = string("model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_1106_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_1106_cast_fp16")]; + tensor model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51457536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52686400))))[name = string("model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor input_39_cast_fp16 = mul(x = var_1106_cast_fp16, y = linear_33_cast_fp16)[name = string("input_39_cast_fp16")]; + tensor model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52840064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54068928))))[name = string("model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_34_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = string("linear_34_cast_fp16")]; + tensor hidden_states_149_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = linear_34_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + fp16 var_80_promoted_10_to_fp16 = const()[name = string("op_80_promoted_10_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1119_cast_fp16 = pow(x = hidden_states_149_cast_fp16, y = var_80_promoted_10_to_fp16)[name = string("op_1119_cast_fp16")]; + tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; + bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; + tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_1119_cast_fp16)[name = string("variance_21_cast_fp16")]; + fp16 var_1122_to_fp16 = const()[name = string("op_1122_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1123_cast_fp16 = add(x = variance_21_cast_fp16, y = var_1122_to_fp16)[name = string("op_1123_cast_fp16")]; + fp32 var_1124_epsilon_0 = const()[name = string("op_1124_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1124_cast_fp16 = rsqrt(epsilon = var_1124_epsilon_0, x = var_1123_cast_fp16)[name = string("op_1124_cast_fp16")]; + tensor hidden_states_153_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = var_1124_cast_fp16)[name = string("hidden_states_153_cast_fp16")]; + tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54222592)))]; + tensor hidden_states_157_cast_fp16 = mul(x = model_model_layers_5_input_layernorm_weight_to_fp16, y = hidden_states_153_cast_fp16)[name = string("hidden_states_157_cast_fp16")]; + tensor var_1135_shape_cast_fp16 = shape(x = hidden_states_157_cast_fp16)[name = string("op_1135_shape_cast_fp16")]; + int32 gather_94 = const()[name = string("gather_94"), val = int32(1)]; + int32 gather_95_axis_0 = const()[name = string("gather_95_axis_0"), val = int32(0)]; + int32 gather_95_batch_dims_0 = const()[name = string("gather_95_batch_dims_0"), val = int32(0)]; + bool gather_95_validate_indices_0 = const()[name = string("gather_95_validate_indices_0"), val = bool(false)]; + string var_1135_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1135_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_95_to_uint16 = const()[name = string("select_95_to_uint16"), val = uint16(1)]; + tensor var_1135_shape_cast_fp16_to_uint16 = cast(dtype = var_1135_shape_cast_fp16_to_uint16_dtype_0, x = var_1135_shape_cast_fp16)[name = string("cast_215")]; + uint16 gather_95_cast_uint16 = gather(axis = gather_95_axis_0, batch_dims = gather_95_batch_dims_0, indices = select_95_to_uint16, validate_indices = gather_95_validate_indices_0, x = var_1135_shape_cast_fp16_to_uint16)[name = string("gather_95_cast_uint16")]; + string gather_95_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_95_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54224576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54685440))))[name = string("model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54743104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54896768))))[name = string("model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54916032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55069696))))[name = string("model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor concat_95x = const()[name = string("concat_95x"), val = tensor([1, -1, 15, 64])]; + tensor var_1144_cast_fp16 = reshape(shape = concat_95x, x = linear_35_cast_fp16)[name = string("op_1144_cast_fp16")]; + tensor q_11_perm_0 = const()[name = string("q_11_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_96x = const()[name = string("concat_96x"), val = tensor([1, -1, 5, 64])]; + tensor var_1147_cast_fp16 = reshape(shape = concat_96x, x = linear_36_cast_fp16)[name = string("op_1147_cast_fp16")]; + tensor k_11_perm_0 = const()[name = string("k_11_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_97x = const()[name = string("concat_97x"), val = tensor([1, -1, 5, 64])]; + tensor var_1150_cast_fp16 = reshape(shape = concat_97x, x = linear_37_cast_fp16)[name = string("op_1150_cast_fp16")]; + tensor v_state_11_perm_0 = const()[name = string("v_state_11_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_11_cast_fp16 = transpose(perm = q_11_perm_0, x = var_1144_cast_fp16)[name = string("transpose_107")]; + tensor var_1154_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1154_cast_fp16")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1165_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_1165_cast_fp16")]; + bool var_1167_interleave_0 = const()[name = string("op_1167_interleave_0"), val = bool(false)]; + tensor var_1167_cast_fp16 = concat(axis = var_85, interleave = var_1167_interleave_0, values = (var_1165_cast_fp16, x1_21_cast_fp16))[name = string("op_1167_cast_fp16")]; + tensor var_1168_cast_fp16 = mul(x = var_1167_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1168_cast_fp16")]; + tensor query_states_23_cast_fp16 = add(x = var_1154_cast_fp16, y = var_1168_cast_fp16)[name = string("query_states_23_cast_fp16")]; + tensor k_11_cast_fp16 = transpose(perm = k_11_perm_0, x = var_1147_cast_fp16)[name = string("transpose_106")]; + tensor var_1170_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1170_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1181_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1181_cast_fp16")]; + bool var_1183_interleave_0 = const()[name = string("op_1183_interleave_0"), val = bool(false)]; + tensor var_1183_cast_fp16 = concat(axis = var_85, interleave = var_1183_interleave_0, values = (var_1181_cast_fp16, x1_23_cast_fp16))[name = string("op_1183_cast_fp16")]; + tensor var_1184_cast_fp16 = mul(x = var_1183_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1184_cast_fp16")]; + tensor k_state_11_cast_fp16 = add(x = var_1170_cast_fp16, y = var_1184_cast_fp16)[name = string("k_state_11_cast_fp16")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([0])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor concat_100_values0_0 = const()[name = string("concat_100_values0_0"), val = tensor([5])]; + int32 concat_100_axis_0 = const()[name = string("concat_100_axis_0"), val = int32(0)]; + bool concat_100_interleave_0 = const()[name = string("concat_100_interleave_0"), val = bool(false)]; + tensor concat_100 = concat(axis = concat_100_axis_0, interleave = concat_100_interleave_0, values = (concat_100_values0_0, expand_dims_60, expand_dims_61, expand_dims_2, expand_dims_63))[name = string("concat_100")]; + tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_100, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = k_state_11_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_74")]; + tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_11_cast_fp16 = transpose(perm = v_state_11_perm_0, x = var_1150_cast_fp16)[name = string("transpose_105")]; + tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_100, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = v_state_11_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_75")]; + tensor var_1207_begin_0 = const()[name = string("op_1207_begin_0"), val = tensor([5, 0, 0, 0, 0])]; + tensor var_1207_end_0 = const()[name = string("op_1207_end_0"), val = tensor([6, 1, 5, 2048, 64])]; + tensor var_1207_end_mask_0 = const()[name = string("op_1207_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1207_squeeze_mask_0 = const()[name = string("op_1207_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1207_cast_fp16 = slice_by_index(begin = var_1207_begin_0, end = var_1207_end_0, end_mask = var_1207_end_mask_0, squeeze_mask = var_1207_squeeze_mask_0, x = coreml_update_state_74)[name = string("op_1207_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = string("op_1210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1210_end_mask_0 = const()[name = string("op_1210_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = concat_11, end_mask = var_1210_end_mask_0, x = var_1207_cast_fp16)[name = string("op_1210_cast_fp16")]; + tensor var_1212_begin_0 = const()[name = string("op_1212_begin_0"), val = tensor([5, 0, 0, 0, 0])]; + tensor var_1212_end_0 = const()[name = string("op_1212_end_0"), val = tensor([6, 1, 5, 2048, 64])]; + tensor var_1212_end_mask_0 = const()[name = string("op_1212_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1212_squeeze_mask_0 = const()[name = string("op_1212_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1212_cast_fp16 = slice_by_index(begin = var_1212_begin_0, end = var_1212_end_0, end_mask = var_1212_end_mask_0, squeeze_mask = var_1212_squeeze_mask_0, x = coreml_update_state_75)[name = string("op_1212_cast_fp16")]; + tensor var_1215_begin_0 = const()[name = string("op_1215_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1215_end_mask_0 = const()[name = string("op_1215_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1215_cast_fp16 = slice_by_index(begin = var_1215_begin_0, end = concat_11, end_mask = var_1215_end_mask_0, x = var_1212_cast_fp16)[name = string("op_1215_cast_fp16")]; + tensor var_1217_shape_cast_fp16 = shape(x = var_1210_cast_fp16)[name = string("op_1217_shape_cast_fp16")]; + int32 gather_103 = const()[name = string("gather_103"), val = int32(1)]; + int32 gather_104 = const()[name = string("gather_104"), val = int32(5)]; + int32 gather_105_axis_0 = const()[name = string("gather_105_axis_0"), val = int32(0)]; + int32 gather_105_batch_dims_0 = const()[name = string("gather_105_batch_dims_0"), val = int32(0)]; + bool gather_105_validate_indices_0 = const()[name = string("gather_105_validate_indices_0"), val = bool(false)]; + string var_1217_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1217_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_105_to_uint16 = const()[name = string("select_105_to_uint16"), val = uint16(2)]; + tensor var_1217_shape_cast_fp16_to_uint16 = cast(dtype = var_1217_shape_cast_fp16_to_uint16_dtype_0, x = var_1217_shape_cast_fp16)[name = string("cast_214")]; + uint16 gather_105_cast_uint16 = gather(axis = gather_105_axis_0, batch_dims = gather_105_batch_dims_0, indices = select_105_to_uint16, validate_indices = gather_105_validate_indices_0, x = var_1217_shape_cast_fp16_to_uint16)[name = string("gather_105_cast_uint16")]; + string gather_105_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_105_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_106 = const()[name = string("gather_106"), val = int32(64)]; + tensor var_1224_axes_0 = const()[name = string("op_1224_axes_0"), val = tensor([2])]; + tensor var_1224_cast_fp16 = expand_dims(axes = var_1224_axes_0, x = var_1210_cast_fp16)[name = string("op_1224_cast_fp16")]; + tensor shape_117_cast_fp16 = shape(x = var_1224_cast_fp16)[name = string("shape_117_cast_fp16")]; + int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; + bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; + int32 gather_105_cast_uint16_to_int32 = cast(dtype = gather_105_cast_uint16_to_int32_dtype_0, x = gather_105_cast_uint16)[name = string("cast_213")]; + tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (gather_103, gather_104, var_89, gather_105_cast_uint16_to_int32, gather_106))[name = string("concat_108")]; + tensor real_div_10 = real_div(x = concat_108, y = shape_117_cast_fp16)[name = string("real_div_10")]; + tensor hidden_states_161_cast_fp16 = tile(reps = real_div_10, x = var_1224_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; + tensor concat_109x = const()[name = string("concat_109x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_23_cast_fp16 = reshape(shape = concat_109x, x = hidden_states_161_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor var_1234_shape_cast_fp16 = shape(x = var_1215_cast_fp16)[name = string("op_1234_shape_cast_fp16")]; + int32 gather_107 = const()[name = string("gather_107"), val = int32(1)]; + int32 gather_108 = const()[name = string("gather_108"), val = int32(5)]; + int32 gather_109_axis_0 = const()[name = string("gather_109_axis_0"), val = int32(0)]; + int32 gather_109_batch_dims_0 = const()[name = string("gather_109_batch_dims_0"), val = int32(0)]; + bool gather_109_validate_indices_0 = const()[name = string("gather_109_validate_indices_0"), val = bool(false)]; + string var_1234_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1234_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_109_to_uint16 = const()[name = string("select_109_to_uint16"), val = uint16(2)]; + tensor var_1234_shape_cast_fp16_to_uint16 = cast(dtype = var_1234_shape_cast_fp16_to_uint16_dtype_0, x = var_1234_shape_cast_fp16)[name = string("cast_212")]; + uint16 gather_109_cast_uint16 = gather(axis = gather_109_axis_0, batch_dims = gather_109_batch_dims_0, indices = select_109_to_uint16, validate_indices = gather_109_validate_indices_0, x = var_1234_shape_cast_fp16_to_uint16)[name = string("gather_109_cast_uint16")]; + string gather_109_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_109_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_110 = const()[name = string("gather_110"), val = int32(64)]; + tensor var_1241_axes_0 = const()[name = string("op_1241_axes_0"), val = tensor([2])]; + tensor var_1241_cast_fp16 = expand_dims(axes = var_1241_axes_0, x = var_1215_cast_fp16)[name = string("op_1241_cast_fp16")]; + tensor shape_122_cast_fp16 = shape(x = var_1241_cast_fp16)[name = string("shape_122_cast_fp16")]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + int32 gather_109_cast_uint16_to_int32 = cast(dtype = gather_109_cast_uint16_to_int32_dtype_0, x = gather_109_cast_uint16)[name = string("cast_211")]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (gather_107, gather_108, var_89, gather_109_cast_uint16_to_int32, gather_110))[name = string("concat_110")]; + tensor real_div_11 = real_div(x = concat_110, y = shape_122_cast_fp16)[name = string("real_div_11")]; + tensor hidden_states_165_cast_fp16 = tile(reps = real_div_11, x = var_1241_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; + tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_23_cast_fp16 = reshape(shape = concat_111x, x = hidden_states_165_cast_fp16)[name = string("value_states_23_cast_fp16")]; + tensor var_1251_shape_cast_fp16 = shape(x = key_states_23_cast_fp16)[name = string("op_1251_shape_cast_fp16")]; + int32 gather_111_axis_0 = const()[name = string("gather_111_axis_0"), val = int32(0)]; + int32 gather_111_batch_dims_0 = const()[name = string("gather_111_batch_dims_0"), val = int32(0)]; + bool gather_111_validate_indices_0 = const()[name = string("gather_111_validate_indices_0"), val = bool(false)]; + string var_1251_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1251_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_111_to_uint16 = const()[name = string("select_111_to_uint16"), val = uint16(2)]; + tensor var_1251_shape_cast_fp16_to_uint16 = cast(dtype = var_1251_shape_cast_fp16_to_uint16_dtype_0, x = var_1251_shape_cast_fp16)[name = string("cast_210")]; + uint16 gather_111_cast_uint16 = gather(axis = gather_111_axis_0, batch_dims = gather_111_batch_dims_0, indices = select_111_to_uint16, validate_indices = gather_111_validate_indices_0, x = var_1251_shape_cast_fp16_to_uint16)[name = string("gather_111_cast_uint16")]; + string gather_111_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_111_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_112_values0_0 = const()[name = string("concat_112_values0_0"), val = int32(1)]; + int32 concat_112_values1_0 = const()[name = string("concat_112_values1_0"), val = int32(1)]; + int32 concat_112_values2_0 = const()[name = string("concat_112_values2_0"), val = int32(0)]; + int32 concat_112_axis_0 = const()[name = string("concat_112_axis_0"), val = int32(0)]; + bool concat_112_interleave_0 = const()[name = string("concat_112_interleave_0"), val = bool(false)]; + int32 gather_111_cast_uint16_to_int32 = cast(dtype = gather_111_cast_uint16_to_int32_dtype_0, x = gather_111_cast_uint16)[name = string("cast_209")]; + tensor concat_112 = concat(axis = concat_112_axis_0, interleave = concat_112_interleave_0, values = (concat_112_values0_0, concat_112_values1_0, concat_112_values2_0, gather_111_cast_uint16_to_int32))[name = string("concat_112")]; + tensor causal_mask_13_begin_0 = const()[name = string("causal_mask_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_13_end_mask_0 = const()[name = string("causal_mask_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_13_cast_fp16 = slice_by_index(begin = causal_mask_13_begin_0, end = concat_112, end_mask = causal_mask_13_end_mask_0, x = causal_mask)[name = string("causal_mask_13_cast_fp16")]; + tensor attn_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_13_cast_fp16, key = key_states_23_cast_fp16, query = query_states_23_cast_fp16, value = value_states_23_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_1257_perm_0 = const()[name = string("op_1257_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_113_axis_0 = const()[name = string("concat_113_axis_0"), val = int32(0)]; + bool concat_113_interleave_0 = const()[name = string("concat_113_interleave_0"), val = bool(false)]; + int32 gather_95_cast_uint16_to_int32 = cast(dtype = gather_95_cast_uint16_to_int32_dtype_0, x = gather_95_cast_uint16)[name = string("cast_208")]; + tensor concat_113 = concat(axis = concat_113_axis_0, interleave = concat_113_interleave_0, values = (gather_94, gather_95_cast_uint16_to_int32, var_85))[name = string("concat_113")]; + tensor var_1257_cast_fp16 = transpose(perm = var_1257_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_104")]; + tensor input_41_cast_fp16 = reshape(shape = concat_113, x = var_1257_cast_fp16)[name = string("input_41_cast_fp16")]; + tensor model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55088960))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55549824))))[name = string("model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = string("linear_38_cast_fp16")]; + tensor hidden_states_169_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = linear_38_cast_fp16)[name = string("hidden_states_169_cast_fp16")]; + fp16 var_80_promoted_11_to_fp16 = const()[name = string("op_80_promoted_11_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1266_cast_fp16 = pow(x = hidden_states_169_cast_fp16, y = var_80_promoted_11_to_fp16)[name = string("op_1266_cast_fp16")]; + tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; + bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; + tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1266_cast_fp16)[name = string("variance_23_cast_fp16")]; + fp16 var_1269_to_fp16 = const()[name = string("op_1269_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1270_cast_fp16 = add(x = variance_23_cast_fp16, y = var_1269_to_fp16)[name = string("op_1270_cast_fp16")]; + fp32 var_1271_epsilon_0 = const()[name = string("op_1271_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1271_cast_fp16 = rsqrt(epsilon = var_1271_epsilon_0, x = var_1270_cast_fp16)[name = string("op_1271_cast_fp16")]; + tensor hidden_states_173_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = var_1271_cast_fp16)[name = string("hidden_states_173_cast_fp16")]; + tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55607488)))]; + tensor input_43_cast_fp16 = mul(x = model_model_layers_5_post_attention_layernorm_weight_to_fp16, y = hidden_states_173_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55609472))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56838336))))[name = string("model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor var_1283_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_1283_cast_fp16")]; + tensor model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56992000))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58220864))))[name = string("model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = string("linear_40_cast_fp16")]; + tensor input_47_cast_fp16 = mul(x = var_1283_cast_fp16, y = linear_40_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58374528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59603392))))[name = string("model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_41_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor hidden_states_179_cast_fp16 = add(x = hidden_states_169_cast_fp16, y = linear_41_cast_fp16)[name = string("hidden_states_179_cast_fp16")]; + fp16 var_80_promoted_12_to_fp16 = const()[name = string("op_80_promoted_12_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1296_cast_fp16 = pow(x = hidden_states_179_cast_fp16, y = var_80_promoted_12_to_fp16)[name = string("op_1296_cast_fp16")]; + tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; + bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; + tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1296_cast_fp16)[name = string("variance_25_cast_fp16")]; + fp16 var_1299_to_fp16 = const()[name = string("op_1299_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1300_cast_fp16 = add(x = variance_25_cast_fp16, y = var_1299_to_fp16)[name = string("op_1300_cast_fp16")]; + fp32 var_1301_epsilon_0 = const()[name = string("op_1301_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1301_cast_fp16 = rsqrt(epsilon = var_1301_epsilon_0, x = var_1300_cast_fp16)[name = string("op_1301_cast_fp16")]; + tensor hidden_states_183_cast_fp16 = mul(x = hidden_states_179_cast_fp16, y = var_1301_cast_fp16)[name = string("hidden_states_183_cast_fp16")]; + tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59757056)))]; + tensor hidden_states_187_cast_fp16 = mul(x = model_model_layers_6_input_layernorm_weight_to_fp16, y = hidden_states_183_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; + tensor var_1312_shape_cast_fp16 = shape(x = hidden_states_187_cast_fp16)[name = string("op_1312_shape_cast_fp16")]; + int32 gather_112 = const()[name = string("gather_112"), val = int32(1)]; + int32 gather_113_axis_0 = const()[name = string("gather_113_axis_0"), val = int32(0)]; + int32 gather_113_batch_dims_0 = const()[name = string("gather_113_batch_dims_0"), val = int32(0)]; + bool gather_113_validate_indices_0 = const()[name = string("gather_113_validate_indices_0"), val = bool(false)]; + string var_1312_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1312_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_113_to_uint16 = const()[name = string("select_113_to_uint16"), val = uint16(1)]; + tensor var_1312_shape_cast_fp16_to_uint16 = cast(dtype = var_1312_shape_cast_fp16_to_uint16_dtype_0, x = var_1312_shape_cast_fp16)[name = string("cast_207")]; + uint16 gather_113_cast_uint16 = gather(axis = gather_113_axis_0, batch_dims = gather_113_batch_dims_0, indices = select_113_to_uint16, validate_indices = gather_113_validate_indices_0, x = var_1312_shape_cast_fp16_to_uint16)[name = string("gather_113_cast_uint16")]; + string gather_113_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_113_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59759040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60219904))))[name = string("model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60277568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60431232))))[name = string("model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60450496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60604160))))[name = string("model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor concat_114x = const()[name = string("concat_114x"), val = tensor([1, -1, 15, 64])]; + tensor var_1321_cast_fp16 = reshape(shape = concat_114x, x = linear_42_cast_fp16)[name = string("op_1321_cast_fp16")]; + tensor q_13_perm_0 = const()[name = string("q_13_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_115x = const()[name = string("concat_115x"), val = tensor([1, -1, 5, 64])]; + tensor var_1324_cast_fp16 = reshape(shape = concat_115x, x = linear_43_cast_fp16)[name = string("op_1324_cast_fp16")]; + tensor k_13_perm_0 = const()[name = string("k_13_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_116x = const()[name = string("concat_116x"), val = tensor([1, -1, 5, 64])]; + tensor var_1327_cast_fp16 = reshape(shape = concat_116x, x = linear_44_cast_fp16)[name = string("op_1327_cast_fp16")]; + tensor v_state_13_perm_0 = const()[name = string("v_state_13_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_13_cast_fp16 = transpose(perm = q_13_perm_0, x = var_1321_cast_fp16)[name = string("transpose_103")]; + tensor var_1331_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1331_cast_fp16")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; + fp16 const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1342_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_15_promoted_to_fp16)[name = string("op_1342_cast_fp16")]; + bool var_1344_interleave_0 = const()[name = string("op_1344_interleave_0"), val = bool(false)]; + tensor var_1344_cast_fp16 = concat(axis = var_85, interleave = var_1344_interleave_0, values = (var_1342_cast_fp16, x1_25_cast_fp16))[name = string("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = mul(x = var_1344_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1345_cast_fp16")]; + tensor query_states_27_cast_fp16 = add(x = var_1331_cast_fp16, y = var_1345_cast_fp16)[name = string("query_states_27_cast_fp16")]; + tensor k_13_cast_fp16 = transpose(perm = k_13_perm_0, x = var_1324_cast_fp16)[name = string("transpose_102")]; + tensor var_1347_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1347_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1358_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1358_cast_fp16")]; + bool var_1360_interleave_0 = const()[name = string("op_1360_interleave_0"), val = bool(false)]; + tensor var_1360_cast_fp16 = concat(axis = var_85, interleave = var_1360_interleave_0, values = (var_1358_cast_fp16, x1_27_cast_fp16))[name = string("op_1360_cast_fp16")]; + tensor var_1361_cast_fp16 = mul(x = var_1360_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1361_cast_fp16")]; + tensor k_state_13_cast_fp16 = add(x = var_1347_cast_fp16, y = var_1361_cast_fp16)[name = string("k_state_13_cast_fp16")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor concat_119_values0_0 = const()[name = string("concat_119_values0_0"), val = tensor([6])]; + int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; + bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; + tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (concat_119_values0_0, expand_dims_72, expand_dims_73, expand_dims_2, expand_dims_75))[name = string("concat_119")]; + tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_119, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = k_state_13_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_76")]; + tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_13_cast_fp16 = transpose(perm = v_state_13_perm_0, x = var_1327_cast_fp16)[name = string("transpose_101")]; + tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_119, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = v_state_13_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_77")]; + tensor var_1384_begin_0 = const()[name = string("op_1384_begin_0"), val = tensor([6, 0, 0, 0, 0])]; + tensor var_1384_end_0 = const()[name = string("op_1384_end_0"), val = tensor([7, 1, 5, 2048, 64])]; + tensor var_1384_end_mask_0 = const()[name = string("op_1384_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1384_squeeze_mask_0 = const()[name = string("op_1384_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1384_cast_fp16 = slice_by_index(begin = var_1384_begin_0, end = var_1384_end_0, end_mask = var_1384_end_mask_0, squeeze_mask = var_1384_squeeze_mask_0, x = coreml_update_state_76)[name = string("op_1384_cast_fp16")]; + tensor var_1387_begin_0 = const()[name = string("op_1387_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1387_end_mask_0 = const()[name = string("op_1387_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1387_cast_fp16 = slice_by_index(begin = var_1387_begin_0, end = concat_11, end_mask = var_1387_end_mask_0, x = var_1384_cast_fp16)[name = string("op_1387_cast_fp16")]; + tensor var_1389_begin_0 = const()[name = string("op_1389_begin_0"), val = tensor([6, 0, 0, 0, 0])]; + tensor var_1389_end_0 = const()[name = string("op_1389_end_0"), val = tensor([7, 1, 5, 2048, 64])]; + tensor var_1389_end_mask_0 = const()[name = string("op_1389_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1389_squeeze_mask_0 = const()[name = string("op_1389_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1389_cast_fp16 = slice_by_index(begin = var_1389_begin_0, end = var_1389_end_0, end_mask = var_1389_end_mask_0, squeeze_mask = var_1389_squeeze_mask_0, x = coreml_update_state_77)[name = string("op_1389_cast_fp16")]; + tensor var_1392_begin_0 = const()[name = string("op_1392_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1392_end_mask_0 = const()[name = string("op_1392_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1392_cast_fp16 = slice_by_index(begin = var_1392_begin_0, end = concat_11, end_mask = var_1392_end_mask_0, x = var_1389_cast_fp16)[name = string("op_1392_cast_fp16")]; + tensor var_1394_shape_cast_fp16 = shape(x = var_1387_cast_fp16)[name = string("op_1394_shape_cast_fp16")]; + int32 gather_121 = const()[name = string("gather_121"), val = int32(1)]; + int32 gather_122 = const()[name = string("gather_122"), val = int32(5)]; + int32 gather_123_axis_0 = const()[name = string("gather_123_axis_0"), val = int32(0)]; + int32 gather_123_batch_dims_0 = const()[name = string("gather_123_batch_dims_0"), val = int32(0)]; + bool gather_123_validate_indices_0 = const()[name = string("gather_123_validate_indices_0"), val = bool(false)]; + string var_1394_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1394_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_123_to_uint16 = const()[name = string("select_123_to_uint16"), val = uint16(2)]; + tensor var_1394_shape_cast_fp16_to_uint16 = cast(dtype = var_1394_shape_cast_fp16_to_uint16_dtype_0, x = var_1394_shape_cast_fp16)[name = string("cast_206")]; + uint16 gather_123_cast_uint16 = gather(axis = gather_123_axis_0, batch_dims = gather_123_batch_dims_0, indices = select_123_to_uint16, validate_indices = gather_123_validate_indices_0, x = var_1394_shape_cast_fp16_to_uint16)[name = string("gather_123_cast_uint16")]; + string gather_123_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_123_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_124 = const()[name = string("gather_124"), val = int32(64)]; + tensor var_1401_axes_0 = const()[name = string("op_1401_axes_0"), val = tensor([2])]; + tensor var_1401_cast_fp16 = expand_dims(axes = var_1401_axes_0, x = var_1387_cast_fp16)[name = string("op_1401_cast_fp16")]; + tensor shape_137_cast_fp16 = shape(x = var_1401_cast_fp16)[name = string("shape_137_cast_fp16")]; + int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; + bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; + int32 gather_123_cast_uint16_to_int32 = cast(dtype = gather_123_cast_uint16_to_int32_dtype_0, x = gather_123_cast_uint16)[name = string("cast_205")]; + tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (gather_121, gather_122, var_89, gather_123_cast_uint16_to_int32, gather_124))[name = string("concat_127")]; + tensor real_div_12 = real_div(x = concat_127, y = shape_137_cast_fp16)[name = string("real_div_12")]; + tensor hidden_states_191_cast_fp16 = tile(reps = real_div_12, x = var_1401_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; + tensor concat_128x = const()[name = string("concat_128x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_27_cast_fp16 = reshape(shape = concat_128x, x = hidden_states_191_cast_fp16)[name = string("key_states_27_cast_fp16")]; + tensor var_1411_shape_cast_fp16 = shape(x = var_1392_cast_fp16)[name = string("op_1411_shape_cast_fp16")]; + int32 gather_125 = const()[name = string("gather_125"), val = int32(1)]; + int32 gather_126 = const()[name = string("gather_126"), val = int32(5)]; + int32 gather_127_axis_0 = const()[name = string("gather_127_axis_0"), val = int32(0)]; + int32 gather_127_batch_dims_0 = const()[name = string("gather_127_batch_dims_0"), val = int32(0)]; + bool gather_127_validate_indices_0 = const()[name = string("gather_127_validate_indices_0"), val = bool(false)]; + string var_1411_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1411_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_127_to_uint16 = const()[name = string("select_127_to_uint16"), val = uint16(2)]; + tensor var_1411_shape_cast_fp16_to_uint16 = cast(dtype = var_1411_shape_cast_fp16_to_uint16_dtype_0, x = var_1411_shape_cast_fp16)[name = string("cast_204")]; + uint16 gather_127_cast_uint16 = gather(axis = gather_127_axis_0, batch_dims = gather_127_batch_dims_0, indices = select_127_to_uint16, validate_indices = gather_127_validate_indices_0, x = var_1411_shape_cast_fp16_to_uint16)[name = string("gather_127_cast_uint16")]; + string gather_127_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_127_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_128 = const()[name = string("gather_128"), val = int32(64)]; + tensor var_1418_axes_0 = const()[name = string("op_1418_axes_0"), val = tensor([2])]; + tensor var_1418_cast_fp16 = expand_dims(axes = var_1418_axes_0, x = var_1392_cast_fp16)[name = string("op_1418_cast_fp16")]; + tensor shape_142_cast_fp16 = shape(x = var_1418_cast_fp16)[name = string("shape_142_cast_fp16")]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + int32 gather_127_cast_uint16_to_int32 = cast(dtype = gather_127_cast_uint16_to_int32_dtype_0, x = gather_127_cast_uint16)[name = string("cast_203")]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (gather_125, gather_126, var_89, gather_127_cast_uint16_to_int32, gather_128))[name = string("concat_129")]; + tensor real_div_13 = real_div(x = concat_129, y = shape_142_cast_fp16)[name = string("real_div_13")]; + tensor hidden_states_195_cast_fp16 = tile(reps = real_div_13, x = var_1418_cast_fp16)[name = string("hidden_states_195_cast_fp16")]; + tensor concat_130x = const()[name = string("concat_130x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_27_cast_fp16 = reshape(shape = concat_130x, x = hidden_states_195_cast_fp16)[name = string("value_states_27_cast_fp16")]; + tensor var_1428_shape_cast_fp16 = shape(x = key_states_27_cast_fp16)[name = string("op_1428_shape_cast_fp16")]; + int32 gather_129_axis_0 = const()[name = string("gather_129_axis_0"), val = int32(0)]; + int32 gather_129_batch_dims_0 = const()[name = string("gather_129_batch_dims_0"), val = int32(0)]; + bool gather_129_validate_indices_0 = const()[name = string("gather_129_validate_indices_0"), val = bool(false)]; + string var_1428_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1428_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_129_to_uint16 = const()[name = string("select_129_to_uint16"), val = uint16(2)]; + tensor var_1428_shape_cast_fp16_to_uint16 = cast(dtype = var_1428_shape_cast_fp16_to_uint16_dtype_0, x = var_1428_shape_cast_fp16)[name = string("cast_202")]; + uint16 gather_129_cast_uint16 = gather(axis = gather_129_axis_0, batch_dims = gather_129_batch_dims_0, indices = select_129_to_uint16, validate_indices = gather_129_validate_indices_0, x = var_1428_shape_cast_fp16_to_uint16)[name = string("gather_129_cast_uint16")]; + string gather_129_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_129_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_131_values0_0 = const()[name = string("concat_131_values0_0"), val = int32(1)]; + int32 concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = int32(1)]; + int32 concat_131_values2_0 = const()[name = string("concat_131_values2_0"), val = int32(0)]; + int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; + bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; + int32 gather_129_cast_uint16_to_int32 = cast(dtype = gather_129_cast_uint16_to_int32_dtype_0, x = gather_129_cast_uint16)[name = string("cast_201")]; + tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (concat_131_values0_0, concat_131_values1_0, concat_131_values2_0, gather_129_cast_uint16_to_int32))[name = string("concat_131")]; + tensor causal_mask_15_begin_0 = const()[name = string("causal_mask_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_15_end_mask_0 = const()[name = string("causal_mask_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_15_cast_fp16 = slice_by_index(begin = causal_mask_15_begin_0, end = concat_131, end_mask = causal_mask_15_end_mask_0, x = causal_mask)[name = string("causal_mask_15_cast_fp16")]; + tensor attn_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_15_cast_fp16, key = key_states_27_cast_fp16, query = query_states_27_cast_fp16, value = value_states_27_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_1434_perm_0 = const()[name = string("op_1434_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + int32 gather_113_cast_uint16_to_int32 = cast(dtype = gather_113_cast_uint16_to_int32_dtype_0, x = gather_113_cast_uint16)[name = string("cast_200")]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (gather_112, gather_113_cast_uint16_to_int32, var_85))[name = string("concat_132")]; + tensor var_1434_cast_fp16 = transpose(perm = var_1434_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_100")]; + tensor input_49_cast_fp16 = reshape(shape = concat_132, x = var_1434_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60623424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61084288))))[name = string("model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_45_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor hidden_states_199_cast_fp16 = add(x = hidden_states_179_cast_fp16, y = linear_45_cast_fp16)[name = string("hidden_states_199_cast_fp16")]; + fp16 var_80_promoted_13_to_fp16 = const()[name = string("op_80_promoted_13_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1443_cast_fp16 = pow(x = hidden_states_199_cast_fp16, y = var_80_promoted_13_to_fp16)[name = string("op_1443_cast_fp16")]; + tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([-1])]; + bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; + tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_1443_cast_fp16)[name = string("variance_27_cast_fp16")]; + fp16 var_1446_to_fp16 = const()[name = string("op_1446_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1447_cast_fp16 = add(x = variance_27_cast_fp16, y = var_1446_to_fp16)[name = string("op_1447_cast_fp16")]; + fp32 var_1448_epsilon_0 = const()[name = string("op_1448_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1448_cast_fp16 = rsqrt(epsilon = var_1448_epsilon_0, x = var_1447_cast_fp16)[name = string("op_1448_cast_fp16")]; + tensor hidden_states_203_cast_fp16 = mul(x = hidden_states_199_cast_fp16, y = var_1448_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; + tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61141952)))]; + tensor input_51_cast_fp16 = mul(x = model_model_layers_6_post_attention_layernorm_weight_to_fp16, y = hidden_states_203_cast_fp16)[name = string("input_51_cast_fp16")]; + tensor model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61143936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62372800))))[name = string("model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_46_cast_fp16")]; + tensor var_1460_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_1460_cast_fp16")]; + tensor model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62526464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63755328))))[name = string("model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor input_55_cast_fp16 = mul(x = var_1460_cast_fp16, y = linear_47_cast_fp16)[name = string("input_55_cast_fp16")]; + tensor model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63908992))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65137856))))[name = string("model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_48_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized, x = input_55_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor hidden_states_209_cast_fp16 = add(x = hidden_states_199_cast_fp16, y = linear_48_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; + fp16 var_80_promoted_14_to_fp16 = const()[name = string("op_80_promoted_14_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1473_cast_fp16 = pow(x = hidden_states_209_cast_fp16, y = var_80_promoted_14_to_fp16)[name = string("op_1473_cast_fp16")]; + tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([-1])]; + bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; + tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_1473_cast_fp16)[name = string("variance_29_cast_fp16")]; + fp16 var_1476_to_fp16 = const()[name = string("op_1476_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1477_cast_fp16 = add(x = variance_29_cast_fp16, y = var_1476_to_fp16)[name = string("op_1477_cast_fp16")]; + fp32 var_1478_epsilon_0 = const()[name = string("op_1478_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1478_cast_fp16 = rsqrt(epsilon = var_1478_epsilon_0, x = var_1477_cast_fp16)[name = string("op_1478_cast_fp16")]; + tensor hidden_states_213_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = var_1478_cast_fp16)[name = string("hidden_states_213_cast_fp16")]; + tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65291520)))]; + tensor hidden_states_217_cast_fp16 = mul(x = model_model_layers_7_input_layernorm_weight_to_fp16, y = hidden_states_213_cast_fp16)[name = string("hidden_states_217_cast_fp16")]; + tensor var_1489_shape_cast_fp16 = shape(x = hidden_states_217_cast_fp16)[name = string("op_1489_shape_cast_fp16")]; + int32 gather_130 = const()[name = string("gather_130"), val = int32(1)]; + int32 gather_131_axis_0 = const()[name = string("gather_131_axis_0"), val = int32(0)]; + int32 gather_131_batch_dims_0 = const()[name = string("gather_131_batch_dims_0"), val = int32(0)]; + bool gather_131_validate_indices_0 = const()[name = string("gather_131_validate_indices_0"), val = bool(false)]; + string var_1489_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1489_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_131_to_uint16 = const()[name = string("select_131_to_uint16"), val = uint16(1)]; + tensor var_1489_shape_cast_fp16_to_uint16 = cast(dtype = var_1489_shape_cast_fp16_to_uint16_dtype_0, x = var_1489_shape_cast_fp16)[name = string("cast_199")]; + uint16 gather_131_cast_uint16 = gather(axis = gather_131_axis_0, batch_dims = gather_131_batch_dims_0, indices = select_131_to_uint16, validate_indices = gather_131_validate_indices_0, x = var_1489_shape_cast_fp16_to_uint16)[name = string("gather_131_cast_uint16")]; + string gather_131_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_131_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65293504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65754368))))[name = string("model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65812032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65965696))))[name = string("model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_50_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65984960))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66138624))))[name = string("model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 15, 64])]; + tensor var_1498_cast_fp16 = reshape(shape = concat_133x, x = linear_49_cast_fp16)[name = string("op_1498_cast_fp16")]; + tensor q_15_perm_0 = const()[name = string("q_15_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_134x = const()[name = string("concat_134x"), val = tensor([1, -1, 5, 64])]; + tensor var_1501_cast_fp16 = reshape(shape = concat_134x, x = linear_50_cast_fp16)[name = string("op_1501_cast_fp16")]; + tensor k_15_perm_0 = const()[name = string("k_15_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_135x = const()[name = string("concat_135x"), val = tensor([1, -1, 5, 64])]; + tensor var_1504_cast_fp16 = reshape(shape = concat_135x, x = linear_51_cast_fp16)[name = string("op_1504_cast_fp16")]; + tensor v_state_15_perm_0 = const()[name = string("v_state_15_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_15_cast_fp16 = transpose(perm = q_15_perm_0, x = var_1498_cast_fp16)[name = string("transpose_99")]; + tensor var_1508_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1508_cast_fp16")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1519_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1519_cast_fp16")]; + bool var_1521_interleave_0 = const()[name = string("op_1521_interleave_0"), val = bool(false)]; + tensor var_1521_cast_fp16 = concat(axis = var_85, interleave = var_1521_interleave_0, values = (var_1519_cast_fp16, x1_29_cast_fp16))[name = string("op_1521_cast_fp16")]; + tensor var_1522_cast_fp16 = mul(x = var_1521_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1522_cast_fp16")]; + tensor query_states_31_cast_fp16 = add(x = var_1508_cast_fp16, y = var_1522_cast_fp16)[name = string("query_states_31_cast_fp16")]; + tensor k_15_cast_fp16 = transpose(perm = k_15_perm_0, x = var_1501_cast_fp16)[name = string("transpose_98")]; + tensor var_1524_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1524_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; + fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1535_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_18_promoted_to_fp16)[name = string("op_1535_cast_fp16")]; + bool var_1537_interleave_0 = const()[name = string("op_1537_interleave_0"), val = bool(false)]; + tensor var_1537_cast_fp16 = concat(axis = var_85, interleave = var_1537_interleave_0, values = (var_1535_cast_fp16, x1_31_cast_fp16))[name = string("op_1537_cast_fp16")]; + tensor var_1538_cast_fp16 = mul(x = var_1537_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1538_cast_fp16")]; + tensor k_state_15_cast_fp16 = add(x = var_1524_cast_fp16, y = var_1538_cast_fp16)[name = string("k_state_15_cast_fp16")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([0])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor concat_138_values0_0 = const()[name = string("concat_138_values0_0"), val = tensor([7])]; + int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; + bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; + tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (concat_138_values0_0, expand_dims_84, expand_dims_85, expand_dims_2, expand_dims_87))[name = string("concat_138")]; + tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_138, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = k_state_15_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_78")]; + tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_15_cast_fp16 = transpose(perm = v_state_15_perm_0, x = var_1504_cast_fp16)[name = string("transpose_97")]; + tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_138, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = v_state_15_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_79")]; + tensor var_1561_begin_0 = const()[name = string("op_1561_begin_0"), val = tensor([7, 0, 0, 0, 0])]; + tensor var_1561_end_0 = const()[name = string("op_1561_end_0"), val = tensor([8, 1, 5, 2048, 64])]; + tensor var_1561_end_mask_0 = const()[name = string("op_1561_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1561_squeeze_mask_0 = const()[name = string("op_1561_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1561_cast_fp16 = slice_by_index(begin = var_1561_begin_0, end = var_1561_end_0, end_mask = var_1561_end_mask_0, squeeze_mask = var_1561_squeeze_mask_0, x = coreml_update_state_78)[name = string("op_1561_cast_fp16")]; + tensor var_1564_begin_0 = const()[name = string("op_1564_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1564_end_mask_0 = const()[name = string("op_1564_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1564_cast_fp16 = slice_by_index(begin = var_1564_begin_0, end = concat_11, end_mask = var_1564_end_mask_0, x = var_1561_cast_fp16)[name = string("op_1564_cast_fp16")]; + tensor var_1566_begin_0 = const()[name = string("op_1566_begin_0"), val = tensor([7, 0, 0, 0, 0])]; + tensor var_1566_end_0 = const()[name = string("op_1566_end_0"), val = tensor([8, 1, 5, 2048, 64])]; + tensor var_1566_end_mask_0 = const()[name = string("op_1566_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1566_squeeze_mask_0 = const()[name = string("op_1566_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1566_cast_fp16 = slice_by_index(begin = var_1566_begin_0, end = var_1566_end_0, end_mask = var_1566_end_mask_0, squeeze_mask = var_1566_squeeze_mask_0, x = coreml_update_state_79)[name = string("op_1566_cast_fp16")]; + tensor var_1569_begin_0 = const()[name = string("op_1569_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1569_end_mask_0 = const()[name = string("op_1569_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1569_cast_fp16 = slice_by_index(begin = var_1569_begin_0, end = concat_11, end_mask = var_1569_end_mask_0, x = var_1566_cast_fp16)[name = string("op_1569_cast_fp16")]; + tensor var_1571_shape_cast_fp16 = shape(x = var_1564_cast_fp16)[name = string("op_1571_shape_cast_fp16")]; + int32 gather_139 = const()[name = string("gather_139"), val = int32(1)]; + int32 gather_140 = const()[name = string("gather_140"), val = int32(5)]; + int32 gather_141_axis_0 = const()[name = string("gather_141_axis_0"), val = int32(0)]; + int32 gather_141_batch_dims_0 = const()[name = string("gather_141_batch_dims_0"), val = int32(0)]; + bool gather_141_validate_indices_0 = const()[name = string("gather_141_validate_indices_0"), val = bool(false)]; + string var_1571_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1571_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_141_to_uint16 = const()[name = string("select_141_to_uint16"), val = uint16(2)]; + tensor var_1571_shape_cast_fp16_to_uint16 = cast(dtype = var_1571_shape_cast_fp16_to_uint16_dtype_0, x = var_1571_shape_cast_fp16)[name = string("cast_198")]; + uint16 gather_141_cast_uint16 = gather(axis = gather_141_axis_0, batch_dims = gather_141_batch_dims_0, indices = select_141_to_uint16, validate_indices = gather_141_validate_indices_0, x = var_1571_shape_cast_fp16_to_uint16)[name = string("gather_141_cast_uint16")]; + string gather_141_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_141_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_142 = const()[name = string("gather_142"), val = int32(64)]; + tensor var_1578_axes_0 = const()[name = string("op_1578_axes_0"), val = tensor([2])]; + tensor var_1578_cast_fp16 = expand_dims(axes = var_1578_axes_0, x = var_1564_cast_fp16)[name = string("op_1578_cast_fp16")]; + tensor shape_157_cast_fp16 = shape(x = var_1578_cast_fp16)[name = string("shape_157_cast_fp16")]; + int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; + bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; + int32 gather_141_cast_uint16_to_int32 = cast(dtype = gather_141_cast_uint16_to_int32_dtype_0, x = gather_141_cast_uint16)[name = string("cast_197")]; + tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (gather_139, gather_140, var_89, gather_141_cast_uint16_to_int32, gather_142))[name = string("concat_146")]; + tensor real_div_14 = real_div(x = concat_146, y = shape_157_cast_fp16)[name = string("real_div_14")]; + tensor hidden_states_221_cast_fp16 = tile(reps = real_div_14, x = var_1578_cast_fp16)[name = string("hidden_states_221_cast_fp16")]; + tensor concat_147x = const()[name = string("concat_147x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_31_cast_fp16 = reshape(shape = concat_147x, x = hidden_states_221_cast_fp16)[name = string("key_states_31_cast_fp16")]; + tensor var_1588_shape_cast_fp16 = shape(x = var_1569_cast_fp16)[name = string("op_1588_shape_cast_fp16")]; + int32 gather_143 = const()[name = string("gather_143"), val = int32(1)]; + int32 gather_144 = const()[name = string("gather_144"), val = int32(5)]; + int32 gather_145_axis_0 = const()[name = string("gather_145_axis_0"), val = int32(0)]; + int32 gather_145_batch_dims_0 = const()[name = string("gather_145_batch_dims_0"), val = int32(0)]; + bool gather_145_validate_indices_0 = const()[name = string("gather_145_validate_indices_0"), val = bool(false)]; + string var_1588_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1588_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_145_to_uint16 = const()[name = string("select_145_to_uint16"), val = uint16(2)]; + tensor var_1588_shape_cast_fp16_to_uint16 = cast(dtype = var_1588_shape_cast_fp16_to_uint16_dtype_0, x = var_1588_shape_cast_fp16)[name = string("cast_196")]; + uint16 gather_145_cast_uint16 = gather(axis = gather_145_axis_0, batch_dims = gather_145_batch_dims_0, indices = select_145_to_uint16, validate_indices = gather_145_validate_indices_0, x = var_1588_shape_cast_fp16_to_uint16)[name = string("gather_145_cast_uint16")]; + string gather_145_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_145_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_146 = const()[name = string("gather_146"), val = int32(64)]; + tensor var_1595_axes_0 = const()[name = string("op_1595_axes_0"), val = tensor([2])]; + tensor var_1595_cast_fp16 = expand_dims(axes = var_1595_axes_0, x = var_1569_cast_fp16)[name = string("op_1595_cast_fp16")]; + tensor shape_162_cast_fp16 = shape(x = var_1595_cast_fp16)[name = string("shape_162_cast_fp16")]; + int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; + bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; + int32 gather_145_cast_uint16_to_int32 = cast(dtype = gather_145_cast_uint16_to_int32_dtype_0, x = gather_145_cast_uint16)[name = string("cast_195")]; + tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (gather_143, gather_144, var_89, gather_145_cast_uint16_to_int32, gather_146))[name = string("concat_148")]; + tensor real_div_15 = real_div(x = concat_148, y = shape_162_cast_fp16)[name = string("real_div_15")]; + tensor hidden_states_225_cast_fp16 = tile(reps = real_div_15, x = var_1595_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; + tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_31_cast_fp16 = reshape(shape = concat_149x, x = hidden_states_225_cast_fp16)[name = string("value_states_31_cast_fp16")]; + tensor var_1605_shape_cast_fp16 = shape(x = key_states_31_cast_fp16)[name = string("op_1605_shape_cast_fp16")]; + int32 gather_147_axis_0 = const()[name = string("gather_147_axis_0"), val = int32(0)]; + int32 gather_147_batch_dims_0 = const()[name = string("gather_147_batch_dims_0"), val = int32(0)]; + bool gather_147_validate_indices_0 = const()[name = string("gather_147_validate_indices_0"), val = bool(false)]; + string var_1605_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1605_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_147_to_uint16 = const()[name = string("select_147_to_uint16"), val = uint16(2)]; + tensor var_1605_shape_cast_fp16_to_uint16 = cast(dtype = var_1605_shape_cast_fp16_to_uint16_dtype_0, x = var_1605_shape_cast_fp16)[name = string("cast_194")]; + uint16 gather_147_cast_uint16 = gather(axis = gather_147_axis_0, batch_dims = gather_147_batch_dims_0, indices = select_147_to_uint16, validate_indices = gather_147_validate_indices_0, x = var_1605_shape_cast_fp16_to_uint16)[name = string("gather_147_cast_uint16")]; + string gather_147_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_147_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_150_values0_0 = const()[name = string("concat_150_values0_0"), val = int32(1)]; + int32 concat_150_values1_0 = const()[name = string("concat_150_values1_0"), val = int32(1)]; + int32 concat_150_values2_0 = const()[name = string("concat_150_values2_0"), val = int32(0)]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + int32 gather_147_cast_uint16_to_int32 = cast(dtype = gather_147_cast_uint16_to_int32_dtype_0, x = gather_147_cast_uint16)[name = string("cast_193")]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (concat_150_values0_0, concat_150_values1_0, concat_150_values2_0, gather_147_cast_uint16_to_int32))[name = string("concat_150")]; + tensor causal_mask_17_begin_0 = const()[name = string("causal_mask_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_17_end_mask_0 = const()[name = string("causal_mask_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_17_cast_fp16 = slice_by_index(begin = causal_mask_17_begin_0, end = concat_150, end_mask = causal_mask_17_end_mask_0, x = causal_mask)[name = string("causal_mask_17_cast_fp16")]; + tensor attn_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_17_cast_fp16, key = key_states_31_cast_fp16, query = query_states_31_cast_fp16, value = value_states_31_cast_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor var_1611_perm_0 = const()[name = string("op_1611_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; + bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; + int32 gather_131_cast_uint16_to_int32 = cast(dtype = gather_131_cast_uint16_to_int32_dtype_0, x = gather_131_cast_uint16)[name = string("cast_192")]; + tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (gather_130, gather_131_cast_uint16_to_int32, var_85))[name = string("concat_151")]; + tensor var_1611_cast_fp16 = transpose(perm = var_1611_perm_0, x = attn_output_29_cast_fp16)[name = string("transpose_96")]; + tensor input_57_cast_fp16 = reshape(shape = concat_151, x = var_1611_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66157888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66618752))))[name = string("model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_52_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized, x = input_57_cast_fp16)[name = string("linear_52_cast_fp16")]; + tensor hidden_states_229_cast_fp16 = add(x = hidden_states_209_cast_fp16, y = linear_52_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; + fp16 var_80_promoted_15_to_fp16 = const()[name = string("op_80_promoted_15_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1620_cast_fp16 = pow(x = hidden_states_229_cast_fp16, y = var_80_promoted_15_to_fp16)[name = string("op_1620_cast_fp16")]; + tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; + bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; + tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1620_cast_fp16)[name = string("variance_31_cast_fp16")]; + fp16 var_1623_to_fp16 = const()[name = string("op_1623_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1624_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1623_to_fp16)[name = string("op_1624_cast_fp16")]; + fp32 var_1625_epsilon_0 = const()[name = string("op_1625_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1625_cast_fp16 = rsqrt(epsilon = var_1625_epsilon_0, x = var_1624_cast_fp16)[name = string("op_1625_cast_fp16")]; + tensor hidden_states_233_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = var_1625_cast_fp16)[name = string("hidden_states_233_cast_fp16")]; + tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66676416)))]; + tensor input_59_cast_fp16 = mul(x = model_model_layers_7_post_attention_layernorm_weight_to_fp16, y = hidden_states_233_cast_fp16)[name = string("input_59_cast_fp16")]; + tensor model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66678400))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67907264))))[name = string("model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor var_1637_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_1637_cast_fp16")]; + tensor model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68060928))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69289792))))[name = string("model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = string("linear_54_cast_fp16")]; + tensor input_63_cast_fp16 = mul(x = var_1637_cast_fp16, y = linear_54_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69443456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70672320))))[name = string("model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_55_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor hidden_states_239_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = linear_55_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + fp16 var_80_promoted_16_to_fp16 = const()[name = string("op_80_promoted_16_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1650_cast_fp16 = pow(x = hidden_states_239_cast_fp16, y = var_80_promoted_16_to_fp16)[name = string("op_1650_cast_fp16")]; + tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; + bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; + tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1650_cast_fp16)[name = string("variance_33_cast_fp16")]; + fp16 var_1653_to_fp16 = const()[name = string("op_1653_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1654_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1653_to_fp16)[name = string("op_1654_cast_fp16")]; + fp32 var_1655_epsilon_0 = const()[name = string("op_1655_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1655_cast_fp16 = rsqrt(epsilon = var_1655_epsilon_0, x = var_1654_cast_fp16)[name = string("op_1655_cast_fp16")]; + tensor hidden_states_243_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = var_1655_cast_fp16)[name = string("hidden_states_243_cast_fp16")]; + tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70825984)))]; + tensor hidden_states_247_cast_fp16 = mul(x = model_model_layers_8_input_layernorm_weight_to_fp16, y = hidden_states_243_cast_fp16)[name = string("hidden_states_247_cast_fp16")]; + tensor var_1666_shape_cast_fp16 = shape(x = hidden_states_247_cast_fp16)[name = string("op_1666_shape_cast_fp16")]; + int32 gather_148 = const()[name = string("gather_148"), val = int32(1)]; + int32 gather_149_axis_0 = const()[name = string("gather_149_axis_0"), val = int32(0)]; + int32 gather_149_batch_dims_0 = const()[name = string("gather_149_batch_dims_0"), val = int32(0)]; + bool gather_149_validate_indices_0 = const()[name = string("gather_149_validate_indices_0"), val = bool(false)]; + string var_1666_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1666_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_149_to_uint16 = const()[name = string("select_149_to_uint16"), val = uint16(1)]; + tensor var_1666_shape_cast_fp16_to_uint16 = cast(dtype = var_1666_shape_cast_fp16_to_uint16_dtype_0, x = var_1666_shape_cast_fp16)[name = string("cast_191")]; + uint16 gather_149_cast_uint16 = gather(axis = gather_149_axis_0, batch_dims = gather_149_batch_dims_0, indices = select_149_to_uint16, validate_indices = gather_149_validate_indices_0, x = var_1666_shape_cast_fp16_to_uint16)[name = string("gather_149_cast_uint16")]; + string gather_149_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_149_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70827968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71288832))))[name = string("model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71346496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71500160))))[name = string("model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71519424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71673088))))[name = string("model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_58_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_58_cast_fp16")]; + tensor concat_152x = const()[name = string("concat_152x"), val = tensor([1, -1, 15, 64])]; + tensor var_1675_cast_fp16 = reshape(shape = concat_152x, x = linear_56_cast_fp16)[name = string("op_1675_cast_fp16")]; + tensor q_17_perm_0 = const()[name = string("q_17_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_153x = const()[name = string("concat_153x"), val = tensor([1, -1, 5, 64])]; + tensor var_1678_cast_fp16 = reshape(shape = concat_153x, x = linear_57_cast_fp16)[name = string("op_1678_cast_fp16")]; + tensor k_17_perm_0 = const()[name = string("k_17_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 5, 64])]; + tensor var_1681_cast_fp16 = reshape(shape = concat_154x, x = linear_58_cast_fp16)[name = string("op_1681_cast_fp16")]; + tensor v_state_17_perm_0 = const()[name = string("v_state_17_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_17_cast_fp16 = transpose(perm = q_17_perm_0, x = var_1675_cast_fp16)[name = string("transpose_95")]; + tensor var_1685_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1685_cast_fp16")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1696_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1696_cast_fp16")]; + bool var_1698_interleave_0 = const()[name = string("op_1698_interleave_0"), val = bool(false)]; + tensor var_1698_cast_fp16 = concat(axis = var_85, interleave = var_1698_interleave_0, values = (var_1696_cast_fp16, x1_33_cast_fp16))[name = string("op_1698_cast_fp16")]; + tensor var_1699_cast_fp16 = mul(x = var_1698_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1699_cast_fp16")]; + tensor query_states_35_cast_fp16 = add(x = var_1685_cast_fp16, y = var_1699_cast_fp16)[name = string("query_states_35_cast_fp16")]; + tensor k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = var_1678_cast_fp16)[name = string("transpose_94")]; + tensor var_1701_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1701_cast_fp16")]; + tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; + tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1712_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1712_cast_fp16")]; + bool var_1714_interleave_0 = const()[name = string("op_1714_interleave_0"), val = bool(false)]; + tensor var_1714_cast_fp16 = concat(axis = var_85, interleave = var_1714_interleave_0, values = (var_1712_cast_fp16, x1_35_cast_fp16))[name = string("op_1714_cast_fp16")]; + tensor var_1715_cast_fp16 = mul(x = var_1714_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1715_cast_fp16")]; + tensor k_state_17_cast_fp16 = add(x = var_1701_cast_fp16, y = var_1715_cast_fp16)[name = string("k_state_17_cast_fp16")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor concat_157_values0_0 = const()[name = string("concat_157_values0_0"), val = tensor([8])]; + int32 concat_157_axis_0 = const()[name = string("concat_157_axis_0"), val = int32(0)]; + bool concat_157_interleave_0 = const()[name = string("concat_157_interleave_0"), val = bool(false)]; + tensor concat_157 = concat(axis = concat_157_axis_0, interleave = concat_157_interleave_0, values = (concat_157_values0_0, expand_dims_96, expand_dims_97, expand_dims_2, expand_dims_99))[name = string("concat_157")]; + tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_157, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = k_state_17_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_80")]; + tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_17_cast_fp16 = transpose(perm = v_state_17_perm_0, x = var_1681_cast_fp16)[name = string("transpose_93")]; + tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_157, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = v_state_17_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_81")]; + tensor var_1738_begin_0 = const()[name = string("op_1738_begin_0"), val = tensor([8, 0, 0, 0, 0])]; + tensor var_1738_end_0 = const()[name = string("op_1738_end_0"), val = tensor([9, 1, 5, 2048, 64])]; + tensor var_1738_end_mask_0 = const()[name = string("op_1738_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1738_squeeze_mask_0 = const()[name = string("op_1738_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1738_cast_fp16 = slice_by_index(begin = var_1738_begin_0, end = var_1738_end_0, end_mask = var_1738_end_mask_0, squeeze_mask = var_1738_squeeze_mask_0, x = coreml_update_state_80)[name = string("op_1738_cast_fp16")]; + tensor var_1741_begin_0 = const()[name = string("op_1741_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1741_end_mask_0 = const()[name = string("op_1741_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1741_cast_fp16 = slice_by_index(begin = var_1741_begin_0, end = concat_11, end_mask = var_1741_end_mask_0, x = var_1738_cast_fp16)[name = string("op_1741_cast_fp16")]; + tensor var_1743_begin_0 = const()[name = string("op_1743_begin_0"), val = tensor([8, 0, 0, 0, 0])]; + tensor var_1743_end_0 = const()[name = string("op_1743_end_0"), val = tensor([9, 1, 5, 2048, 64])]; + tensor var_1743_end_mask_0 = const()[name = string("op_1743_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1743_squeeze_mask_0 = const()[name = string("op_1743_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1743_cast_fp16 = slice_by_index(begin = var_1743_begin_0, end = var_1743_end_0, end_mask = var_1743_end_mask_0, squeeze_mask = var_1743_squeeze_mask_0, x = coreml_update_state_81)[name = string("op_1743_cast_fp16")]; + tensor var_1746_begin_0 = const()[name = string("op_1746_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1746_end_mask_0 = const()[name = string("op_1746_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1746_cast_fp16 = slice_by_index(begin = var_1746_begin_0, end = concat_11, end_mask = var_1746_end_mask_0, x = var_1743_cast_fp16)[name = string("op_1746_cast_fp16")]; + tensor var_1748_shape_cast_fp16 = shape(x = var_1741_cast_fp16)[name = string("op_1748_shape_cast_fp16")]; + int32 gather_157 = const()[name = string("gather_157"), val = int32(1)]; + int32 gather_158 = const()[name = string("gather_158"), val = int32(5)]; + int32 gather_159_axis_0 = const()[name = string("gather_159_axis_0"), val = int32(0)]; + int32 gather_159_batch_dims_0 = const()[name = string("gather_159_batch_dims_0"), val = int32(0)]; + bool gather_159_validate_indices_0 = const()[name = string("gather_159_validate_indices_0"), val = bool(false)]; + string var_1748_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1748_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_159_to_uint16 = const()[name = string("select_159_to_uint16"), val = uint16(2)]; + tensor var_1748_shape_cast_fp16_to_uint16 = cast(dtype = var_1748_shape_cast_fp16_to_uint16_dtype_0, x = var_1748_shape_cast_fp16)[name = string("cast_190")]; + uint16 gather_159_cast_uint16 = gather(axis = gather_159_axis_0, batch_dims = gather_159_batch_dims_0, indices = select_159_to_uint16, validate_indices = gather_159_validate_indices_0, x = var_1748_shape_cast_fp16_to_uint16)[name = string("gather_159_cast_uint16")]; + string gather_159_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_159_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_160 = const()[name = string("gather_160"), val = int32(64)]; + tensor var_1755_axes_0 = const()[name = string("op_1755_axes_0"), val = tensor([2])]; + tensor var_1755_cast_fp16 = expand_dims(axes = var_1755_axes_0, x = var_1741_cast_fp16)[name = string("op_1755_cast_fp16")]; + tensor shape_177_cast_fp16 = shape(x = var_1755_cast_fp16)[name = string("shape_177_cast_fp16")]; + int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; + bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; + int32 gather_159_cast_uint16_to_int32 = cast(dtype = gather_159_cast_uint16_to_int32_dtype_0, x = gather_159_cast_uint16)[name = string("cast_189")]; + tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (gather_157, gather_158, var_89, gather_159_cast_uint16_to_int32, gather_160))[name = string("concat_165")]; + tensor real_div_16 = real_div(x = concat_165, y = shape_177_cast_fp16)[name = string("real_div_16")]; + tensor hidden_states_251_cast_fp16 = tile(reps = real_div_16, x = var_1755_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; + tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_35_cast_fp16 = reshape(shape = concat_166x, x = hidden_states_251_cast_fp16)[name = string("key_states_35_cast_fp16")]; + tensor var_1765_shape_cast_fp16 = shape(x = var_1746_cast_fp16)[name = string("op_1765_shape_cast_fp16")]; + int32 gather_161 = const()[name = string("gather_161"), val = int32(1)]; + int32 gather_162 = const()[name = string("gather_162"), val = int32(5)]; + int32 gather_163_axis_0 = const()[name = string("gather_163_axis_0"), val = int32(0)]; + int32 gather_163_batch_dims_0 = const()[name = string("gather_163_batch_dims_0"), val = int32(0)]; + bool gather_163_validate_indices_0 = const()[name = string("gather_163_validate_indices_0"), val = bool(false)]; + string var_1765_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1765_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_163_to_uint16 = const()[name = string("select_163_to_uint16"), val = uint16(2)]; + tensor var_1765_shape_cast_fp16_to_uint16 = cast(dtype = var_1765_shape_cast_fp16_to_uint16_dtype_0, x = var_1765_shape_cast_fp16)[name = string("cast_188")]; + uint16 gather_163_cast_uint16 = gather(axis = gather_163_axis_0, batch_dims = gather_163_batch_dims_0, indices = select_163_to_uint16, validate_indices = gather_163_validate_indices_0, x = var_1765_shape_cast_fp16_to_uint16)[name = string("gather_163_cast_uint16")]; + string gather_163_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_163_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_164 = const()[name = string("gather_164"), val = int32(64)]; + tensor var_1772_axes_0 = const()[name = string("op_1772_axes_0"), val = tensor([2])]; + tensor var_1772_cast_fp16 = expand_dims(axes = var_1772_axes_0, x = var_1746_cast_fp16)[name = string("op_1772_cast_fp16")]; + tensor shape_182_cast_fp16 = shape(x = var_1772_cast_fp16)[name = string("shape_182_cast_fp16")]; + int32 concat_167_axis_0 = const()[name = string("concat_167_axis_0"), val = int32(0)]; + bool concat_167_interleave_0 = const()[name = string("concat_167_interleave_0"), val = bool(false)]; + int32 gather_163_cast_uint16_to_int32 = cast(dtype = gather_163_cast_uint16_to_int32_dtype_0, x = gather_163_cast_uint16)[name = string("cast_187")]; + tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (gather_161, gather_162, var_89, gather_163_cast_uint16_to_int32, gather_164))[name = string("concat_167")]; + tensor real_div_17 = real_div(x = concat_167, y = shape_182_cast_fp16)[name = string("real_div_17")]; + tensor hidden_states_255_cast_fp16 = tile(reps = real_div_17, x = var_1772_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; + tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_35_cast_fp16 = reshape(shape = concat_168x, x = hidden_states_255_cast_fp16)[name = string("value_states_35_cast_fp16")]; + tensor var_1782_shape_cast_fp16 = shape(x = key_states_35_cast_fp16)[name = string("op_1782_shape_cast_fp16")]; + int32 gather_165_axis_0 = const()[name = string("gather_165_axis_0"), val = int32(0)]; + int32 gather_165_batch_dims_0 = const()[name = string("gather_165_batch_dims_0"), val = int32(0)]; + bool gather_165_validate_indices_0 = const()[name = string("gather_165_validate_indices_0"), val = bool(false)]; + string var_1782_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1782_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_165_to_uint16 = const()[name = string("select_165_to_uint16"), val = uint16(2)]; + tensor var_1782_shape_cast_fp16_to_uint16 = cast(dtype = var_1782_shape_cast_fp16_to_uint16_dtype_0, x = var_1782_shape_cast_fp16)[name = string("cast_186")]; + uint16 gather_165_cast_uint16 = gather(axis = gather_165_axis_0, batch_dims = gather_165_batch_dims_0, indices = select_165_to_uint16, validate_indices = gather_165_validate_indices_0, x = var_1782_shape_cast_fp16_to_uint16)[name = string("gather_165_cast_uint16")]; + string gather_165_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_165_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_169_values0_0 = const()[name = string("concat_169_values0_0"), val = int32(1)]; + int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(1)]; + int32 concat_169_values2_0 = const()[name = string("concat_169_values2_0"), val = int32(0)]; + int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; + bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; + int32 gather_165_cast_uint16_to_int32 = cast(dtype = gather_165_cast_uint16_to_int32_dtype_0, x = gather_165_cast_uint16)[name = string("cast_185")]; + tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (concat_169_values0_0, concat_169_values1_0, concat_169_values2_0, gather_165_cast_uint16_to_int32))[name = string("concat_169")]; + tensor causal_mask_19_begin_0 = const()[name = string("causal_mask_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_19_end_mask_0 = const()[name = string("causal_mask_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_19_cast_fp16 = slice_by_index(begin = causal_mask_19_begin_0, end = concat_169, end_mask = causal_mask_19_end_mask_0, x = causal_mask)[name = string("causal_mask_19_cast_fp16")]; + tensor attn_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_19_cast_fp16, key = key_states_35_cast_fp16, query = query_states_35_cast_fp16, value = value_states_35_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_1788_perm_0 = const()[name = string("op_1788_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; + bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; + int32 gather_149_cast_uint16_to_int32 = cast(dtype = gather_149_cast_uint16_to_int32_dtype_0, x = gather_149_cast_uint16)[name = string("cast_184")]; + tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (gather_148, gather_149_cast_uint16_to_int32, var_85))[name = string("concat_170")]; + tensor var_1788_cast_fp16 = transpose(perm = var_1788_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_92")]; + tensor input_65_cast_fp16 = reshape(shape = concat_170, x = var_1788_cast_fp16)[name = string("input_65_cast_fp16")]; + tensor model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71692352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72153216))))[name = string("model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_59_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized, x = input_65_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor hidden_states_259_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = linear_59_cast_fp16)[name = string("hidden_states_259_cast_fp16")]; + fp16 var_80_promoted_17_to_fp16 = const()[name = string("op_80_promoted_17_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1797_cast_fp16 = pow(x = hidden_states_259_cast_fp16, y = var_80_promoted_17_to_fp16)[name = string("op_1797_cast_fp16")]; + tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([-1])]; + bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; + tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_1797_cast_fp16)[name = string("variance_35_cast_fp16")]; + fp16 var_1800_to_fp16 = const()[name = string("op_1800_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1801_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1800_to_fp16)[name = string("op_1801_cast_fp16")]; + fp32 var_1802_epsilon_0 = const()[name = string("op_1802_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1802_cast_fp16 = rsqrt(epsilon = var_1802_epsilon_0, x = var_1801_cast_fp16)[name = string("op_1802_cast_fp16")]; + tensor hidden_states_263_cast_fp16 = mul(x = hidden_states_259_cast_fp16, y = var_1802_cast_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72210880)))]; + tensor input_67_cast_fp16 = mul(x = model_model_layers_8_post_attention_layernorm_weight_to_fp16, y = hidden_states_263_cast_fp16)[name = string("input_67_cast_fp16")]; + tensor model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72212864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73441728))))[name = string("model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor var_1814_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_1814_cast_fp16")]; + tensor model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73595392))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74824256))))[name = string("model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor input_71_cast_fp16 = mul(x = var_1814_cast_fp16, y = linear_61_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74977920))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76206784))))[name = string("model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_62_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized, x = input_71_cast_fp16)[name = string("linear_62_cast_fp16")]; + tensor hidden_states_269_cast_fp16 = add(x = hidden_states_259_cast_fp16, y = linear_62_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; + fp16 var_80_promoted_18_to_fp16 = const()[name = string("op_80_promoted_18_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1827_cast_fp16 = pow(x = hidden_states_269_cast_fp16, y = var_80_promoted_18_to_fp16)[name = string("op_1827_cast_fp16")]; + tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([-1])]; + bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; + tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_1827_cast_fp16)[name = string("variance_37_cast_fp16")]; + fp16 var_1830_to_fp16 = const()[name = string("op_1830_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1831_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1830_to_fp16)[name = string("op_1831_cast_fp16")]; + fp32 var_1832_epsilon_0 = const()[name = string("op_1832_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1832_cast_fp16 = rsqrt(epsilon = var_1832_epsilon_0, x = var_1831_cast_fp16)[name = string("op_1832_cast_fp16")]; + tensor hidden_states_273_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = var_1832_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; + tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76360448)))]; + tensor hidden_states_277_cast_fp16 = mul(x = model_model_layers_9_input_layernorm_weight_to_fp16, y = hidden_states_273_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; + tensor var_1843_shape_cast_fp16 = shape(x = hidden_states_277_cast_fp16)[name = string("op_1843_shape_cast_fp16")]; + int32 gather_166 = const()[name = string("gather_166"), val = int32(1)]; + int32 gather_167_axis_0 = const()[name = string("gather_167_axis_0"), val = int32(0)]; + int32 gather_167_batch_dims_0 = const()[name = string("gather_167_batch_dims_0"), val = int32(0)]; + bool gather_167_validate_indices_0 = const()[name = string("gather_167_validate_indices_0"), val = bool(false)]; + string var_1843_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1843_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_167_to_uint16 = const()[name = string("select_167_to_uint16"), val = uint16(1)]; + tensor var_1843_shape_cast_fp16_to_uint16 = cast(dtype = var_1843_shape_cast_fp16_to_uint16_dtype_0, x = var_1843_shape_cast_fp16)[name = string("cast_183")]; + uint16 gather_167_cast_uint16 = gather(axis = gather_167_axis_0, batch_dims = gather_167_batch_dims_0, indices = select_167_to_uint16, validate_indices = gather_167_validate_indices_0, x = var_1843_shape_cast_fp16_to_uint16)[name = string("gather_167_cast_uint16")]; + string gather_167_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_167_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76362432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76823296))))[name = string("model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76880960))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77034624))))[name = string("model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_64_cast_fp16")]; + tensor model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77053888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77207552))))[name = string("model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 15, 64])]; + tensor var_1852_cast_fp16 = reshape(shape = concat_171x, x = linear_63_cast_fp16)[name = string("op_1852_cast_fp16")]; + tensor q_19_perm_0 = const()[name = string("q_19_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_172x = const()[name = string("concat_172x"), val = tensor([1, -1, 5, 64])]; + tensor var_1855_cast_fp16 = reshape(shape = concat_172x, x = linear_64_cast_fp16)[name = string("op_1855_cast_fp16")]; + tensor k_19_perm_0 = const()[name = string("k_19_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_173x = const()[name = string("concat_173x"), val = tensor([1, -1, 5, 64])]; + tensor var_1858_cast_fp16 = reshape(shape = concat_173x, x = linear_65_cast_fp16)[name = string("op_1858_cast_fp16")]; + tensor v_state_19_perm_0 = const()[name = string("v_state_19_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_19_cast_fp16 = transpose(perm = q_19_perm_0, x = var_1852_cast_fp16)[name = string("transpose_91")]; + tensor var_1862_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1862_cast_fp16")]; + tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; + tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; + fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1873_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_1873_cast_fp16")]; + bool var_1875_interleave_0 = const()[name = string("op_1875_interleave_0"), val = bool(false)]; + tensor var_1875_cast_fp16 = concat(axis = var_85, interleave = var_1875_interleave_0, values = (var_1873_cast_fp16, x1_37_cast_fp16))[name = string("op_1875_cast_fp16")]; + tensor var_1876_cast_fp16 = mul(x = var_1875_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1876_cast_fp16")]; + tensor query_states_39_cast_fp16 = add(x = var_1862_cast_fp16, y = var_1876_cast_fp16)[name = string("query_states_39_cast_fp16")]; + tensor k_19_cast_fp16 = transpose(perm = k_19_perm_0, x = var_1855_cast_fp16)[name = string("transpose_90")]; + tensor var_1878_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1878_cast_fp16")]; + tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; + tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; + fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1889_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1889_cast_fp16")]; + bool var_1891_interleave_0 = const()[name = string("op_1891_interleave_0"), val = bool(false)]; + tensor var_1891_cast_fp16 = concat(axis = var_85, interleave = var_1891_interleave_0, values = (var_1889_cast_fp16, x1_39_cast_fp16))[name = string("op_1891_cast_fp16")]; + tensor var_1892_cast_fp16 = mul(x = var_1891_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1892_cast_fp16")]; + tensor k_state_19_cast_fp16 = add(x = var_1878_cast_fp16, y = var_1892_cast_fp16)[name = string("k_state_19_cast_fp16")]; + tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([0])]; + tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; + tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; + tensor concat_176_values0_0 = const()[name = string("concat_176_values0_0"), val = tensor([9])]; + int32 concat_176_axis_0 = const()[name = string("concat_176_axis_0"), val = int32(0)]; + bool concat_176_interleave_0 = const()[name = string("concat_176_interleave_0"), val = bool(false)]; + tensor concat_176 = concat(axis = concat_176_axis_0, interleave = concat_176_interleave_0, values = (concat_176_values0_0, expand_dims_108, expand_dims_109, expand_dims_2, expand_dims_111))[name = string("concat_176")]; + tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_176, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = k_state_19_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_82")]; + tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_19_cast_fp16 = transpose(perm = v_state_19_perm_0, x = var_1858_cast_fp16)[name = string("transpose_89")]; + tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_176, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = v_state_19_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_83")]; + tensor var_1915_begin_0 = const()[name = string("op_1915_begin_0"), val = tensor([9, 0, 0, 0, 0])]; + tensor var_1915_end_0 = const()[name = string("op_1915_end_0"), val = tensor([10, 1, 5, 2048, 64])]; + tensor var_1915_end_mask_0 = const()[name = string("op_1915_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1915_squeeze_mask_0 = const()[name = string("op_1915_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1915_cast_fp16 = slice_by_index(begin = var_1915_begin_0, end = var_1915_end_0, end_mask = var_1915_end_mask_0, squeeze_mask = var_1915_squeeze_mask_0, x = coreml_update_state_82)[name = string("op_1915_cast_fp16")]; + tensor var_1918_begin_0 = const()[name = string("op_1918_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1918_end_mask_0 = const()[name = string("op_1918_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1918_cast_fp16 = slice_by_index(begin = var_1918_begin_0, end = concat_11, end_mask = var_1918_end_mask_0, x = var_1915_cast_fp16)[name = string("op_1918_cast_fp16")]; + tensor var_1920_begin_0 = const()[name = string("op_1920_begin_0"), val = tensor([9, 0, 0, 0, 0])]; + tensor var_1920_end_0 = const()[name = string("op_1920_end_0"), val = tensor([10, 1, 5, 2048, 64])]; + tensor var_1920_end_mask_0 = const()[name = string("op_1920_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1920_squeeze_mask_0 = const()[name = string("op_1920_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1920_cast_fp16 = slice_by_index(begin = var_1920_begin_0, end = var_1920_end_0, end_mask = var_1920_end_mask_0, squeeze_mask = var_1920_squeeze_mask_0, x = coreml_update_state_83)[name = string("op_1920_cast_fp16")]; + tensor var_1923_begin_0 = const()[name = string("op_1923_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1923_end_mask_0 = const()[name = string("op_1923_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1923_cast_fp16 = slice_by_index(begin = var_1923_begin_0, end = concat_11, end_mask = var_1923_end_mask_0, x = var_1920_cast_fp16)[name = string("op_1923_cast_fp16")]; + tensor var_1925_shape_cast_fp16 = shape(x = var_1918_cast_fp16)[name = string("op_1925_shape_cast_fp16")]; + int32 gather_175 = const()[name = string("gather_175"), val = int32(1)]; + int32 gather_176 = const()[name = string("gather_176"), val = int32(5)]; + int32 gather_177_axis_0 = const()[name = string("gather_177_axis_0"), val = int32(0)]; + int32 gather_177_batch_dims_0 = const()[name = string("gather_177_batch_dims_0"), val = int32(0)]; + bool gather_177_validate_indices_0 = const()[name = string("gather_177_validate_indices_0"), val = bool(false)]; + string var_1925_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1925_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_177_to_uint16 = const()[name = string("select_177_to_uint16"), val = uint16(2)]; + tensor var_1925_shape_cast_fp16_to_uint16 = cast(dtype = var_1925_shape_cast_fp16_to_uint16_dtype_0, x = var_1925_shape_cast_fp16)[name = string("cast_182")]; + uint16 gather_177_cast_uint16 = gather(axis = gather_177_axis_0, batch_dims = gather_177_batch_dims_0, indices = select_177_to_uint16, validate_indices = gather_177_validate_indices_0, x = var_1925_shape_cast_fp16_to_uint16)[name = string("gather_177_cast_uint16")]; + string gather_177_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_177_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_178 = const()[name = string("gather_178"), val = int32(64)]; + tensor var_1932_axes_0 = const()[name = string("op_1932_axes_0"), val = tensor([2])]; + tensor var_1932_cast_fp16 = expand_dims(axes = var_1932_axes_0, x = var_1918_cast_fp16)[name = string("op_1932_cast_fp16")]; + tensor shape_197_cast_fp16 = shape(x = var_1932_cast_fp16)[name = string("shape_197_cast_fp16")]; + int32 concat_184_axis_0 = const()[name = string("concat_184_axis_0"), val = int32(0)]; + bool concat_184_interleave_0 = const()[name = string("concat_184_interleave_0"), val = bool(false)]; + int32 gather_177_cast_uint16_to_int32 = cast(dtype = gather_177_cast_uint16_to_int32_dtype_0, x = gather_177_cast_uint16)[name = string("cast_181")]; + tensor concat_184 = concat(axis = concat_184_axis_0, interleave = concat_184_interleave_0, values = (gather_175, gather_176, var_89, gather_177_cast_uint16_to_int32, gather_178))[name = string("concat_184")]; + tensor real_div_18 = real_div(x = concat_184, y = shape_197_cast_fp16)[name = string("real_div_18")]; + tensor hidden_states_281_cast_fp16 = tile(reps = real_div_18, x = var_1932_cast_fp16)[name = string("hidden_states_281_cast_fp16")]; + tensor concat_185x = const()[name = string("concat_185x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_39_cast_fp16 = reshape(shape = concat_185x, x = hidden_states_281_cast_fp16)[name = string("key_states_39_cast_fp16")]; + tensor var_1942_shape_cast_fp16 = shape(x = var_1923_cast_fp16)[name = string("op_1942_shape_cast_fp16")]; + int32 gather_179 = const()[name = string("gather_179"), val = int32(1)]; + int32 gather_180 = const()[name = string("gather_180"), val = int32(5)]; + int32 gather_181_axis_0 = const()[name = string("gather_181_axis_0"), val = int32(0)]; + int32 gather_181_batch_dims_0 = const()[name = string("gather_181_batch_dims_0"), val = int32(0)]; + bool gather_181_validate_indices_0 = const()[name = string("gather_181_validate_indices_0"), val = bool(false)]; + string var_1942_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1942_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_181_to_uint16 = const()[name = string("select_181_to_uint16"), val = uint16(2)]; + tensor var_1942_shape_cast_fp16_to_uint16 = cast(dtype = var_1942_shape_cast_fp16_to_uint16_dtype_0, x = var_1942_shape_cast_fp16)[name = string("cast_180")]; + uint16 gather_181_cast_uint16 = gather(axis = gather_181_axis_0, batch_dims = gather_181_batch_dims_0, indices = select_181_to_uint16, validate_indices = gather_181_validate_indices_0, x = var_1942_shape_cast_fp16_to_uint16)[name = string("gather_181_cast_uint16")]; + string gather_181_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_181_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_182 = const()[name = string("gather_182"), val = int32(64)]; + tensor var_1949_axes_0 = const()[name = string("op_1949_axes_0"), val = tensor([2])]; + tensor var_1949_cast_fp16 = expand_dims(axes = var_1949_axes_0, x = var_1923_cast_fp16)[name = string("op_1949_cast_fp16")]; + tensor shape_202_cast_fp16 = shape(x = var_1949_cast_fp16)[name = string("shape_202_cast_fp16")]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + int32 gather_181_cast_uint16_to_int32 = cast(dtype = gather_181_cast_uint16_to_int32_dtype_0, x = gather_181_cast_uint16)[name = string("cast_179")]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (gather_179, gather_180, var_89, gather_181_cast_uint16_to_int32, gather_182))[name = string("concat_186")]; + tensor real_div_19 = real_div(x = concat_186, y = shape_202_cast_fp16)[name = string("real_div_19")]; + tensor hidden_states_285_cast_fp16 = tile(reps = real_div_19, x = var_1949_cast_fp16)[name = string("hidden_states_285_cast_fp16")]; + tensor concat_187x = const()[name = string("concat_187x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_39_cast_fp16 = reshape(shape = concat_187x, x = hidden_states_285_cast_fp16)[name = string("value_states_39_cast_fp16")]; + tensor var_1959_shape_cast_fp16 = shape(x = key_states_39_cast_fp16)[name = string("op_1959_shape_cast_fp16")]; + int32 gather_183_axis_0 = const()[name = string("gather_183_axis_0"), val = int32(0)]; + int32 gather_183_batch_dims_0 = const()[name = string("gather_183_batch_dims_0"), val = int32(0)]; + bool gather_183_validate_indices_0 = const()[name = string("gather_183_validate_indices_0"), val = bool(false)]; + string var_1959_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1959_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_183_to_uint16 = const()[name = string("select_183_to_uint16"), val = uint16(2)]; + tensor var_1959_shape_cast_fp16_to_uint16 = cast(dtype = var_1959_shape_cast_fp16_to_uint16_dtype_0, x = var_1959_shape_cast_fp16)[name = string("cast_178")]; + uint16 gather_183_cast_uint16 = gather(axis = gather_183_axis_0, batch_dims = gather_183_batch_dims_0, indices = select_183_to_uint16, validate_indices = gather_183_validate_indices_0, x = var_1959_shape_cast_fp16_to_uint16)[name = string("gather_183_cast_uint16")]; + string gather_183_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_183_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_188_values0_0 = const()[name = string("concat_188_values0_0"), val = int32(1)]; + int32 concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = int32(1)]; + int32 concat_188_values2_0 = const()[name = string("concat_188_values2_0"), val = int32(0)]; + int32 concat_188_axis_0 = const()[name = string("concat_188_axis_0"), val = int32(0)]; + bool concat_188_interleave_0 = const()[name = string("concat_188_interleave_0"), val = bool(false)]; + int32 gather_183_cast_uint16_to_int32 = cast(dtype = gather_183_cast_uint16_to_int32_dtype_0, x = gather_183_cast_uint16)[name = string("cast_177")]; + tensor concat_188 = concat(axis = concat_188_axis_0, interleave = concat_188_interleave_0, values = (concat_188_values0_0, concat_188_values1_0, concat_188_values2_0, gather_183_cast_uint16_to_int32))[name = string("concat_188")]; + tensor causal_mask_21_begin_0 = const()[name = string("causal_mask_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_21_end_mask_0 = const()[name = string("causal_mask_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_21_cast_fp16 = slice_by_index(begin = causal_mask_21_begin_0, end = concat_188, end_mask = causal_mask_21_end_mask_0, x = causal_mask)[name = string("causal_mask_21_cast_fp16")]; + tensor attn_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_21_cast_fp16, key = key_states_39_cast_fp16, query = query_states_39_cast_fp16, value = value_states_39_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_1965_perm_0 = const()[name = string("op_1965_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_189_axis_0 = const()[name = string("concat_189_axis_0"), val = int32(0)]; + bool concat_189_interleave_0 = const()[name = string("concat_189_interleave_0"), val = bool(false)]; + int32 gather_167_cast_uint16_to_int32 = cast(dtype = gather_167_cast_uint16_to_int32_dtype_0, x = gather_167_cast_uint16)[name = string("cast_176")]; + tensor concat_189 = concat(axis = concat_189_axis_0, interleave = concat_189_interleave_0, values = (gather_166, gather_167_cast_uint16_to_int32, var_85))[name = string("concat_189")]; + tensor var_1965_cast_fp16 = transpose(perm = var_1965_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_88")]; + tensor input_73_cast_fp16 = reshape(shape = concat_189, x = var_1965_cast_fp16)[name = string("input_73_cast_fp16")]; + tensor model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77226816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77687680))))[name = string("model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_66_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized, x = input_73_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor hidden_states_289_cast_fp16 = add(x = hidden_states_269_cast_fp16, y = linear_66_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; + fp16 var_80_promoted_19_to_fp16 = const()[name = string("op_80_promoted_19_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1974_cast_fp16 = pow(x = hidden_states_289_cast_fp16, y = var_80_promoted_19_to_fp16)[name = string("op_1974_cast_fp16")]; + tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; + bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; + tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1974_cast_fp16)[name = string("variance_39_cast_fp16")]; + fp16 var_1977_to_fp16 = const()[name = string("op_1977_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1978_cast_fp16 = add(x = variance_39_cast_fp16, y = var_1977_to_fp16)[name = string("op_1978_cast_fp16")]; + fp32 var_1979_epsilon_0 = const()[name = string("op_1979_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1979_cast_fp16 = rsqrt(epsilon = var_1979_epsilon_0, x = var_1978_cast_fp16)[name = string("op_1979_cast_fp16")]; + tensor hidden_states_293_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = var_1979_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; + tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77745344)))]; + tensor input_75_cast_fp16 = mul(x = model_model_layers_9_post_attention_layernorm_weight_to_fp16, y = hidden_states_293_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77747328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78976192))))[name = string("model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor var_1991_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_1991_cast_fp16")]; + tensor model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79129856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80358720))))[name = string("model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor input_79_cast_fp16 = mul(x = var_1991_cast_fp16, y = linear_68_cast_fp16)[name = string("input_79_cast_fp16")]; + tensor model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80512384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81741248))))[name = string("model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_69_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized, x = input_79_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor hidden_states_299_cast_fp16 = add(x = hidden_states_289_cast_fp16, y = linear_69_cast_fp16)[name = string("hidden_states_299_cast_fp16")]; + fp16 var_80_promoted_20_to_fp16 = const()[name = string("op_80_promoted_20_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2004_cast_fp16 = pow(x = hidden_states_299_cast_fp16, y = var_80_promoted_20_to_fp16)[name = string("op_2004_cast_fp16")]; + tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([-1])]; + bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; + tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_2004_cast_fp16)[name = string("variance_41_cast_fp16")]; + fp16 var_2007_to_fp16 = const()[name = string("op_2007_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2008_cast_fp16 = add(x = variance_41_cast_fp16, y = var_2007_to_fp16)[name = string("op_2008_cast_fp16")]; + fp32 var_2009_epsilon_0 = const()[name = string("op_2009_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2009_cast_fp16 = rsqrt(epsilon = var_2009_epsilon_0, x = var_2008_cast_fp16)[name = string("op_2009_cast_fp16")]; + tensor hidden_states_303_cast_fp16 = mul(x = hidden_states_299_cast_fp16, y = var_2009_cast_fp16)[name = string("hidden_states_303_cast_fp16")]; + tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81894912)))]; + tensor hidden_states_307_cast_fp16 = mul(x = model_model_layers_10_input_layernorm_weight_to_fp16, y = hidden_states_303_cast_fp16)[name = string("hidden_states_307_cast_fp16")]; + tensor var_2020_shape_cast_fp16 = shape(x = hidden_states_307_cast_fp16)[name = string("op_2020_shape_cast_fp16")]; + int32 gather_184 = const()[name = string("gather_184"), val = int32(1)]; + int32 gather_185_axis_0 = const()[name = string("gather_185_axis_0"), val = int32(0)]; + int32 gather_185_batch_dims_0 = const()[name = string("gather_185_batch_dims_0"), val = int32(0)]; + bool gather_185_validate_indices_0 = const()[name = string("gather_185_validate_indices_0"), val = bool(false)]; + string var_2020_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2020_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_185_to_uint16 = const()[name = string("select_185_to_uint16"), val = uint16(1)]; + tensor var_2020_shape_cast_fp16_to_uint16 = cast(dtype = var_2020_shape_cast_fp16_to_uint16_dtype_0, x = var_2020_shape_cast_fp16)[name = string("cast_175")]; + uint16 gather_185_cast_uint16 = gather(axis = gather_185_axis_0, batch_dims = gather_185_batch_dims_0, indices = select_185_to_uint16, validate_indices = gather_185_validate_indices_0, x = var_2020_shape_cast_fp16_to_uint16)[name = string("gather_185_cast_uint16")]; + string gather_185_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_185_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81896896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82357760))))[name = string("model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_70_cast_fp16")]; + tensor model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82415424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82569088))))[name = string("model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82588352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82742016))))[name = string("model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_72_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_72_cast_fp16")]; + tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 15, 64])]; + tensor var_2029_cast_fp16 = reshape(shape = concat_190x, x = linear_70_cast_fp16)[name = string("op_2029_cast_fp16")]; + tensor q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_191x = const()[name = string("concat_191x"), val = tensor([1, -1, 5, 64])]; + tensor var_2032_cast_fp16 = reshape(shape = concat_191x, x = linear_71_cast_fp16)[name = string("op_2032_cast_fp16")]; + tensor k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_192x = const()[name = string("concat_192x"), val = tensor([1, -1, 5, 64])]; + tensor var_2035_cast_fp16 = reshape(shape = concat_192x, x = linear_72_cast_fp16)[name = string("op_2035_cast_fp16")]; + tensor v_state_21_perm_0 = const()[name = string("v_state_21_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_21_cast_fp16 = transpose(perm = q_21_perm_0, x = var_2029_cast_fp16)[name = string("transpose_87")]; + tensor var_2039_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2039_cast_fp16")]; + tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; + tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2050_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_2050_cast_fp16")]; + bool var_2052_interleave_0 = const()[name = string("op_2052_interleave_0"), val = bool(false)]; + tensor var_2052_cast_fp16 = concat(axis = var_85, interleave = var_2052_interleave_0, values = (var_2050_cast_fp16, x1_41_cast_fp16))[name = string("op_2052_cast_fp16")]; + tensor var_2053_cast_fp16 = mul(x = var_2052_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2053_cast_fp16")]; + tensor query_states_43_cast_fp16 = add(x = var_2039_cast_fp16, y = var_2053_cast_fp16)[name = string("query_states_43_cast_fp16")]; + tensor k_21_cast_fp16 = transpose(perm = k_21_perm_0, x = var_2032_cast_fp16)[name = string("transpose_86")]; + tensor var_2055_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2055_cast_fp16")]; + tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; + tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2066_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_2066_cast_fp16")]; + bool var_2068_interleave_0 = const()[name = string("op_2068_interleave_0"), val = bool(false)]; + tensor var_2068_cast_fp16 = concat(axis = var_85, interleave = var_2068_interleave_0, values = (var_2066_cast_fp16, x1_43_cast_fp16))[name = string("op_2068_cast_fp16")]; + tensor var_2069_cast_fp16 = mul(x = var_2068_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2069_cast_fp16")]; + tensor k_state_21_cast_fp16 = add(x = var_2055_cast_fp16, y = var_2069_cast_fp16)[name = string("k_state_21_cast_fp16")]; + tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([0])]; + tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; + tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; + tensor concat_195_values0_0 = const()[name = string("concat_195_values0_0"), val = tensor([10])]; + int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)]; + bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)]; + tensor concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (concat_195_values0_0, expand_dims_120, expand_dims_121, expand_dims_2, expand_dims_123))[name = string("concat_195")]; + tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_195, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = k_state_21_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_84")]; + tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_21_cast_fp16 = transpose(perm = v_state_21_perm_0, x = var_2035_cast_fp16)[name = string("transpose_85")]; + tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_195, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = v_state_21_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_85")]; + tensor var_2092_begin_0 = const()[name = string("op_2092_begin_0"), val = tensor([10, 0, 0, 0, 0])]; + tensor var_2092_end_0 = const()[name = string("op_2092_end_0"), val = tensor([11, 1, 5, 2048, 64])]; + tensor var_2092_end_mask_0 = const()[name = string("op_2092_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2092_squeeze_mask_0 = const()[name = string("op_2092_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2092_cast_fp16 = slice_by_index(begin = var_2092_begin_0, end = var_2092_end_0, end_mask = var_2092_end_mask_0, squeeze_mask = var_2092_squeeze_mask_0, x = coreml_update_state_84)[name = string("op_2092_cast_fp16")]; + tensor var_2095_begin_0 = const()[name = string("op_2095_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2095_end_mask_0 = const()[name = string("op_2095_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2095_cast_fp16 = slice_by_index(begin = var_2095_begin_0, end = concat_11, end_mask = var_2095_end_mask_0, x = var_2092_cast_fp16)[name = string("op_2095_cast_fp16")]; + tensor var_2097_begin_0 = const()[name = string("op_2097_begin_0"), val = tensor([10, 0, 0, 0, 0])]; + tensor var_2097_end_0 = const()[name = string("op_2097_end_0"), val = tensor([11, 1, 5, 2048, 64])]; + tensor var_2097_end_mask_0 = const()[name = string("op_2097_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2097_squeeze_mask_0 = const()[name = string("op_2097_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2097_cast_fp16 = slice_by_index(begin = var_2097_begin_0, end = var_2097_end_0, end_mask = var_2097_end_mask_0, squeeze_mask = var_2097_squeeze_mask_0, x = coreml_update_state_85)[name = string("op_2097_cast_fp16")]; + tensor var_2100_begin_0 = const()[name = string("op_2100_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2100_end_mask_0 = const()[name = string("op_2100_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2100_cast_fp16 = slice_by_index(begin = var_2100_begin_0, end = concat_11, end_mask = var_2100_end_mask_0, x = var_2097_cast_fp16)[name = string("op_2100_cast_fp16")]; + tensor var_2102_shape_cast_fp16 = shape(x = var_2095_cast_fp16)[name = string("op_2102_shape_cast_fp16")]; + int32 gather_193 = const()[name = string("gather_193"), val = int32(1)]; + int32 gather_194 = const()[name = string("gather_194"), val = int32(5)]; + int32 gather_195_axis_0 = const()[name = string("gather_195_axis_0"), val = int32(0)]; + int32 gather_195_batch_dims_0 = const()[name = string("gather_195_batch_dims_0"), val = int32(0)]; + bool gather_195_validate_indices_0 = const()[name = string("gather_195_validate_indices_0"), val = bool(false)]; + string var_2102_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2102_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_195_to_uint16 = const()[name = string("select_195_to_uint16"), val = uint16(2)]; + tensor var_2102_shape_cast_fp16_to_uint16 = cast(dtype = var_2102_shape_cast_fp16_to_uint16_dtype_0, x = var_2102_shape_cast_fp16)[name = string("cast_174")]; + uint16 gather_195_cast_uint16 = gather(axis = gather_195_axis_0, batch_dims = gather_195_batch_dims_0, indices = select_195_to_uint16, validate_indices = gather_195_validate_indices_0, x = var_2102_shape_cast_fp16_to_uint16)[name = string("gather_195_cast_uint16")]; + string gather_195_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_195_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_196 = const()[name = string("gather_196"), val = int32(64)]; + tensor var_2109_axes_0 = const()[name = string("op_2109_axes_0"), val = tensor([2])]; + tensor var_2109_cast_fp16 = expand_dims(axes = var_2109_axes_0, x = var_2095_cast_fp16)[name = string("op_2109_cast_fp16")]; + tensor shape_217_cast_fp16 = shape(x = var_2109_cast_fp16)[name = string("shape_217_cast_fp16")]; + int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; + bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; + int32 gather_195_cast_uint16_to_int32 = cast(dtype = gather_195_cast_uint16_to_int32_dtype_0, x = gather_195_cast_uint16)[name = string("cast_173")]; + tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (gather_193, gather_194, var_89, gather_195_cast_uint16_to_int32, gather_196))[name = string("concat_203")]; + tensor real_div_20 = real_div(x = concat_203, y = shape_217_cast_fp16)[name = string("real_div_20")]; + tensor hidden_states_311_cast_fp16 = tile(reps = real_div_20, x = var_2109_cast_fp16)[name = string("hidden_states_311_cast_fp16")]; + tensor concat_204x = const()[name = string("concat_204x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_43_cast_fp16 = reshape(shape = concat_204x, x = hidden_states_311_cast_fp16)[name = string("key_states_43_cast_fp16")]; + tensor var_2119_shape_cast_fp16 = shape(x = var_2100_cast_fp16)[name = string("op_2119_shape_cast_fp16")]; + int32 gather_197 = const()[name = string("gather_197"), val = int32(1)]; + int32 gather_198 = const()[name = string("gather_198"), val = int32(5)]; + int32 gather_199_axis_0 = const()[name = string("gather_199_axis_0"), val = int32(0)]; + int32 gather_199_batch_dims_0 = const()[name = string("gather_199_batch_dims_0"), val = int32(0)]; + bool gather_199_validate_indices_0 = const()[name = string("gather_199_validate_indices_0"), val = bool(false)]; + string var_2119_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2119_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_199_to_uint16 = const()[name = string("select_199_to_uint16"), val = uint16(2)]; + tensor var_2119_shape_cast_fp16_to_uint16 = cast(dtype = var_2119_shape_cast_fp16_to_uint16_dtype_0, x = var_2119_shape_cast_fp16)[name = string("cast_172")]; + uint16 gather_199_cast_uint16 = gather(axis = gather_199_axis_0, batch_dims = gather_199_batch_dims_0, indices = select_199_to_uint16, validate_indices = gather_199_validate_indices_0, x = var_2119_shape_cast_fp16_to_uint16)[name = string("gather_199_cast_uint16")]; + string gather_199_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_199_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_200 = const()[name = string("gather_200"), val = int32(64)]; + tensor var_2126_axes_0 = const()[name = string("op_2126_axes_0"), val = tensor([2])]; + tensor var_2126_cast_fp16 = expand_dims(axes = var_2126_axes_0, x = var_2100_cast_fp16)[name = string("op_2126_cast_fp16")]; + tensor shape_222_cast_fp16 = shape(x = var_2126_cast_fp16)[name = string("shape_222_cast_fp16")]; + int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; + bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; + int32 gather_199_cast_uint16_to_int32 = cast(dtype = gather_199_cast_uint16_to_int32_dtype_0, x = gather_199_cast_uint16)[name = string("cast_171")]; + tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (gather_197, gather_198, var_89, gather_199_cast_uint16_to_int32, gather_200))[name = string("concat_205")]; + tensor real_div_21 = real_div(x = concat_205, y = shape_222_cast_fp16)[name = string("real_div_21")]; + tensor hidden_states_315_cast_fp16 = tile(reps = real_div_21, x = var_2126_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; + tensor concat_206x = const()[name = string("concat_206x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_43_cast_fp16 = reshape(shape = concat_206x, x = hidden_states_315_cast_fp16)[name = string("value_states_43_cast_fp16")]; + tensor var_2136_shape_cast_fp16 = shape(x = key_states_43_cast_fp16)[name = string("op_2136_shape_cast_fp16")]; + int32 gather_201_axis_0 = const()[name = string("gather_201_axis_0"), val = int32(0)]; + int32 gather_201_batch_dims_0 = const()[name = string("gather_201_batch_dims_0"), val = int32(0)]; + bool gather_201_validate_indices_0 = const()[name = string("gather_201_validate_indices_0"), val = bool(false)]; + string var_2136_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2136_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_201_to_uint16 = const()[name = string("select_201_to_uint16"), val = uint16(2)]; + tensor var_2136_shape_cast_fp16_to_uint16 = cast(dtype = var_2136_shape_cast_fp16_to_uint16_dtype_0, x = var_2136_shape_cast_fp16)[name = string("cast_170")]; + uint16 gather_201_cast_uint16 = gather(axis = gather_201_axis_0, batch_dims = gather_201_batch_dims_0, indices = select_201_to_uint16, validate_indices = gather_201_validate_indices_0, x = var_2136_shape_cast_fp16_to_uint16)[name = string("gather_201_cast_uint16")]; + string gather_201_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_201_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_207_values0_0 = const()[name = string("concat_207_values0_0"), val = int32(1)]; + int32 concat_207_values1_0 = const()[name = string("concat_207_values1_0"), val = int32(1)]; + int32 concat_207_values2_0 = const()[name = string("concat_207_values2_0"), val = int32(0)]; + int32 concat_207_axis_0 = const()[name = string("concat_207_axis_0"), val = int32(0)]; + bool concat_207_interleave_0 = const()[name = string("concat_207_interleave_0"), val = bool(false)]; + int32 gather_201_cast_uint16_to_int32 = cast(dtype = gather_201_cast_uint16_to_int32_dtype_0, x = gather_201_cast_uint16)[name = string("cast_169")]; + tensor concat_207 = concat(axis = concat_207_axis_0, interleave = concat_207_interleave_0, values = (concat_207_values0_0, concat_207_values1_0, concat_207_values2_0, gather_201_cast_uint16_to_int32))[name = string("concat_207")]; + tensor causal_mask_23_begin_0 = const()[name = string("causal_mask_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_23_end_mask_0 = const()[name = string("causal_mask_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_23_cast_fp16 = slice_by_index(begin = causal_mask_23_begin_0, end = concat_207, end_mask = causal_mask_23_end_mask_0, x = causal_mask)[name = string("causal_mask_23_cast_fp16")]; + tensor attn_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_23_cast_fp16, key = key_states_43_cast_fp16, query = query_states_43_cast_fp16, value = value_states_43_cast_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor var_2142_perm_0 = const()[name = string("op_2142_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; + bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; + int32 gather_185_cast_uint16_to_int32 = cast(dtype = gather_185_cast_uint16_to_int32_dtype_0, x = gather_185_cast_uint16)[name = string("cast_168")]; + tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (gather_184, gather_185_cast_uint16_to_int32, var_85))[name = string("concat_208")]; + tensor var_2142_cast_fp16 = transpose(perm = var_2142_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_84")]; + tensor input_81_cast_fp16 = reshape(shape = concat_208, x = var_2142_cast_fp16)[name = string("input_81_cast_fp16")]; + tensor model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82761280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83222144))))[name = string("model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_73_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor hidden_states_319_cast_fp16 = add(x = hidden_states_299_cast_fp16, y = linear_73_cast_fp16)[name = string("hidden_states_319_cast_fp16")]; + fp16 var_80_promoted_21_to_fp16 = const()[name = string("op_80_promoted_21_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2151_cast_fp16 = pow(x = hidden_states_319_cast_fp16, y = var_80_promoted_21_to_fp16)[name = string("op_2151_cast_fp16")]; + tensor variance_43_axes_0 = const()[name = string("variance_43_axes_0"), val = tensor([-1])]; + bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; + tensor variance_43_cast_fp16 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_2151_cast_fp16)[name = string("variance_43_cast_fp16")]; + fp16 var_2154_to_fp16 = const()[name = string("op_2154_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2155_cast_fp16 = add(x = variance_43_cast_fp16, y = var_2154_to_fp16)[name = string("op_2155_cast_fp16")]; + fp32 var_2156_epsilon_0 = const()[name = string("op_2156_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2156_cast_fp16 = rsqrt(epsilon = var_2156_epsilon_0, x = var_2155_cast_fp16)[name = string("op_2156_cast_fp16")]; + tensor hidden_states_323_cast_fp16 = mul(x = hidden_states_319_cast_fp16, y = var_2156_cast_fp16)[name = string("hidden_states_323_cast_fp16")]; + tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83279808)))]; + tensor input_83_cast_fp16 = mul(x = model_model_layers_10_post_attention_layernorm_weight_to_fp16, y = hidden_states_323_cast_fp16)[name = string("input_83_cast_fp16")]; + tensor model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83281792))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84510656))))[name = string("model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_2168_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_2168_cast_fp16")]; + tensor model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84664320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85893184))))[name = string("model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor input_87_cast_fp16 = mul(x = var_2168_cast_fp16, y = linear_75_cast_fp16)[name = string("input_87_cast_fp16")]; + tensor model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86046848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87275712))))[name = string("model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_76_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized, x = input_87_cast_fp16)[name = string("linear_76_cast_fp16")]; + tensor hidden_states_329_cast_fp16 = add(x = hidden_states_319_cast_fp16, y = linear_76_cast_fp16)[name = string("hidden_states_329_cast_fp16")]; + fp16 var_80_promoted_22_to_fp16 = const()[name = string("op_80_promoted_22_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2181_cast_fp16 = pow(x = hidden_states_329_cast_fp16, y = var_80_promoted_22_to_fp16)[name = string("op_2181_cast_fp16")]; + tensor variance_45_axes_0 = const()[name = string("variance_45_axes_0"), val = tensor([-1])]; + bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; + tensor variance_45_cast_fp16 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_2181_cast_fp16)[name = string("variance_45_cast_fp16")]; + fp16 var_2184_to_fp16 = const()[name = string("op_2184_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2185_cast_fp16 = add(x = variance_45_cast_fp16, y = var_2184_to_fp16)[name = string("op_2185_cast_fp16")]; + fp32 var_2186_epsilon_0 = const()[name = string("op_2186_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2186_cast_fp16 = rsqrt(epsilon = var_2186_epsilon_0, x = var_2185_cast_fp16)[name = string("op_2186_cast_fp16")]; + tensor hidden_states_333_cast_fp16 = mul(x = hidden_states_329_cast_fp16, y = var_2186_cast_fp16)[name = string("hidden_states_333_cast_fp16")]; + tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87429376)))]; + tensor hidden_states_337_cast_fp16 = mul(x = model_model_layers_11_input_layernorm_weight_to_fp16, y = hidden_states_333_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; + tensor var_2197_shape_cast_fp16 = shape(x = hidden_states_337_cast_fp16)[name = string("op_2197_shape_cast_fp16")]; + int32 gather_202 = const()[name = string("gather_202"), val = int32(1)]; + int32 gather_203_axis_0 = const()[name = string("gather_203_axis_0"), val = int32(0)]; + int32 gather_203_batch_dims_0 = const()[name = string("gather_203_batch_dims_0"), val = int32(0)]; + bool gather_203_validate_indices_0 = const()[name = string("gather_203_validate_indices_0"), val = bool(false)]; + string var_2197_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2197_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_203_to_uint16 = const()[name = string("select_203_to_uint16"), val = uint16(1)]; + tensor var_2197_shape_cast_fp16_to_uint16 = cast(dtype = var_2197_shape_cast_fp16_to_uint16_dtype_0, x = var_2197_shape_cast_fp16)[name = string("cast_167")]; + uint16 gather_203_cast_uint16 = gather(axis = gather_203_axis_0, batch_dims = gather_203_batch_dims_0, indices = select_203_to_uint16, validate_indices = gather_203_validate_indices_0, x = var_2197_shape_cast_fp16_to_uint16)[name = string("gather_203_cast_uint16")]; + string gather_203_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_203_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87431360))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87892224))))[name = string("model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87949888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88103552))))[name = string("model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_78_cast_fp16")]; + tensor model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88122816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88276480))))[name = string("model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_79_cast_fp16")]; + tensor concat_209x = const()[name = string("concat_209x"), val = tensor([1, -1, 15, 64])]; + tensor var_2206_cast_fp16 = reshape(shape = concat_209x, x = linear_77_cast_fp16)[name = string("op_2206_cast_fp16")]; + tensor q_23_perm_0 = const()[name = string("q_23_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 5, 64])]; + tensor var_2209_cast_fp16 = reshape(shape = concat_210x, x = linear_78_cast_fp16)[name = string("op_2209_cast_fp16")]; + tensor k_23_perm_0 = const()[name = string("k_23_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 5, 64])]; + tensor var_2212_cast_fp16 = reshape(shape = concat_211x, x = linear_79_cast_fp16)[name = string("op_2212_cast_fp16")]; + tensor v_state_23_perm_0 = const()[name = string("v_state_23_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_23_cast_fp16 = transpose(perm = q_23_perm_0, x = var_2206_cast_fp16)[name = string("transpose_83")]; + tensor var_2216_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2216_cast_fp16")]; + tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; + tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; + fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2227_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_25_promoted_to_fp16)[name = string("op_2227_cast_fp16")]; + bool var_2229_interleave_0 = const()[name = string("op_2229_interleave_0"), val = bool(false)]; + tensor var_2229_cast_fp16 = concat(axis = var_85, interleave = var_2229_interleave_0, values = (var_2227_cast_fp16, x1_45_cast_fp16))[name = string("op_2229_cast_fp16")]; + tensor var_2230_cast_fp16 = mul(x = var_2229_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2230_cast_fp16")]; + tensor query_states_47_cast_fp16 = add(x = var_2216_cast_fp16, y = var_2230_cast_fp16)[name = string("query_states_47_cast_fp16")]; + tensor k_23_cast_fp16 = transpose(perm = k_23_perm_0, x = var_2209_cast_fp16)[name = string("transpose_82")]; + tensor var_2232_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2232_cast_fp16")]; + tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; + tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; + fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2243_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_2243_cast_fp16")]; + bool var_2245_interleave_0 = const()[name = string("op_2245_interleave_0"), val = bool(false)]; + tensor var_2245_cast_fp16 = concat(axis = var_85, interleave = var_2245_interleave_0, values = (var_2243_cast_fp16, x1_47_cast_fp16))[name = string("op_2245_cast_fp16")]; + tensor var_2246_cast_fp16 = mul(x = var_2245_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2246_cast_fp16")]; + tensor k_state_23_cast_fp16 = add(x = var_2232_cast_fp16, y = var_2246_cast_fp16)[name = string("k_state_23_cast_fp16")]; + tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; + tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; + tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; + tensor concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = tensor([11])]; + int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; + bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; + tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, expand_dims_132, expand_dims_133, expand_dims_2, expand_dims_135))[name = string("concat_214")]; + tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_214, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = k_state_23_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_86")]; + tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_23_cast_fp16 = transpose(perm = v_state_23_perm_0, x = var_2212_cast_fp16)[name = string("transpose_81")]; + tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_214, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = v_state_23_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_87")]; + tensor var_2269_begin_0 = const()[name = string("op_2269_begin_0"), val = tensor([11, 0, 0, 0, 0])]; + tensor var_2269_end_0 = const()[name = string("op_2269_end_0"), val = tensor([12, 1, 5, 2048, 64])]; + tensor var_2269_end_mask_0 = const()[name = string("op_2269_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2269_squeeze_mask_0 = const()[name = string("op_2269_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2269_cast_fp16 = slice_by_index(begin = var_2269_begin_0, end = var_2269_end_0, end_mask = var_2269_end_mask_0, squeeze_mask = var_2269_squeeze_mask_0, x = coreml_update_state_86)[name = string("op_2269_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = string("op_2272_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2272_end_mask_0 = const()[name = string("op_2272_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = concat_11, end_mask = var_2272_end_mask_0, x = var_2269_cast_fp16)[name = string("op_2272_cast_fp16")]; + tensor var_2274_begin_0 = const()[name = string("op_2274_begin_0"), val = tensor([11, 0, 0, 0, 0])]; + tensor var_2274_end_0 = const()[name = string("op_2274_end_0"), val = tensor([12, 1, 5, 2048, 64])]; + tensor var_2274_end_mask_0 = const()[name = string("op_2274_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2274_squeeze_mask_0 = const()[name = string("op_2274_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2274_cast_fp16 = slice_by_index(begin = var_2274_begin_0, end = var_2274_end_0, end_mask = var_2274_end_mask_0, squeeze_mask = var_2274_squeeze_mask_0, x = coreml_update_state_87)[name = string("op_2274_cast_fp16")]; + tensor var_2277_begin_0 = const()[name = string("op_2277_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2277_end_mask_0 = const()[name = string("op_2277_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2277_cast_fp16 = slice_by_index(begin = var_2277_begin_0, end = concat_11, end_mask = var_2277_end_mask_0, x = var_2274_cast_fp16)[name = string("op_2277_cast_fp16")]; + tensor var_2279_shape_cast_fp16 = shape(x = var_2272_cast_fp16)[name = string("op_2279_shape_cast_fp16")]; + int32 gather_211 = const()[name = string("gather_211"), val = int32(1)]; + int32 gather_212 = const()[name = string("gather_212"), val = int32(5)]; + int32 gather_213_axis_0 = const()[name = string("gather_213_axis_0"), val = int32(0)]; + int32 gather_213_batch_dims_0 = const()[name = string("gather_213_batch_dims_0"), val = int32(0)]; + bool gather_213_validate_indices_0 = const()[name = string("gather_213_validate_indices_0"), val = bool(false)]; + string var_2279_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2279_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_213_to_uint16 = const()[name = string("select_213_to_uint16"), val = uint16(2)]; + tensor var_2279_shape_cast_fp16_to_uint16 = cast(dtype = var_2279_shape_cast_fp16_to_uint16_dtype_0, x = var_2279_shape_cast_fp16)[name = string("cast_166")]; + uint16 gather_213_cast_uint16 = gather(axis = gather_213_axis_0, batch_dims = gather_213_batch_dims_0, indices = select_213_to_uint16, validate_indices = gather_213_validate_indices_0, x = var_2279_shape_cast_fp16_to_uint16)[name = string("gather_213_cast_uint16")]; + string gather_213_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_213_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_214 = const()[name = string("gather_214"), val = int32(64)]; + tensor var_2286_axes_0 = const()[name = string("op_2286_axes_0"), val = tensor([2])]; + tensor var_2286_cast_fp16 = expand_dims(axes = var_2286_axes_0, x = var_2272_cast_fp16)[name = string("op_2286_cast_fp16")]; + tensor shape_237_cast_fp16 = shape(x = var_2286_cast_fp16)[name = string("shape_237_cast_fp16")]; + int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; + bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; + int32 gather_213_cast_uint16_to_int32 = cast(dtype = gather_213_cast_uint16_to_int32_dtype_0, x = gather_213_cast_uint16)[name = string("cast_165")]; + tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (gather_211, gather_212, var_89, gather_213_cast_uint16_to_int32, gather_214))[name = string("concat_222")]; + tensor real_div_22 = real_div(x = concat_222, y = shape_237_cast_fp16)[name = string("real_div_22")]; + tensor hidden_states_341_cast_fp16 = tile(reps = real_div_22, x = var_2286_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; + tensor concat_223x = const()[name = string("concat_223x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_47_cast_fp16 = reshape(shape = concat_223x, x = hidden_states_341_cast_fp16)[name = string("key_states_47_cast_fp16")]; + tensor var_2296_shape_cast_fp16 = shape(x = var_2277_cast_fp16)[name = string("op_2296_shape_cast_fp16")]; + int32 gather_215 = const()[name = string("gather_215"), val = int32(1)]; + int32 gather_216 = const()[name = string("gather_216"), val = int32(5)]; + int32 gather_217_axis_0 = const()[name = string("gather_217_axis_0"), val = int32(0)]; + int32 gather_217_batch_dims_0 = const()[name = string("gather_217_batch_dims_0"), val = int32(0)]; + bool gather_217_validate_indices_0 = const()[name = string("gather_217_validate_indices_0"), val = bool(false)]; + string var_2296_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2296_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_217_to_uint16 = const()[name = string("select_217_to_uint16"), val = uint16(2)]; + tensor var_2296_shape_cast_fp16_to_uint16 = cast(dtype = var_2296_shape_cast_fp16_to_uint16_dtype_0, x = var_2296_shape_cast_fp16)[name = string("cast_164")]; + uint16 gather_217_cast_uint16 = gather(axis = gather_217_axis_0, batch_dims = gather_217_batch_dims_0, indices = select_217_to_uint16, validate_indices = gather_217_validate_indices_0, x = var_2296_shape_cast_fp16_to_uint16)[name = string("gather_217_cast_uint16")]; + string gather_217_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_217_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_218 = const()[name = string("gather_218"), val = int32(64)]; + tensor var_2303_axes_0 = const()[name = string("op_2303_axes_0"), val = tensor([2])]; + tensor var_2303_cast_fp16 = expand_dims(axes = var_2303_axes_0, x = var_2277_cast_fp16)[name = string("op_2303_cast_fp16")]; + tensor shape_242_cast_fp16 = shape(x = var_2303_cast_fp16)[name = string("shape_242_cast_fp16")]; + int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; + bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; + int32 gather_217_cast_uint16_to_int32 = cast(dtype = gather_217_cast_uint16_to_int32_dtype_0, x = gather_217_cast_uint16)[name = string("cast_163")]; + tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (gather_215, gather_216, var_89, gather_217_cast_uint16_to_int32, gather_218))[name = string("concat_224")]; + tensor real_div_23 = real_div(x = concat_224, y = shape_242_cast_fp16)[name = string("real_div_23")]; + tensor hidden_states_345_cast_fp16 = tile(reps = real_div_23, x = var_2303_cast_fp16)[name = string("hidden_states_345_cast_fp16")]; + tensor concat_225x = const()[name = string("concat_225x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_47_cast_fp16 = reshape(shape = concat_225x, x = hidden_states_345_cast_fp16)[name = string("value_states_47_cast_fp16")]; + tensor var_2313_shape_cast_fp16 = shape(x = key_states_47_cast_fp16)[name = string("op_2313_shape_cast_fp16")]; + int32 gather_219_axis_0 = const()[name = string("gather_219_axis_0"), val = int32(0)]; + int32 gather_219_batch_dims_0 = const()[name = string("gather_219_batch_dims_0"), val = int32(0)]; + bool gather_219_validate_indices_0 = const()[name = string("gather_219_validate_indices_0"), val = bool(false)]; + string var_2313_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2313_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_219_to_uint16 = const()[name = string("select_219_to_uint16"), val = uint16(2)]; + tensor var_2313_shape_cast_fp16_to_uint16 = cast(dtype = var_2313_shape_cast_fp16_to_uint16_dtype_0, x = var_2313_shape_cast_fp16)[name = string("cast_162")]; + uint16 gather_219_cast_uint16 = gather(axis = gather_219_axis_0, batch_dims = gather_219_batch_dims_0, indices = select_219_to_uint16, validate_indices = gather_219_validate_indices_0, x = var_2313_shape_cast_fp16_to_uint16)[name = string("gather_219_cast_uint16")]; + string gather_219_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_219_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_226_values0_0 = const()[name = string("concat_226_values0_0"), val = int32(1)]; + int32 concat_226_values1_0 = const()[name = string("concat_226_values1_0"), val = int32(1)]; + int32 concat_226_values2_0 = const()[name = string("concat_226_values2_0"), val = int32(0)]; + int32 concat_226_axis_0 = const()[name = string("concat_226_axis_0"), val = int32(0)]; + bool concat_226_interleave_0 = const()[name = string("concat_226_interleave_0"), val = bool(false)]; + int32 gather_219_cast_uint16_to_int32 = cast(dtype = gather_219_cast_uint16_to_int32_dtype_0, x = gather_219_cast_uint16)[name = string("cast_161")]; + tensor concat_226 = concat(axis = concat_226_axis_0, interleave = concat_226_interleave_0, values = (concat_226_values0_0, concat_226_values1_0, concat_226_values2_0, gather_219_cast_uint16_to_int32))[name = string("concat_226")]; + tensor causal_mask_25_begin_0 = const()[name = string("causal_mask_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_25_end_mask_0 = const()[name = string("causal_mask_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_25_cast_fp16 = slice_by_index(begin = causal_mask_25_begin_0, end = concat_226, end_mask = causal_mask_25_end_mask_0, x = causal_mask)[name = string("causal_mask_25_cast_fp16")]; + tensor attn_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_25_cast_fp16, key = key_states_47_cast_fp16, query = query_states_47_cast_fp16, value = value_states_47_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_2319_perm_0 = const()[name = string("op_2319_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_227_axis_0 = const()[name = string("concat_227_axis_0"), val = int32(0)]; + bool concat_227_interleave_0 = const()[name = string("concat_227_interleave_0"), val = bool(false)]; + int32 gather_203_cast_uint16_to_int32 = cast(dtype = gather_203_cast_uint16_to_int32_dtype_0, x = gather_203_cast_uint16)[name = string("cast_160")]; + tensor concat_227 = concat(axis = concat_227_axis_0, interleave = concat_227_interleave_0, values = (gather_202, gather_203_cast_uint16_to_int32, var_85))[name = string("concat_227")]; + tensor var_2319_cast_fp16 = transpose(perm = var_2319_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_80")]; + tensor input_89_cast_fp16 = reshape(shape = concat_227, x = var_2319_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88295744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88756608))))[name = string("model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_80_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized, x = input_89_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor hidden_states_349_cast_fp16 = add(x = hidden_states_329_cast_fp16, y = linear_80_cast_fp16)[name = string("hidden_states_349_cast_fp16")]; + fp16 var_80_promoted_23_to_fp16 = const()[name = string("op_80_promoted_23_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2328_cast_fp16 = pow(x = hidden_states_349_cast_fp16, y = var_80_promoted_23_to_fp16)[name = string("op_2328_cast_fp16")]; + tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([-1])]; + bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; + tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_2328_cast_fp16)[name = string("variance_47_cast_fp16")]; + fp16 var_2331_to_fp16 = const()[name = string("op_2331_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2332_cast_fp16 = add(x = variance_47_cast_fp16, y = var_2331_to_fp16)[name = string("op_2332_cast_fp16")]; + fp32 var_2333_epsilon_0 = const()[name = string("op_2333_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2333_cast_fp16 = rsqrt(epsilon = var_2333_epsilon_0, x = var_2332_cast_fp16)[name = string("op_2333_cast_fp16")]; + tensor hidden_states_353_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = var_2333_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; + tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88814272)))]; + tensor input_91_cast_fp16 = mul(x = model_model_layers_11_post_attention_layernorm_weight_to_fp16, y = hidden_states_353_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88816256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90045120))))[name = string("model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor var_2345_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_2345_cast_fp16")]; + tensor model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90198784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91427648))))[name = string("model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_82_cast_fp16")]; + tensor input_95_cast_fp16 = mul(x = var_2345_cast_fp16, y = linear_82_cast_fp16)[name = string("input_95_cast_fp16")]; + tensor model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91581312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92810176))))[name = string("model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_83_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor hidden_states_359_cast_fp16 = add(x = hidden_states_349_cast_fp16, y = linear_83_cast_fp16)[name = string("hidden_states_359_cast_fp16")]; + fp16 var_80_promoted_24_to_fp16 = const()[name = string("op_80_promoted_24_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2358_cast_fp16 = pow(x = hidden_states_359_cast_fp16, y = var_80_promoted_24_to_fp16)[name = string("op_2358_cast_fp16")]; + tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([-1])]; + bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; + tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_2358_cast_fp16)[name = string("variance_49_cast_fp16")]; + fp16 var_2361_to_fp16 = const()[name = string("op_2361_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2362_cast_fp16 = add(x = variance_49_cast_fp16, y = var_2361_to_fp16)[name = string("op_2362_cast_fp16")]; + fp32 var_2363_epsilon_0 = const()[name = string("op_2363_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2363_cast_fp16 = rsqrt(epsilon = var_2363_epsilon_0, x = var_2362_cast_fp16)[name = string("op_2363_cast_fp16")]; + tensor hidden_states_363_cast_fp16 = mul(x = hidden_states_359_cast_fp16, y = var_2363_cast_fp16)[name = string("hidden_states_363_cast_fp16")]; + tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92963840)))]; + tensor hidden_states_367_cast_fp16 = mul(x = model_model_layers_12_input_layernorm_weight_to_fp16, y = hidden_states_363_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; + tensor var_2374_shape_cast_fp16 = shape(x = hidden_states_367_cast_fp16)[name = string("op_2374_shape_cast_fp16")]; + int32 gather_220 = const()[name = string("gather_220"), val = int32(1)]; + int32 gather_221_axis_0 = const()[name = string("gather_221_axis_0"), val = int32(0)]; + int32 gather_221_batch_dims_0 = const()[name = string("gather_221_batch_dims_0"), val = int32(0)]; + bool gather_221_validate_indices_0 = const()[name = string("gather_221_validate_indices_0"), val = bool(false)]; + string var_2374_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2374_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_221_to_uint16 = const()[name = string("select_221_to_uint16"), val = uint16(1)]; + tensor var_2374_shape_cast_fp16_to_uint16 = cast(dtype = var_2374_shape_cast_fp16_to_uint16_dtype_0, x = var_2374_shape_cast_fp16)[name = string("cast_159")]; + uint16 gather_221_cast_uint16 = gather(axis = gather_221_axis_0, batch_dims = gather_221_batch_dims_0, indices = select_221_to_uint16, validate_indices = gather_221_validate_indices_0, x = var_2374_shape_cast_fp16_to_uint16)[name = string("gather_221_cast_uint16")]; + string gather_221_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_221_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92965824))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93426688))))[name = string("model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93484352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93638016))))[name = string("model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93657280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93810944))))[name = string("model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_86_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_86_cast_fp16")]; + tensor concat_228x = const()[name = string("concat_228x"), val = tensor([1, -1, 15, 64])]; + tensor var_2383_cast_fp16 = reshape(shape = concat_228x, x = linear_84_cast_fp16)[name = string("op_2383_cast_fp16")]; + tensor q_25_perm_0 = const()[name = string("q_25_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_229x = const()[name = string("concat_229x"), val = tensor([1, -1, 5, 64])]; + tensor var_2386_cast_fp16 = reshape(shape = concat_229x, x = linear_85_cast_fp16)[name = string("op_2386_cast_fp16")]; + tensor k_25_perm_0 = const()[name = string("k_25_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_230x = const()[name = string("concat_230x"), val = tensor([1, -1, 5, 64])]; + tensor var_2389_cast_fp16 = reshape(shape = concat_230x, x = linear_86_cast_fp16)[name = string("op_2389_cast_fp16")]; + tensor v_state_25_perm_0 = const()[name = string("v_state_25_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_25_cast_fp16 = transpose(perm = q_25_perm_0, x = var_2383_cast_fp16)[name = string("transpose_79")]; + tensor var_2393_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2393_cast_fp16")]; + tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; + tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2404_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_2404_cast_fp16")]; + bool var_2406_interleave_0 = const()[name = string("op_2406_interleave_0"), val = bool(false)]; + tensor var_2406_cast_fp16 = concat(axis = var_85, interleave = var_2406_interleave_0, values = (var_2404_cast_fp16, x1_49_cast_fp16))[name = string("op_2406_cast_fp16")]; + tensor var_2407_cast_fp16 = mul(x = var_2406_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2407_cast_fp16")]; + tensor query_states_51_cast_fp16 = add(x = var_2393_cast_fp16, y = var_2407_cast_fp16)[name = string("query_states_51_cast_fp16")]; + tensor k_25_cast_fp16 = transpose(perm = k_25_perm_0, x = var_2386_cast_fp16)[name = string("transpose_78")]; + tensor var_2409_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2409_cast_fp16")]; + tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; + tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2420_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_2420_cast_fp16")]; + bool var_2422_interleave_0 = const()[name = string("op_2422_interleave_0"), val = bool(false)]; + tensor var_2422_cast_fp16 = concat(axis = var_85, interleave = var_2422_interleave_0, values = (var_2420_cast_fp16, x1_51_cast_fp16))[name = string("op_2422_cast_fp16")]; + tensor var_2423_cast_fp16 = mul(x = var_2422_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2423_cast_fp16")]; + tensor k_state_25_cast_fp16 = add(x = var_2409_cast_fp16, y = var_2423_cast_fp16)[name = string("k_state_25_cast_fp16")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; + tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; + tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; + tensor concat_233_values0_0 = const()[name = string("concat_233_values0_0"), val = tensor([12])]; + int32 concat_233_axis_0 = const()[name = string("concat_233_axis_0"), val = int32(0)]; + bool concat_233_interleave_0 = const()[name = string("concat_233_interleave_0"), val = bool(false)]; + tensor concat_233 = concat(axis = concat_233_axis_0, interleave = concat_233_interleave_0, values = (concat_233_values0_0, expand_dims_144, expand_dims_145, expand_dims_2, expand_dims_147))[name = string("concat_233")]; + tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_233, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = k_state_25_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_88")]; + tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_25_cast_fp16 = transpose(perm = v_state_25_perm_0, x = var_2389_cast_fp16)[name = string("transpose_77")]; + tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_233, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = v_state_25_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_89")]; + tensor var_2446_begin_0 = const()[name = string("op_2446_begin_0"), val = tensor([12, 0, 0, 0, 0])]; + tensor var_2446_end_0 = const()[name = string("op_2446_end_0"), val = tensor([13, 1, 5, 2048, 64])]; + tensor var_2446_end_mask_0 = const()[name = string("op_2446_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2446_squeeze_mask_0 = const()[name = string("op_2446_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2446_cast_fp16 = slice_by_index(begin = var_2446_begin_0, end = var_2446_end_0, end_mask = var_2446_end_mask_0, squeeze_mask = var_2446_squeeze_mask_0, x = coreml_update_state_88)[name = string("op_2446_cast_fp16")]; + tensor var_2449_begin_0 = const()[name = string("op_2449_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2449_end_mask_0 = const()[name = string("op_2449_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2449_cast_fp16 = slice_by_index(begin = var_2449_begin_0, end = concat_11, end_mask = var_2449_end_mask_0, x = var_2446_cast_fp16)[name = string("op_2449_cast_fp16")]; + tensor var_2451_begin_0 = const()[name = string("op_2451_begin_0"), val = tensor([12, 0, 0, 0, 0])]; + tensor var_2451_end_0 = const()[name = string("op_2451_end_0"), val = tensor([13, 1, 5, 2048, 64])]; + tensor var_2451_end_mask_0 = const()[name = string("op_2451_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2451_squeeze_mask_0 = const()[name = string("op_2451_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2451_cast_fp16 = slice_by_index(begin = var_2451_begin_0, end = var_2451_end_0, end_mask = var_2451_end_mask_0, squeeze_mask = var_2451_squeeze_mask_0, x = coreml_update_state_89)[name = string("op_2451_cast_fp16")]; + tensor var_2454_begin_0 = const()[name = string("op_2454_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2454_end_mask_0 = const()[name = string("op_2454_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2454_cast_fp16 = slice_by_index(begin = var_2454_begin_0, end = concat_11, end_mask = var_2454_end_mask_0, x = var_2451_cast_fp16)[name = string("op_2454_cast_fp16")]; + tensor var_2456_shape_cast_fp16 = shape(x = var_2449_cast_fp16)[name = string("op_2456_shape_cast_fp16")]; + int32 gather_229 = const()[name = string("gather_229"), val = int32(1)]; + int32 gather_230 = const()[name = string("gather_230"), val = int32(5)]; + int32 gather_231_axis_0 = const()[name = string("gather_231_axis_0"), val = int32(0)]; + int32 gather_231_batch_dims_0 = const()[name = string("gather_231_batch_dims_0"), val = int32(0)]; + bool gather_231_validate_indices_0 = const()[name = string("gather_231_validate_indices_0"), val = bool(false)]; + string var_2456_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2456_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_231_to_uint16 = const()[name = string("select_231_to_uint16"), val = uint16(2)]; + tensor var_2456_shape_cast_fp16_to_uint16 = cast(dtype = var_2456_shape_cast_fp16_to_uint16_dtype_0, x = var_2456_shape_cast_fp16)[name = string("cast_158")]; + uint16 gather_231_cast_uint16 = gather(axis = gather_231_axis_0, batch_dims = gather_231_batch_dims_0, indices = select_231_to_uint16, validate_indices = gather_231_validate_indices_0, x = var_2456_shape_cast_fp16_to_uint16)[name = string("gather_231_cast_uint16")]; + string gather_231_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_231_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_232 = const()[name = string("gather_232"), val = int32(64)]; + tensor var_2463_axes_0 = const()[name = string("op_2463_axes_0"), val = tensor([2])]; + tensor var_2463_cast_fp16 = expand_dims(axes = var_2463_axes_0, x = var_2449_cast_fp16)[name = string("op_2463_cast_fp16")]; + tensor shape_257_cast_fp16 = shape(x = var_2463_cast_fp16)[name = string("shape_257_cast_fp16")]; + int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; + bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; + int32 gather_231_cast_uint16_to_int32 = cast(dtype = gather_231_cast_uint16_to_int32_dtype_0, x = gather_231_cast_uint16)[name = string("cast_157")]; + tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (gather_229, gather_230, var_89, gather_231_cast_uint16_to_int32, gather_232))[name = string("concat_241")]; + tensor real_div_24 = real_div(x = concat_241, y = shape_257_cast_fp16)[name = string("real_div_24")]; + tensor hidden_states_371_cast_fp16 = tile(reps = real_div_24, x = var_2463_cast_fp16)[name = string("hidden_states_371_cast_fp16")]; + tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_51_cast_fp16 = reshape(shape = concat_242x, x = hidden_states_371_cast_fp16)[name = string("key_states_51_cast_fp16")]; + tensor var_2473_shape_cast_fp16 = shape(x = var_2454_cast_fp16)[name = string("op_2473_shape_cast_fp16")]; + int32 gather_233 = const()[name = string("gather_233"), val = int32(1)]; + int32 gather_234 = const()[name = string("gather_234"), val = int32(5)]; + int32 gather_235_axis_0 = const()[name = string("gather_235_axis_0"), val = int32(0)]; + int32 gather_235_batch_dims_0 = const()[name = string("gather_235_batch_dims_0"), val = int32(0)]; + bool gather_235_validate_indices_0 = const()[name = string("gather_235_validate_indices_0"), val = bool(false)]; + string var_2473_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2473_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_235_to_uint16 = const()[name = string("select_235_to_uint16"), val = uint16(2)]; + tensor var_2473_shape_cast_fp16_to_uint16 = cast(dtype = var_2473_shape_cast_fp16_to_uint16_dtype_0, x = var_2473_shape_cast_fp16)[name = string("cast_156")]; + uint16 gather_235_cast_uint16 = gather(axis = gather_235_axis_0, batch_dims = gather_235_batch_dims_0, indices = select_235_to_uint16, validate_indices = gather_235_validate_indices_0, x = var_2473_shape_cast_fp16_to_uint16)[name = string("gather_235_cast_uint16")]; + string gather_235_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_235_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_236 = const()[name = string("gather_236"), val = int32(64)]; + tensor var_2480_axes_0 = const()[name = string("op_2480_axes_0"), val = tensor([2])]; + tensor var_2480_cast_fp16 = expand_dims(axes = var_2480_axes_0, x = var_2454_cast_fp16)[name = string("op_2480_cast_fp16")]; + tensor shape_262_cast_fp16 = shape(x = var_2480_cast_fp16)[name = string("shape_262_cast_fp16")]; + int32 concat_243_axis_0 = const()[name = string("concat_243_axis_0"), val = int32(0)]; + bool concat_243_interleave_0 = const()[name = string("concat_243_interleave_0"), val = bool(false)]; + int32 gather_235_cast_uint16_to_int32 = cast(dtype = gather_235_cast_uint16_to_int32_dtype_0, x = gather_235_cast_uint16)[name = string("cast_155")]; + tensor concat_243 = concat(axis = concat_243_axis_0, interleave = concat_243_interleave_0, values = (gather_233, gather_234, var_89, gather_235_cast_uint16_to_int32, gather_236))[name = string("concat_243")]; + tensor real_div_25 = real_div(x = concat_243, y = shape_262_cast_fp16)[name = string("real_div_25")]; + tensor hidden_states_375_cast_fp16 = tile(reps = real_div_25, x = var_2480_cast_fp16)[name = string("hidden_states_375_cast_fp16")]; + tensor concat_244x = const()[name = string("concat_244x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_51_cast_fp16 = reshape(shape = concat_244x, x = hidden_states_375_cast_fp16)[name = string("value_states_51_cast_fp16")]; + tensor var_2490_shape_cast_fp16 = shape(x = key_states_51_cast_fp16)[name = string("op_2490_shape_cast_fp16")]; + int32 gather_237_axis_0 = const()[name = string("gather_237_axis_0"), val = int32(0)]; + int32 gather_237_batch_dims_0 = const()[name = string("gather_237_batch_dims_0"), val = int32(0)]; + bool gather_237_validate_indices_0 = const()[name = string("gather_237_validate_indices_0"), val = bool(false)]; + string var_2490_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2490_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_237_to_uint16 = const()[name = string("select_237_to_uint16"), val = uint16(2)]; + tensor var_2490_shape_cast_fp16_to_uint16 = cast(dtype = var_2490_shape_cast_fp16_to_uint16_dtype_0, x = var_2490_shape_cast_fp16)[name = string("cast_154")]; + uint16 gather_237_cast_uint16 = gather(axis = gather_237_axis_0, batch_dims = gather_237_batch_dims_0, indices = select_237_to_uint16, validate_indices = gather_237_validate_indices_0, x = var_2490_shape_cast_fp16_to_uint16)[name = string("gather_237_cast_uint16")]; + string gather_237_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_237_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_245_values0_0 = const()[name = string("concat_245_values0_0"), val = int32(1)]; + int32 concat_245_values1_0 = const()[name = string("concat_245_values1_0"), val = int32(1)]; + int32 concat_245_values2_0 = const()[name = string("concat_245_values2_0"), val = int32(0)]; + int32 concat_245_axis_0 = const()[name = string("concat_245_axis_0"), val = int32(0)]; + bool concat_245_interleave_0 = const()[name = string("concat_245_interleave_0"), val = bool(false)]; + int32 gather_237_cast_uint16_to_int32 = cast(dtype = gather_237_cast_uint16_to_int32_dtype_0, x = gather_237_cast_uint16)[name = string("cast_153")]; + tensor concat_245 = concat(axis = concat_245_axis_0, interleave = concat_245_interleave_0, values = (concat_245_values0_0, concat_245_values1_0, concat_245_values2_0, gather_237_cast_uint16_to_int32))[name = string("concat_245")]; + tensor causal_mask_27_begin_0 = const()[name = string("causal_mask_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_27_end_mask_0 = const()[name = string("causal_mask_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_27_cast_fp16 = slice_by_index(begin = causal_mask_27_begin_0, end = concat_245, end_mask = causal_mask_27_end_mask_0, x = causal_mask)[name = string("causal_mask_27_cast_fp16")]; + tensor attn_output_49_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_27_cast_fp16, key = key_states_51_cast_fp16, query = query_states_51_cast_fp16, value = value_states_51_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_2496_perm_0 = const()[name = string("op_2496_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; + bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; + int32 gather_221_cast_uint16_to_int32 = cast(dtype = gather_221_cast_uint16_to_int32_dtype_0, x = gather_221_cast_uint16)[name = string("cast_152")]; + tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (gather_220, gather_221_cast_uint16_to_int32, var_85))[name = string("concat_246")]; + tensor var_2496_cast_fp16 = transpose(perm = var_2496_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_76")]; + tensor input_97_cast_fp16 = reshape(shape = concat_246, x = var_2496_cast_fp16)[name = string("input_97_cast_fp16")]; + tensor model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93830208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94291072))))[name = string("model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_87_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor hidden_states_379_cast_fp16 = add(x = hidden_states_359_cast_fp16, y = linear_87_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; + fp16 var_80_promoted_25_to_fp16 = const()[name = string("op_80_promoted_25_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2505_cast_fp16 = pow(x = hidden_states_379_cast_fp16, y = var_80_promoted_25_to_fp16)[name = string("op_2505_cast_fp16")]; + tensor variance_51_axes_0 = const()[name = string("variance_51_axes_0"), val = tensor([-1])]; + bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; + tensor variance_51_cast_fp16 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = var_2505_cast_fp16)[name = string("variance_51_cast_fp16")]; + fp16 var_2508_to_fp16 = const()[name = string("op_2508_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2509_cast_fp16 = add(x = variance_51_cast_fp16, y = var_2508_to_fp16)[name = string("op_2509_cast_fp16")]; + fp32 var_2510_epsilon_0 = const()[name = string("op_2510_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2510_cast_fp16 = rsqrt(epsilon = var_2510_epsilon_0, x = var_2509_cast_fp16)[name = string("op_2510_cast_fp16")]; + tensor hidden_states_383_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = var_2510_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; + tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94348736)))]; + tensor input_99_cast_fp16 = mul(x = model_model_layers_12_post_attention_layernorm_weight_to_fp16, y = hidden_states_383_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94350720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95579584))))[name = string("model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = string("linear_88_cast_fp16")]; + tensor var_2522_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_2522_cast_fp16")]; + tensor model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95733248))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96962112))))[name = string("model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor input_103_cast_fp16 = mul(x = var_2522_cast_fp16, y = linear_89_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97115776))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98344640))))[name = string("model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_90_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized, x = input_103_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor hidden_states_389_cast_fp16 = add(x = hidden_states_379_cast_fp16, y = linear_90_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; + fp16 var_80_promoted_26_to_fp16 = const()[name = string("op_80_promoted_26_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2535_cast_fp16 = pow(x = hidden_states_389_cast_fp16, y = var_80_promoted_26_to_fp16)[name = string("op_2535_cast_fp16")]; + tensor variance_53_axes_0 = const()[name = string("variance_53_axes_0"), val = tensor([-1])]; + bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; + tensor variance_53_cast_fp16 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = var_2535_cast_fp16)[name = string("variance_53_cast_fp16")]; + fp16 var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2539_cast_fp16 = add(x = variance_53_cast_fp16, y = var_2538_to_fp16)[name = string("op_2539_cast_fp16")]; + fp32 var_2540_epsilon_0 = const()[name = string("op_2540_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2540_cast_fp16 = rsqrt(epsilon = var_2540_epsilon_0, x = var_2539_cast_fp16)[name = string("op_2540_cast_fp16")]; + tensor hidden_states_393_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = var_2540_cast_fp16)[name = string("hidden_states_393_cast_fp16")]; + tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98498304)))]; + tensor hidden_states_397_cast_fp16 = mul(x = model_model_layers_13_input_layernorm_weight_to_fp16, y = hidden_states_393_cast_fp16)[name = string("hidden_states_397_cast_fp16")]; + tensor var_2551_shape_cast_fp16 = shape(x = hidden_states_397_cast_fp16)[name = string("op_2551_shape_cast_fp16")]; + int32 gather_238 = const()[name = string("gather_238"), val = int32(1)]; + int32 gather_239_axis_0 = const()[name = string("gather_239_axis_0"), val = int32(0)]; + int32 gather_239_batch_dims_0 = const()[name = string("gather_239_batch_dims_0"), val = int32(0)]; + bool gather_239_validate_indices_0 = const()[name = string("gather_239_validate_indices_0"), val = bool(false)]; + string var_2551_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2551_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_239_to_uint16 = const()[name = string("select_239_to_uint16"), val = uint16(1)]; + tensor var_2551_shape_cast_fp16_to_uint16 = cast(dtype = var_2551_shape_cast_fp16_to_uint16_dtype_0, x = var_2551_shape_cast_fp16)[name = string("cast_151")]; + uint16 gather_239_cast_uint16 = gather(axis = gather_239_axis_0, batch_dims = gather_239_batch_dims_0, indices = select_239_to_uint16, validate_indices = gather_239_validate_indices_0, x = var_2551_shape_cast_fp16_to_uint16)[name = string("gather_239_cast_uint16")]; + string gather_239_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_239_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98500288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98961152))))[name = string("model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99018816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99172480))))[name = string("model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_92_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99191744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99345408))))[name = string("model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_93_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_93_cast_fp16")]; + tensor concat_247x = const()[name = string("concat_247x"), val = tensor([1, -1, 15, 64])]; + tensor var_2560_cast_fp16 = reshape(shape = concat_247x, x = linear_91_cast_fp16)[name = string("op_2560_cast_fp16")]; + tensor q_27_perm_0 = const()[name = string("q_27_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_248x = const()[name = string("concat_248x"), val = tensor([1, -1, 5, 64])]; + tensor var_2563_cast_fp16 = reshape(shape = concat_248x, x = linear_92_cast_fp16)[name = string("op_2563_cast_fp16")]; + tensor k_27_perm_0 = const()[name = string("k_27_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_249x = const()[name = string("concat_249x"), val = tensor([1, -1, 5, 64])]; + tensor var_2566_cast_fp16 = reshape(shape = concat_249x, x = linear_93_cast_fp16)[name = string("op_2566_cast_fp16")]; + tensor v_state_27_perm_0 = const()[name = string("v_state_27_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_27_cast_fp16 = transpose(perm = q_27_perm_0, x = var_2560_cast_fp16)[name = string("transpose_75")]; + tensor var_2570_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2570_cast_fp16")]; + tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; + tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; + fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2581_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_2581_cast_fp16")]; + bool var_2583_interleave_0 = const()[name = string("op_2583_interleave_0"), val = bool(false)]; + tensor var_2583_cast_fp16 = concat(axis = var_85, interleave = var_2583_interleave_0, values = (var_2581_cast_fp16, x1_53_cast_fp16))[name = string("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = mul(x = var_2583_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2584_cast_fp16")]; + tensor query_states_55_cast_fp16 = add(x = var_2570_cast_fp16, y = var_2584_cast_fp16)[name = string("query_states_55_cast_fp16")]; + tensor k_27_cast_fp16 = transpose(perm = k_27_perm_0, x = var_2563_cast_fp16)[name = string("transpose_74")]; + tensor var_2586_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2586_cast_fp16")]; + tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; + tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2597_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2597_cast_fp16")]; + bool var_2599_interleave_0 = const()[name = string("op_2599_interleave_0"), val = bool(false)]; + tensor var_2599_cast_fp16 = concat(axis = var_85, interleave = var_2599_interleave_0, values = (var_2597_cast_fp16, x1_55_cast_fp16))[name = string("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = mul(x = var_2599_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2600_cast_fp16")]; + tensor k_state_27_cast_fp16 = add(x = var_2586_cast_fp16, y = var_2600_cast_fp16)[name = string("k_state_27_cast_fp16")]; + tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([0])]; + tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; + tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; + tensor concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = tensor([13])]; + int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; + bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; + tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, expand_dims_156, expand_dims_157, expand_dims_2, expand_dims_159))[name = string("concat_252")]; + tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_252, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = k_state_27_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_90")]; + tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_27_cast_fp16 = transpose(perm = v_state_27_perm_0, x = var_2566_cast_fp16)[name = string("transpose_73")]; + tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_252, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = v_state_27_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_91")]; + tensor var_2623_begin_0 = const()[name = string("op_2623_begin_0"), val = tensor([13, 0, 0, 0, 0])]; + tensor var_2623_end_0 = const()[name = string("op_2623_end_0"), val = tensor([14, 1, 5, 2048, 64])]; + tensor var_2623_end_mask_0 = const()[name = string("op_2623_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2623_squeeze_mask_0 = const()[name = string("op_2623_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2623_cast_fp16 = slice_by_index(begin = var_2623_begin_0, end = var_2623_end_0, end_mask = var_2623_end_mask_0, squeeze_mask = var_2623_squeeze_mask_0, x = coreml_update_state_90)[name = string("op_2623_cast_fp16")]; + tensor var_2626_begin_0 = const()[name = string("op_2626_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2626_end_mask_0 = const()[name = string("op_2626_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2626_cast_fp16 = slice_by_index(begin = var_2626_begin_0, end = concat_11, end_mask = var_2626_end_mask_0, x = var_2623_cast_fp16)[name = string("op_2626_cast_fp16")]; + tensor var_2628_begin_0 = const()[name = string("op_2628_begin_0"), val = tensor([13, 0, 0, 0, 0])]; + tensor var_2628_end_0 = const()[name = string("op_2628_end_0"), val = tensor([14, 1, 5, 2048, 64])]; + tensor var_2628_end_mask_0 = const()[name = string("op_2628_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2628_squeeze_mask_0 = const()[name = string("op_2628_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2628_cast_fp16 = slice_by_index(begin = var_2628_begin_0, end = var_2628_end_0, end_mask = var_2628_end_mask_0, squeeze_mask = var_2628_squeeze_mask_0, x = coreml_update_state_91)[name = string("op_2628_cast_fp16")]; + tensor var_2631_begin_0 = const()[name = string("op_2631_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2631_end_mask_0 = const()[name = string("op_2631_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2631_cast_fp16 = slice_by_index(begin = var_2631_begin_0, end = concat_11, end_mask = var_2631_end_mask_0, x = var_2628_cast_fp16)[name = string("op_2631_cast_fp16")]; + tensor var_2633_shape_cast_fp16 = shape(x = var_2626_cast_fp16)[name = string("op_2633_shape_cast_fp16")]; + int32 gather_247 = const()[name = string("gather_247"), val = int32(1)]; + int32 gather_248 = const()[name = string("gather_248"), val = int32(5)]; + int32 gather_249_axis_0 = const()[name = string("gather_249_axis_0"), val = int32(0)]; + int32 gather_249_batch_dims_0 = const()[name = string("gather_249_batch_dims_0"), val = int32(0)]; + bool gather_249_validate_indices_0 = const()[name = string("gather_249_validate_indices_0"), val = bool(false)]; + string var_2633_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2633_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_249_to_uint16 = const()[name = string("select_249_to_uint16"), val = uint16(2)]; + tensor var_2633_shape_cast_fp16_to_uint16 = cast(dtype = var_2633_shape_cast_fp16_to_uint16_dtype_0, x = var_2633_shape_cast_fp16)[name = string("cast_150")]; + uint16 gather_249_cast_uint16 = gather(axis = gather_249_axis_0, batch_dims = gather_249_batch_dims_0, indices = select_249_to_uint16, validate_indices = gather_249_validate_indices_0, x = var_2633_shape_cast_fp16_to_uint16)[name = string("gather_249_cast_uint16")]; + string gather_249_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_249_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_250 = const()[name = string("gather_250"), val = int32(64)]; + tensor var_2640_axes_0 = const()[name = string("op_2640_axes_0"), val = tensor([2])]; + tensor var_2640_cast_fp16 = expand_dims(axes = var_2640_axes_0, x = var_2626_cast_fp16)[name = string("op_2640_cast_fp16")]; + tensor shape_277_cast_fp16 = shape(x = var_2640_cast_fp16)[name = string("shape_277_cast_fp16")]; + int32 concat_260_axis_0 = const()[name = string("concat_260_axis_0"), val = int32(0)]; + bool concat_260_interleave_0 = const()[name = string("concat_260_interleave_0"), val = bool(false)]; + int32 gather_249_cast_uint16_to_int32 = cast(dtype = gather_249_cast_uint16_to_int32_dtype_0, x = gather_249_cast_uint16)[name = string("cast_149")]; + tensor concat_260 = concat(axis = concat_260_axis_0, interleave = concat_260_interleave_0, values = (gather_247, gather_248, var_89, gather_249_cast_uint16_to_int32, gather_250))[name = string("concat_260")]; + tensor real_div_26 = real_div(x = concat_260, y = shape_277_cast_fp16)[name = string("real_div_26")]; + tensor hidden_states_401_cast_fp16 = tile(reps = real_div_26, x = var_2640_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; + tensor concat_261x = const()[name = string("concat_261x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_55_cast_fp16 = reshape(shape = concat_261x, x = hidden_states_401_cast_fp16)[name = string("key_states_55_cast_fp16")]; + tensor var_2650_shape_cast_fp16 = shape(x = var_2631_cast_fp16)[name = string("op_2650_shape_cast_fp16")]; + int32 gather_251 = const()[name = string("gather_251"), val = int32(1)]; + int32 gather_252 = const()[name = string("gather_252"), val = int32(5)]; + int32 gather_253_axis_0 = const()[name = string("gather_253_axis_0"), val = int32(0)]; + int32 gather_253_batch_dims_0 = const()[name = string("gather_253_batch_dims_0"), val = int32(0)]; + bool gather_253_validate_indices_0 = const()[name = string("gather_253_validate_indices_0"), val = bool(false)]; + string var_2650_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2650_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_253_to_uint16 = const()[name = string("select_253_to_uint16"), val = uint16(2)]; + tensor var_2650_shape_cast_fp16_to_uint16 = cast(dtype = var_2650_shape_cast_fp16_to_uint16_dtype_0, x = var_2650_shape_cast_fp16)[name = string("cast_148")]; + uint16 gather_253_cast_uint16 = gather(axis = gather_253_axis_0, batch_dims = gather_253_batch_dims_0, indices = select_253_to_uint16, validate_indices = gather_253_validate_indices_0, x = var_2650_shape_cast_fp16_to_uint16)[name = string("gather_253_cast_uint16")]; + string gather_253_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_253_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_254 = const()[name = string("gather_254"), val = int32(64)]; + tensor var_2657_axes_0 = const()[name = string("op_2657_axes_0"), val = tensor([2])]; + tensor var_2657_cast_fp16 = expand_dims(axes = var_2657_axes_0, x = var_2631_cast_fp16)[name = string("op_2657_cast_fp16")]; + tensor shape_282_cast_fp16 = shape(x = var_2657_cast_fp16)[name = string("shape_282_cast_fp16")]; + int32 concat_262_axis_0 = const()[name = string("concat_262_axis_0"), val = int32(0)]; + bool concat_262_interleave_0 = const()[name = string("concat_262_interleave_0"), val = bool(false)]; + int32 gather_253_cast_uint16_to_int32 = cast(dtype = gather_253_cast_uint16_to_int32_dtype_0, x = gather_253_cast_uint16)[name = string("cast_147")]; + tensor concat_262 = concat(axis = concat_262_axis_0, interleave = concat_262_interleave_0, values = (gather_251, gather_252, var_89, gather_253_cast_uint16_to_int32, gather_254))[name = string("concat_262")]; + tensor real_div_27 = real_div(x = concat_262, y = shape_282_cast_fp16)[name = string("real_div_27")]; + tensor hidden_states_405_cast_fp16 = tile(reps = real_div_27, x = var_2657_cast_fp16)[name = string("hidden_states_405_cast_fp16")]; + tensor concat_263x = const()[name = string("concat_263x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_55_cast_fp16 = reshape(shape = concat_263x, x = hidden_states_405_cast_fp16)[name = string("value_states_55_cast_fp16")]; + tensor var_2667_shape_cast_fp16 = shape(x = key_states_55_cast_fp16)[name = string("op_2667_shape_cast_fp16")]; + int32 gather_255_axis_0 = const()[name = string("gather_255_axis_0"), val = int32(0)]; + int32 gather_255_batch_dims_0 = const()[name = string("gather_255_batch_dims_0"), val = int32(0)]; + bool gather_255_validate_indices_0 = const()[name = string("gather_255_validate_indices_0"), val = bool(false)]; + string var_2667_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2667_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_255_to_uint16 = const()[name = string("select_255_to_uint16"), val = uint16(2)]; + tensor var_2667_shape_cast_fp16_to_uint16 = cast(dtype = var_2667_shape_cast_fp16_to_uint16_dtype_0, x = var_2667_shape_cast_fp16)[name = string("cast_146")]; + uint16 gather_255_cast_uint16 = gather(axis = gather_255_axis_0, batch_dims = gather_255_batch_dims_0, indices = select_255_to_uint16, validate_indices = gather_255_validate_indices_0, x = var_2667_shape_cast_fp16_to_uint16)[name = string("gather_255_cast_uint16")]; + string gather_255_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_255_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_264_values0_0 = const()[name = string("concat_264_values0_0"), val = int32(1)]; + int32 concat_264_values1_0 = const()[name = string("concat_264_values1_0"), val = int32(1)]; + int32 concat_264_values2_0 = const()[name = string("concat_264_values2_0"), val = int32(0)]; + int32 concat_264_axis_0 = const()[name = string("concat_264_axis_0"), val = int32(0)]; + bool concat_264_interleave_0 = const()[name = string("concat_264_interleave_0"), val = bool(false)]; + int32 gather_255_cast_uint16_to_int32 = cast(dtype = gather_255_cast_uint16_to_int32_dtype_0, x = gather_255_cast_uint16)[name = string("cast_145")]; + tensor concat_264 = concat(axis = concat_264_axis_0, interleave = concat_264_interleave_0, values = (concat_264_values0_0, concat_264_values1_0, concat_264_values2_0, gather_255_cast_uint16_to_int32))[name = string("concat_264")]; + tensor causal_mask_29_begin_0 = const()[name = string("causal_mask_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_29_end_mask_0 = const()[name = string("causal_mask_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_29_cast_fp16 = slice_by_index(begin = causal_mask_29_begin_0, end = concat_264, end_mask = causal_mask_29_end_mask_0, x = causal_mask)[name = string("causal_mask_29_cast_fp16")]; + tensor attn_output_53_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_29_cast_fp16, key = key_states_55_cast_fp16, query = query_states_55_cast_fp16, value = value_states_55_cast_fp16)[name = string("attn_output_53_cast_fp16")]; + tensor var_2673_perm_0 = const()[name = string("op_2673_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_265_axis_0 = const()[name = string("concat_265_axis_0"), val = int32(0)]; + bool concat_265_interleave_0 = const()[name = string("concat_265_interleave_0"), val = bool(false)]; + int32 gather_239_cast_uint16_to_int32 = cast(dtype = gather_239_cast_uint16_to_int32_dtype_0, x = gather_239_cast_uint16)[name = string("cast_144")]; + tensor concat_265 = concat(axis = concat_265_axis_0, interleave = concat_265_interleave_0, values = (gather_238, gather_239_cast_uint16_to_int32, var_85))[name = string("concat_265")]; + tensor var_2673_cast_fp16 = transpose(perm = var_2673_perm_0, x = attn_output_53_cast_fp16)[name = string("transpose_72")]; + tensor input_105_cast_fp16 = reshape(shape = concat_265, x = var_2673_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99364672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99825536))))[name = string("model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_94_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = string("linear_94_cast_fp16")]; + tensor hidden_states_409_cast_fp16 = add(x = hidden_states_389_cast_fp16, y = linear_94_cast_fp16)[name = string("hidden_states_409_cast_fp16")]; + fp16 var_80_promoted_27_to_fp16 = const()[name = string("op_80_promoted_27_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2682_cast_fp16 = pow(x = hidden_states_409_cast_fp16, y = var_80_promoted_27_to_fp16)[name = string("op_2682_cast_fp16")]; + tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([-1])]; + bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; + tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_2682_cast_fp16)[name = string("variance_55_cast_fp16")]; + fp16 var_2685_to_fp16 = const()[name = string("op_2685_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2686_cast_fp16 = add(x = variance_55_cast_fp16, y = var_2685_to_fp16)[name = string("op_2686_cast_fp16")]; + fp32 var_2687_epsilon_0 = const()[name = string("op_2687_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2687_cast_fp16 = rsqrt(epsilon = var_2687_epsilon_0, x = var_2686_cast_fp16)[name = string("op_2687_cast_fp16")]; + tensor hidden_states_413_cast_fp16 = mul(x = hidden_states_409_cast_fp16, y = var_2687_cast_fp16)[name = string("hidden_states_413_cast_fp16")]; + tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99883200)))]; + tensor input_107_cast_fp16 = mul(x = model_model_layers_13_post_attention_layernorm_weight_to_fp16, y = hidden_states_413_cast_fp16)[name = string("input_107_cast_fp16")]; + tensor model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99885184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101114048))))[name = string("model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = string("linear_95_cast_fp16")]; + tensor var_2699_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_2699_cast_fp16")]; + tensor model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101267712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102496576))))[name = string("model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = string("linear_96_cast_fp16")]; + tensor input_111_cast_fp16 = mul(x = var_2699_cast_fp16, y = linear_96_cast_fp16)[name = string("input_111_cast_fp16")]; + tensor model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102650240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103879104))))[name = string("model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_97_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = string("linear_97_cast_fp16")]; + tensor hidden_states_419_cast_fp16 = add(x = hidden_states_409_cast_fp16, y = linear_97_cast_fp16)[name = string("hidden_states_419_cast_fp16")]; + fp16 var_80_promoted_28_to_fp16 = const()[name = string("op_80_promoted_28_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2712_cast_fp16 = pow(x = hidden_states_419_cast_fp16, y = var_80_promoted_28_to_fp16)[name = string("op_2712_cast_fp16")]; + tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([-1])]; + bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; + tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_2712_cast_fp16)[name = string("variance_57_cast_fp16")]; + fp16 var_2715_to_fp16 = const()[name = string("op_2715_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2716_cast_fp16 = add(x = variance_57_cast_fp16, y = var_2715_to_fp16)[name = string("op_2716_cast_fp16")]; + fp32 var_2717_epsilon_0 = const()[name = string("op_2717_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2717_cast_fp16 = rsqrt(epsilon = var_2717_epsilon_0, x = var_2716_cast_fp16)[name = string("op_2717_cast_fp16")]; + tensor hidden_states_423_cast_fp16 = mul(x = hidden_states_419_cast_fp16, y = var_2717_cast_fp16)[name = string("hidden_states_423_cast_fp16")]; + tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104032768)))]; + tensor hidden_states_427_cast_fp16 = mul(x = model_model_layers_14_input_layernorm_weight_to_fp16, y = hidden_states_423_cast_fp16)[name = string("hidden_states_427_cast_fp16")]; + tensor var_2728_shape_cast_fp16 = shape(x = hidden_states_427_cast_fp16)[name = string("op_2728_shape_cast_fp16")]; + int32 gather_256 = const()[name = string("gather_256"), val = int32(1)]; + int32 gather_257_axis_0 = const()[name = string("gather_257_axis_0"), val = int32(0)]; + int32 gather_257_batch_dims_0 = const()[name = string("gather_257_batch_dims_0"), val = int32(0)]; + bool gather_257_validate_indices_0 = const()[name = string("gather_257_validate_indices_0"), val = bool(false)]; + string var_2728_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2728_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_257_to_uint16 = const()[name = string("select_257_to_uint16"), val = uint16(1)]; + tensor var_2728_shape_cast_fp16_to_uint16 = cast(dtype = var_2728_shape_cast_fp16_to_uint16_dtype_0, x = var_2728_shape_cast_fp16)[name = string("cast_143")]; + uint16 gather_257_cast_uint16 = gather(axis = gather_257_axis_0, batch_dims = gather_257_batch_dims_0, indices = select_257_to_uint16, validate_indices = gather_257_validate_indices_0, x = var_2728_shape_cast_fp16_to_uint16)[name = string("gather_257_cast_uint16")]; + string gather_257_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_257_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104034752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104495616))))[name = string("model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_98_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_98_cast_fp16")]; + tensor model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104553280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104706944))))[name = string("model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_99_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_99_cast_fp16")]; + tensor model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104726208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104879872))))[name = string("model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_100_cast_fp16")]; + tensor concat_266x = const()[name = string("concat_266x"), val = tensor([1, -1, 15, 64])]; + tensor var_2737_cast_fp16 = reshape(shape = concat_266x, x = linear_98_cast_fp16)[name = string("op_2737_cast_fp16")]; + tensor q_29_perm_0 = const()[name = string("q_29_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_267x = const()[name = string("concat_267x"), val = tensor([1, -1, 5, 64])]; + tensor var_2740_cast_fp16 = reshape(shape = concat_267x, x = linear_99_cast_fp16)[name = string("op_2740_cast_fp16")]; + tensor k_29_perm_0 = const()[name = string("k_29_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_268x = const()[name = string("concat_268x"), val = tensor([1, -1, 5, 64])]; + tensor var_2743_cast_fp16 = reshape(shape = concat_268x, x = linear_100_cast_fp16)[name = string("op_2743_cast_fp16")]; + tensor v_state_29_perm_0 = const()[name = string("v_state_29_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_29_cast_fp16 = transpose(perm = q_29_perm_0, x = var_2737_cast_fp16)[name = string("transpose_71")]; + tensor var_2747_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2747_cast_fp16")]; + tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; + tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; + fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2758_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2758_cast_fp16")]; + bool var_2760_interleave_0 = const()[name = string("op_2760_interleave_0"), val = bool(false)]; + tensor var_2760_cast_fp16 = concat(axis = var_85, interleave = var_2760_interleave_0, values = (var_2758_cast_fp16, x1_57_cast_fp16))[name = string("op_2760_cast_fp16")]; + tensor var_2761_cast_fp16 = mul(x = var_2760_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2761_cast_fp16")]; + tensor query_states_59_cast_fp16 = add(x = var_2747_cast_fp16, y = var_2761_cast_fp16)[name = string("query_states_59_cast_fp16")]; + tensor k_29_cast_fp16 = transpose(perm = k_29_perm_0, x = var_2740_cast_fp16)[name = string("transpose_70")]; + tensor var_2763_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2763_cast_fp16")]; + tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; + tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2774_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2774_cast_fp16")]; + bool var_2776_interleave_0 = const()[name = string("op_2776_interleave_0"), val = bool(false)]; + tensor var_2776_cast_fp16 = concat(axis = var_85, interleave = var_2776_interleave_0, values = (var_2774_cast_fp16, x1_59_cast_fp16))[name = string("op_2776_cast_fp16")]; + tensor var_2777_cast_fp16 = mul(x = var_2776_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2777_cast_fp16")]; + tensor k_state_29_cast_fp16 = add(x = var_2763_cast_fp16, y = var_2777_cast_fp16)[name = string("k_state_29_cast_fp16")]; + tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([0])]; + tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; + tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; + tensor concat_271_values0_0 = const()[name = string("concat_271_values0_0"), val = tensor([14])]; + int32 concat_271_axis_0 = const()[name = string("concat_271_axis_0"), val = int32(0)]; + bool concat_271_interleave_0 = const()[name = string("concat_271_interleave_0"), val = bool(false)]; + tensor concat_271 = concat(axis = concat_271_axis_0, interleave = concat_271_interleave_0, values = (concat_271_values0_0, expand_dims_168, expand_dims_169, expand_dims_2, expand_dims_171))[name = string("concat_271")]; + tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_271, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = k_state_29_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_92")]; + tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_29_cast_fp16 = transpose(perm = v_state_29_perm_0, x = var_2743_cast_fp16)[name = string("transpose_69")]; + tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_271, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = v_state_29_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_93")]; + tensor var_2800_begin_0 = const()[name = string("op_2800_begin_0"), val = tensor([14, 0, 0, 0, 0])]; + tensor var_2800_end_0 = const()[name = string("op_2800_end_0"), val = tensor([15, 1, 5, 2048, 64])]; + tensor var_2800_end_mask_0 = const()[name = string("op_2800_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2800_squeeze_mask_0 = const()[name = string("op_2800_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2800_cast_fp16 = slice_by_index(begin = var_2800_begin_0, end = var_2800_end_0, end_mask = var_2800_end_mask_0, squeeze_mask = var_2800_squeeze_mask_0, x = coreml_update_state_92)[name = string("op_2800_cast_fp16")]; + tensor var_2803_begin_0 = const()[name = string("op_2803_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2803_end_mask_0 = const()[name = string("op_2803_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2803_cast_fp16 = slice_by_index(begin = var_2803_begin_0, end = concat_11, end_mask = var_2803_end_mask_0, x = var_2800_cast_fp16)[name = string("op_2803_cast_fp16")]; + tensor var_2805_begin_0 = const()[name = string("op_2805_begin_0"), val = tensor([14, 0, 0, 0, 0])]; + tensor var_2805_end_0 = const()[name = string("op_2805_end_0"), val = tensor([15, 1, 5, 2048, 64])]; + tensor var_2805_end_mask_0 = const()[name = string("op_2805_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2805_squeeze_mask_0 = const()[name = string("op_2805_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2805_cast_fp16 = slice_by_index(begin = var_2805_begin_0, end = var_2805_end_0, end_mask = var_2805_end_mask_0, squeeze_mask = var_2805_squeeze_mask_0, x = coreml_update_state_93)[name = string("op_2805_cast_fp16")]; + tensor var_2808_begin_0 = const()[name = string("op_2808_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2808_end_mask_0 = const()[name = string("op_2808_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2808_cast_fp16 = slice_by_index(begin = var_2808_begin_0, end = concat_11, end_mask = var_2808_end_mask_0, x = var_2805_cast_fp16)[name = string("op_2808_cast_fp16")]; + tensor var_2810_shape_cast_fp16 = shape(x = var_2803_cast_fp16)[name = string("op_2810_shape_cast_fp16")]; + int32 gather_265 = const()[name = string("gather_265"), val = int32(1)]; + int32 gather_266 = const()[name = string("gather_266"), val = int32(5)]; + int32 gather_267_axis_0 = const()[name = string("gather_267_axis_0"), val = int32(0)]; + int32 gather_267_batch_dims_0 = const()[name = string("gather_267_batch_dims_0"), val = int32(0)]; + bool gather_267_validate_indices_0 = const()[name = string("gather_267_validate_indices_0"), val = bool(false)]; + string var_2810_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2810_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_267_to_uint16 = const()[name = string("select_267_to_uint16"), val = uint16(2)]; + tensor var_2810_shape_cast_fp16_to_uint16 = cast(dtype = var_2810_shape_cast_fp16_to_uint16_dtype_0, x = var_2810_shape_cast_fp16)[name = string("cast_142")]; + uint16 gather_267_cast_uint16 = gather(axis = gather_267_axis_0, batch_dims = gather_267_batch_dims_0, indices = select_267_to_uint16, validate_indices = gather_267_validate_indices_0, x = var_2810_shape_cast_fp16_to_uint16)[name = string("gather_267_cast_uint16")]; + string gather_267_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_267_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_268 = const()[name = string("gather_268"), val = int32(64)]; + tensor var_2817_axes_0 = const()[name = string("op_2817_axes_0"), val = tensor([2])]; + tensor var_2817_cast_fp16 = expand_dims(axes = var_2817_axes_0, x = var_2803_cast_fp16)[name = string("op_2817_cast_fp16")]; + tensor shape_297_cast_fp16 = shape(x = var_2817_cast_fp16)[name = string("shape_297_cast_fp16")]; + int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)]; + bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)]; + int32 gather_267_cast_uint16_to_int32 = cast(dtype = gather_267_cast_uint16_to_int32_dtype_0, x = gather_267_cast_uint16)[name = string("cast_141")]; + tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_265, gather_266, var_89, gather_267_cast_uint16_to_int32, gather_268))[name = string("concat_279")]; + tensor real_div_28 = real_div(x = concat_279, y = shape_297_cast_fp16)[name = string("real_div_28")]; + tensor hidden_states_431_cast_fp16 = tile(reps = real_div_28, x = var_2817_cast_fp16)[name = string("hidden_states_431_cast_fp16")]; + tensor concat_280x = const()[name = string("concat_280x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_59_cast_fp16 = reshape(shape = concat_280x, x = hidden_states_431_cast_fp16)[name = string("key_states_59_cast_fp16")]; + tensor var_2827_shape_cast_fp16 = shape(x = var_2808_cast_fp16)[name = string("op_2827_shape_cast_fp16")]; + int32 gather_269 = const()[name = string("gather_269"), val = int32(1)]; + int32 gather_270 = const()[name = string("gather_270"), val = int32(5)]; + int32 gather_271_axis_0 = const()[name = string("gather_271_axis_0"), val = int32(0)]; + int32 gather_271_batch_dims_0 = const()[name = string("gather_271_batch_dims_0"), val = int32(0)]; + bool gather_271_validate_indices_0 = const()[name = string("gather_271_validate_indices_0"), val = bool(false)]; + string var_2827_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2827_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_271_to_uint16 = const()[name = string("select_271_to_uint16"), val = uint16(2)]; + tensor var_2827_shape_cast_fp16_to_uint16 = cast(dtype = var_2827_shape_cast_fp16_to_uint16_dtype_0, x = var_2827_shape_cast_fp16)[name = string("cast_140")]; + uint16 gather_271_cast_uint16 = gather(axis = gather_271_axis_0, batch_dims = gather_271_batch_dims_0, indices = select_271_to_uint16, validate_indices = gather_271_validate_indices_0, x = var_2827_shape_cast_fp16_to_uint16)[name = string("gather_271_cast_uint16")]; + string gather_271_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_271_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_272 = const()[name = string("gather_272"), val = int32(64)]; + tensor var_2834_axes_0 = const()[name = string("op_2834_axes_0"), val = tensor([2])]; + tensor var_2834_cast_fp16 = expand_dims(axes = var_2834_axes_0, x = var_2808_cast_fp16)[name = string("op_2834_cast_fp16")]; + tensor shape_302_cast_fp16 = shape(x = var_2834_cast_fp16)[name = string("shape_302_cast_fp16")]; + int32 concat_281_axis_0 = const()[name = string("concat_281_axis_0"), val = int32(0)]; + bool concat_281_interleave_0 = const()[name = string("concat_281_interleave_0"), val = bool(false)]; + int32 gather_271_cast_uint16_to_int32 = cast(dtype = gather_271_cast_uint16_to_int32_dtype_0, x = gather_271_cast_uint16)[name = string("cast_139")]; + tensor concat_281 = concat(axis = concat_281_axis_0, interleave = concat_281_interleave_0, values = (gather_269, gather_270, var_89, gather_271_cast_uint16_to_int32, gather_272))[name = string("concat_281")]; + tensor real_div_29 = real_div(x = concat_281, y = shape_302_cast_fp16)[name = string("real_div_29")]; + tensor hidden_states_435_cast_fp16 = tile(reps = real_div_29, x = var_2834_cast_fp16)[name = string("hidden_states_435_cast_fp16")]; + tensor concat_282x = const()[name = string("concat_282x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_59_cast_fp16 = reshape(shape = concat_282x, x = hidden_states_435_cast_fp16)[name = string("value_states_59_cast_fp16")]; + tensor var_2844_shape_cast_fp16 = shape(x = key_states_59_cast_fp16)[name = string("op_2844_shape_cast_fp16")]; + int32 gather_273_axis_0 = const()[name = string("gather_273_axis_0"), val = int32(0)]; + int32 gather_273_batch_dims_0 = const()[name = string("gather_273_batch_dims_0"), val = int32(0)]; + bool gather_273_validate_indices_0 = const()[name = string("gather_273_validate_indices_0"), val = bool(false)]; + string var_2844_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2844_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_273_to_uint16 = const()[name = string("select_273_to_uint16"), val = uint16(2)]; + tensor var_2844_shape_cast_fp16_to_uint16 = cast(dtype = var_2844_shape_cast_fp16_to_uint16_dtype_0, x = var_2844_shape_cast_fp16)[name = string("cast_138")]; + uint16 gather_273_cast_uint16 = gather(axis = gather_273_axis_0, batch_dims = gather_273_batch_dims_0, indices = select_273_to_uint16, validate_indices = gather_273_validate_indices_0, x = var_2844_shape_cast_fp16_to_uint16)[name = string("gather_273_cast_uint16")]; + string gather_273_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_273_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_283_values0_0 = const()[name = string("concat_283_values0_0"), val = int32(1)]; + int32 concat_283_values1_0 = const()[name = string("concat_283_values1_0"), val = int32(1)]; + int32 concat_283_values2_0 = const()[name = string("concat_283_values2_0"), val = int32(0)]; + int32 concat_283_axis_0 = const()[name = string("concat_283_axis_0"), val = int32(0)]; + bool concat_283_interleave_0 = const()[name = string("concat_283_interleave_0"), val = bool(false)]; + int32 gather_273_cast_uint16_to_int32 = cast(dtype = gather_273_cast_uint16_to_int32_dtype_0, x = gather_273_cast_uint16)[name = string("cast_137")]; + tensor concat_283 = concat(axis = concat_283_axis_0, interleave = concat_283_interleave_0, values = (concat_283_values0_0, concat_283_values1_0, concat_283_values2_0, gather_273_cast_uint16_to_int32))[name = string("concat_283")]; + tensor causal_mask_31_begin_0 = const()[name = string("causal_mask_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_31_end_mask_0 = const()[name = string("causal_mask_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_31_cast_fp16 = slice_by_index(begin = causal_mask_31_begin_0, end = concat_283, end_mask = causal_mask_31_end_mask_0, x = causal_mask)[name = string("causal_mask_31_cast_fp16")]; + tensor attn_output_57_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_31_cast_fp16, key = key_states_59_cast_fp16, query = query_states_59_cast_fp16, value = value_states_59_cast_fp16)[name = string("attn_output_57_cast_fp16")]; + tensor var_2850_perm_0 = const()[name = string("op_2850_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_284_axis_0 = const()[name = string("concat_284_axis_0"), val = int32(0)]; + bool concat_284_interleave_0 = const()[name = string("concat_284_interleave_0"), val = bool(false)]; + int32 gather_257_cast_uint16_to_int32 = cast(dtype = gather_257_cast_uint16_to_int32_dtype_0, x = gather_257_cast_uint16)[name = string("cast_136")]; + tensor concat_284 = concat(axis = concat_284_axis_0, interleave = concat_284_interleave_0, values = (gather_256, gather_257_cast_uint16_to_int32, var_85))[name = string("concat_284")]; + tensor var_2850_cast_fp16 = transpose(perm = var_2850_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_68")]; + tensor input_113_cast_fp16 = reshape(shape = concat_284, x = var_2850_cast_fp16)[name = string("input_113_cast_fp16")]; + tensor model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104899136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105360000))))[name = string("model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_101_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized, x = input_113_cast_fp16)[name = string("linear_101_cast_fp16")]; + tensor hidden_states_439_cast_fp16 = add(x = hidden_states_419_cast_fp16, y = linear_101_cast_fp16)[name = string("hidden_states_439_cast_fp16")]; + fp16 var_80_promoted_29_to_fp16 = const()[name = string("op_80_promoted_29_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2859_cast_fp16 = pow(x = hidden_states_439_cast_fp16, y = var_80_promoted_29_to_fp16)[name = string("op_2859_cast_fp16")]; + tensor variance_59_axes_0 = const()[name = string("variance_59_axes_0"), val = tensor([-1])]; + bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; + tensor variance_59_cast_fp16 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = var_2859_cast_fp16)[name = string("variance_59_cast_fp16")]; + fp16 var_2862_to_fp16 = const()[name = string("op_2862_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2863_cast_fp16 = add(x = variance_59_cast_fp16, y = var_2862_to_fp16)[name = string("op_2863_cast_fp16")]; + fp32 var_2864_epsilon_0 = const()[name = string("op_2864_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2864_cast_fp16 = rsqrt(epsilon = var_2864_epsilon_0, x = var_2863_cast_fp16)[name = string("op_2864_cast_fp16")]; + tensor hidden_states_443_cast_fp16 = mul(x = hidden_states_439_cast_fp16, y = var_2864_cast_fp16)[name = string("hidden_states_443_cast_fp16")]; + tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105417664)))]; + tensor input_115_cast_fp16 = mul(x = model_model_layers_14_post_attention_layernorm_weight_to_fp16, y = hidden_states_443_cast_fp16)[name = string("input_115_cast_fp16")]; + tensor model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105419648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106648512))))[name = string("model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_102_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = string("linear_102_cast_fp16")]; + tensor var_2876_cast_fp16 = silu(x = linear_102_cast_fp16)[name = string("op_2876_cast_fp16")]; + tensor model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106802176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108031040))))[name = string("model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_103_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = string("linear_103_cast_fp16")]; + tensor input_119_cast_fp16 = mul(x = var_2876_cast_fp16, y = linear_103_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108184704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109413568))))[name = string("model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_104_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = string("linear_104_cast_fp16")]; + tensor hidden_states_449_cast_fp16 = add(x = hidden_states_439_cast_fp16, y = linear_104_cast_fp16)[name = string("hidden_states_449_cast_fp16")]; + fp16 var_80_promoted_30_to_fp16 = const()[name = string("op_80_promoted_30_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2889_cast_fp16 = pow(x = hidden_states_449_cast_fp16, y = var_80_promoted_30_to_fp16)[name = string("op_2889_cast_fp16")]; + tensor variance_61_axes_0 = const()[name = string("variance_61_axes_0"), val = tensor([-1])]; + bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; + tensor variance_61_cast_fp16 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = var_2889_cast_fp16)[name = string("variance_61_cast_fp16")]; + fp16 var_2892_to_fp16 = const()[name = string("op_2892_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2893_cast_fp16 = add(x = variance_61_cast_fp16, y = var_2892_to_fp16)[name = string("op_2893_cast_fp16")]; + fp32 var_2894_epsilon_0 = const()[name = string("op_2894_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2894_cast_fp16 = rsqrt(epsilon = var_2894_epsilon_0, x = var_2893_cast_fp16)[name = string("op_2894_cast_fp16")]; + tensor hidden_states_453_cast_fp16 = mul(x = hidden_states_449_cast_fp16, y = var_2894_cast_fp16)[name = string("hidden_states_453_cast_fp16")]; + tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109567232)))]; + tensor hidden_states_457_cast_fp16 = mul(x = model_model_layers_15_input_layernorm_weight_to_fp16, y = hidden_states_453_cast_fp16)[name = string("hidden_states_457_cast_fp16")]; + tensor var_2905_shape_cast_fp16 = shape(x = hidden_states_457_cast_fp16)[name = string("op_2905_shape_cast_fp16")]; + int32 gather_274 = const()[name = string("gather_274"), val = int32(1)]; + int32 gather_275_axis_0 = const()[name = string("gather_275_axis_0"), val = int32(0)]; + int32 gather_275_batch_dims_0 = const()[name = string("gather_275_batch_dims_0"), val = int32(0)]; + bool gather_275_validate_indices_0 = const()[name = string("gather_275_validate_indices_0"), val = bool(false)]; + string var_2905_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2905_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_275_to_uint16 = const()[name = string("select_275_to_uint16"), val = uint16(1)]; + tensor var_2905_shape_cast_fp16_to_uint16 = cast(dtype = var_2905_shape_cast_fp16_to_uint16_dtype_0, x = var_2905_shape_cast_fp16)[name = string("cast_135")]; + uint16 gather_275_cast_uint16 = gather(axis = gather_275_axis_0, batch_dims = gather_275_batch_dims_0, indices = select_275_to_uint16, validate_indices = gather_275_validate_indices_0, x = var_2905_shape_cast_fp16_to_uint16)[name = string("gather_275_cast_uint16")]; + string gather_275_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_275_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109569216))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110030080))))[name = string("model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_105_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_105_cast_fp16")]; + tensor model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110087744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110241408))))[name = string("model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_106_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_106_cast_fp16")]; + tensor model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110260672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110414336))))[name = string("model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_107_cast_fp16")]; + tensor concat_285x = const()[name = string("concat_285x"), val = tensor([1, -1, 15, 64])]; + tensor var_2914_cast_fp16 = reshape(shape = concat_285x, x = linear_105_cast_fp16)[name = string("op_2914_cast_fp16")]; + tensor q_31_perm_0 = const()[name = string("q_31_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 5, 64])]; + tensor var_2917_cast_fp16 = reshape(shape = concat_286x, x = linear_106_cast_fp16)[name = string("op_2917_cast_fp16")]; + tensor k_31_perm_0 = const()[name = string("k_31_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 5, 64])]; + tensor var_2920_cast_fp16 = reshape(shape = concat_287x, x = linear_107_cast_fp16)[name = string("op_2920_cast_fp16")]; + tensor v_state_31_perm_0 = const()[name = string("v_state_31_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_31_cast_fp16 = transpose(perm = q_31_perm_0, x = var_2914_cast_fp16)[name = string("transpose_67")]; + tensor var_2924_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2924_cast_fp16")]; + tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; + tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2935_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_2935_cast_fp16")]; + bool var_2937_interleave_0 = const()[name = string("op_2937_interleave_0"), val = bool(false)]; + tensor var_2937_cast_fp16 = concat(axis = var_85, interleave = var_2937_interleave_0, values = (var_2935_cast_fp16, x1_61_cast_fp16))[name = string("op_2937_cast_fp16")]; + tensor var_2938_cast_fp16 = mul(x = var_2937_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2938_cast_fp16")]; + tensor query_states_63_cast_fp16 = add(x = var_2924_cast_fp16, y = var_2938_cast_fp16)[name = string("query_states_63_cast_fp16")]; + tensor k_31_cast_fp16 = transpose(perm = k_31_perm_0, x = var_2917_cast_fp16)[name = string("transpose_66")]; + tensor var_2940_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2940_cast_fp16")]; + tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; + tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2951_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2951_cast_fp16")]; + bool var_2953_interleave_0 = const()[name = string("op_2953_interleave_0"), val = bool(false)]; + tensor var_2953_cast_fp16 = concat(axis = var_85, interleave = var_2953_interleave_0, values = (var_2951_cast_fp16, x1_63_cast_fp16))[name = string("op_2953_cast_fp16")]; + tensor var_2954_cast_fp16 = mul(x = var_2953_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2954_cast_fp16")]; + tensor k_state_31_cast_fp16 = add(x = var_2940_cast_fp16, y = var_2954_cast_fp16)[name = string("k_state_31_cast_fp16")]; + tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([0])]; + tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; + tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; + tensor concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor([15])]; + int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; + bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; + tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_180, expand_dims_181, expand_dims_2, expand_dims_183))[name = string("concat_290")]; + tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_290, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = k_state_31_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_94")]; + tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_31_cast_fp16 = transpose(perm = v_state_31_perm_0, x = var_2920_cast_fp16)[name = string("transpose_65")]; + tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_290, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = v_state_31_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_95")]; + tensor var_2977_begin_0 = const()[name = string("op_2977_begin_0"), val = tensor([15, 0, 0, 0, 0])]; + tensor var_2977_end_0 = const()[name = string("op_2977_end_0"), val = tensor([16, 1, 5, 2048, 64])]; + tensor var_2977_end_mask_0 = const()[name = string("op_2977_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2977_squeeze_mask_0 = const()[name = string("op_2977_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2977_cast_fp16 = slice_by_index(begin = var_2977_begin_0, end = var_2977_end_0, end_mask = var_2977_end_mask_0, squeeze_mask = var_2977_squeeze_mask_0, x = coreml_update_state_94)[name = string("op_2977_cast_fp16")]; + tensor var_2980_begin_0 = const()[name = string("op_2980_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2980_end_mask_0 = const()[name = string("op_2980_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2980_cast_fp16 = slice_by_index(begin = var_2980_begin_0, end = concat_11, end_mask = var_2980_end_mask_0, x = var_2977_cast_fp16)[name = string("op_2980_cast_fp16")]; + tensor var_2982_begin_0 = const()[name = string("op_2982_begin_0"), val = tensor([15, 0, 0, 0, 0])]; + tensor var_2982_end_0 = const()[name = string("op_2982_end_0"), val = tensor([16, 1, 5, 2048, 64])]; + tensor var_2982_end_mask_0 = const()[name = string("op_2982_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2982_squeeze_mask_0 = const()[name = string("op_2982_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2982_cast_fp16 = slice_by_index(begin = var_2982_begin_0, end = var_2982_end_0, end_mask = var_2982_end_mask_0, squeeze_mask = var_2982_squeeze_mask_0, x = coreml_update_state_95)[name = string("op_2982_cast_fp16")]; + tensor var_2985_begin_0 = const()[name = string("op_2985_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2985_end_mask_0 = const()[name = string("op_2985_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2985_cast_fp16 = slice_by_index(begin = var_2985_begin_0, end = concat_11, end_mask = var_2985_end_mask_0, x = var_2982_cast_fp16)[name = string("op_2985_cast_fp16")]; + tensor var_2987_shape_cast_fp16 = shape(x = var_2980_cast_fp16)[name = string("op_2987_shape_cast_fp16")]; + int32 gather_283 = const()[name = string("gather_283"), val = int32(1)]; + int32 gather_284 = const()[name = string("gather_284"), val = int32(5)]; + int32 gather_285_axis_0 = const()[name = string("gather_285_axis_0"), val = int32(0)]; + int32 gather_285_batch_dims_0 = const()[name = string("gather_285_batch_dims_0"), val = int32(0)]; + bool gather_285_validate_indices_0 = const()[name = string("gather_285_validate_indices_0"), val = bool(false)]; + string var_2987_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2987_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_285_to_uint16 = const()[name = string("select_285_to_uint16"), val = uint16(2)]; + tensor var_2987_shape_cast_fp16_to_uint16 = cast(dtype = var_2987_shape_cast_fp16_to_uint16_dtype_0, x = var_2987_shape_cast_fp16)[name = string("cast_134")]; + uint16 gather_285_cast_uint16 = gather(axis = gather_285_axis_0, batch_dims = gather_285_batch_dims_0, indices = select_285_to_uint16, validate_indices = gather_285_validate_indices_0, x = var_2987_shape_cast_fp16_to_uint16)[name = string("gather_285_cast_uint16")]; + string gather_285_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_285_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_286 = const()[name = string("gather_286"), val = int32(64)]; + tensor var_2994_axes_0 = const()[name = string("op_2994_axes_0"), val = tensor([2])]; + tensor var_2994_cast_fp16 = expand_dims(axes = var_2994_axes_0, x = var_2980_cast_fp16)[name = string("op_2994_cast_fp16")]; + tensor shape_317_cast_fp16 = shape(x = var_2994_cast_fp16)[name = string("shape_317_cast_fp16")]; + int32 concat_298_axis_0 = const()[name = string("concat_298_axis_0"), val = int32(0)]; + bool concat_298_interleave_0 = const()[name = string("concat_298_interleave_0"), val = bool(false)]; + int32 gather_285_cast_uint16_to_int32 = cast(dtype = gather_285_cast_uint16_to_int32_dtype_0, x = gather_285_cast_uint16)[name = string("cast_133")]; + tensor concat_298 = concat(axis = concat_298_axis_0, interleave = concat_298_interleave_0, values = (gather_283, gather_284, var_89, gather_285_cast_uint16_to_int32, gather_286))[name = string("concat_298")]; + tensor real_div_30 = real_div(x = concat_298, y = shape_317_cast_fp16)[name = string("real_div_30")]; + tensor hidden_states_461_cast_fp16 = tile(reps = real_div_30, x = var_2994_cast_fp16)[name = string("hidden_states_461_cast_fp16")]; + tensor concat_299x = const()[name = string("concat_299x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_63_cast_fp16 = reshape(shape = concat_299x, x = hidden_states_461_cast_fp16)[name = string("key_states_63_cast_fp16")]; + tensor var_3004_shape_cast_fp16 = shape(x = var_2985_cast_fp16)[name = string("op_3004_shape_cast_fp16")]; + int32 gather_287 = const()[name = string("gather_287"), val = int32(1)]; + int32 gather_288 = const()[name = string("gather_288"), val = int32(5)]; + int32 gather_289_axis_0 = const()[name = string("gather_289_axis_0"), val = int32(0)]; + int32 gather_289_batch_dims_0 = const()[name = string("gather_289_batch_dims_0"), val = int32(0)]; + bool gather_289_validate_indices_0 = const()[name = string("gather_289_validate_indices_0"), val = bool(false)]; + string var_3004_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3004_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_289_to_uint16 = const()[name = string("select_289_to_uint16"), val = uint16(2)]; + tensor var_3004_shape_cast_fp16_to_uint16 = cast(dtype = var_3004_shape_cast_fp16_to_uint16_dtype_0, x = var_3004_shape_cast_fp16)[name = string("cast_132")]; + uint16 gather_289_cast_uint16 = gather(axis = gather_289_axis_0, batch_dims = gather_289_batch_dims_0, indices = select_289_to_uint16, validate_indices = gather_289_validate_indices_0, x = var_3004_shape_cast_fp16_to_uint16)[name = string("gather_289_cast_uint16")]; + string gather_289_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_289_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_290 = const()[name = string("gather_290"), val = int32(64)]; + tensor var_3011_axes_0 = const()[name = string("op_3011_axes_0"), val = tensor([2])]; + tensor var_3011_cast_fp16 = expand_dims(axes = var_3011_axes_0, x = var_2985_cast_fp16)[name = string("op_3011_cast_fp16")]; + tensor shape_322_cast_fp16 = shape(x = var_3011_cast_fp16)[name = string("shape_322_cast_fp16")]; + int32 concat_300_axis_0 = const()[name = string("concat_300_axis_0"), val = int32(0)]; + bool concat_300_interleave_0 = const()[name = string("concat_300_interleave_0"), val = bool(false)]; + int32 gather_289_cast_uint16_to_int32 = cast(dtype = gather_289_cast_uint16_to_int32_dtype_0, x = gather_289_cast_uint16)[name = string("cast_131")]; + tensor concat_300 = concat(axis = concat_300_axis_0, interleave = concat_300_interleave_0, values = (gather_287, gather_288, var_89, gather_289_cast_uint16_to_int32, gather_290))[name = string("concat_300")]; + tensor real_div_31 = real_div(x = concat_300, y = shape_322_cast_fp16)[name = string("real_div_31")]; + tensor hidden_states_465_cast_fp16 = tile(reps = real_div_31, x = var_3011_cast_fp16)[name = string("hidden_states_465_cast_fp16")]; + tensor concat_301x = const()[name = string("concat_301x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_63_cast_fp16 = reshape(shape = concat_301x, x = hidden_states_465_cast_fp16)[name = string("value_states_63_cast_fp16")]; + tensor var_3021_shape_cast_fp16 = shape(x = key_states_63_cast_fp16)[name = string("op_3021_shape_cast_fp16")]; + int32 gather_291_axis_0 = const()[name = string("gather_291_axis_0"), val = int32(0)]; + int32 gather_291_batch_dims_0 = const()[name = string("gather_291_batch_dims_0"), val = int32(0)]; + bool gather_291_validate_indices_0 = const()[name = string("gather_291_validate_indices_0"), val = bool(false)]; + string var_3021_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3021_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_291_to_uint16 = const()[name = string("select_291_to_uint16"), val = uint16(2)]; + tensor var_3021_shape_cast_fp16_to_uint16 = cast(dtype = var_3021_shape_cast_fp16_to_uint16_dtype_0, x = var_3021_shape_cast_fp16)[name = string("cast_130")]; + uint16 gather_291_cast_uint16 = gather(axis = gather_291_axis_0, batch_dims = gather_291_batch_dims_0, indices = select_291_to_uint16, validate_indices = gather_291_validate_indices_0, x = var_3021_shape_cast_fp16_to_uint16)[name = string("gather_291_cast_uint16")]; + string gather_291_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_291_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(1)]; + int32 concat_302_values1_0 = const()[name = string("concat_302_values1_0"), val = int32(1)]; + int32 concat_302_values2_0 = const()[name = string("concat_302_values2_0"), val = int32(0)]; + int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)]; + bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)]; + int32 gather_291_cast_uint16_to_int32 = cast(dtype = gather_291_cast_uint16_to_int32_dtype_0, x = gather_291_cast_uint16)[name = string("cast_129")]; + tensor concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, concat_302_values1_0, concat_302_values2_0, gather_291_cast_uint16_to_int32))[name = string("concat_302")]; + tensor causal_mask_33_begin_0 = const()[name = string("causal_mask_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_33_end_mask_0 = const()[name = string("causal_mask_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_33_cast_fp16 = slice_by_index(begin = causal_mask_33_begin_0, end = concat_302, end_mask = causal_mask_33_end_mask_0, x = causal_mask)[name = string("causal_mask_33_cast_fp16")]; + tensor attn_output_61_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_33_cast_fp16, key = key_states_63_cast_fp16, query = query_states_63_cast_fp16, value = value_states_63_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_3027_perm_0 = const()[name = string("op_3027_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_303_axis_0 = const()[name = string("concat_303_axis_0"), val = int32(0)]; + bool concat_303_interleave_0 = const()[name = string("concat_303_interleave_0"), val = bool(false)]; + int32 gather_275_cast_uint16_to_int32 = cast(dtype = gather_275_cast_uint16_to_int32_dtype_0, x = gather_275_cast_uint16)[name = string("cast_128")]; + tensor concat_303 = concat(axis = concat_303_axis_0, interleave = concat_303_interleave_0, values = (gather_274, gather_275_cast_uint16_to_int32, var_85))[name = string("concat_303")]; + tensor var_3027_cast_fp16 = transpose(perm = var_3027_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_64")]; + tensor input_121_cast_fp16 = reshape(shape = concat_303, x = var_3027_cast_fp16)[name = string("input_121_cast_fp16")]; + tensor model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110433600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110894464))))[name = string("model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_108_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized, x = input_121_cast_fp16)[name = string("linear_108_cast_fp16")]; + tensor hidden_states_469_cast_fp16 = add(x = hidden_states_449_cast_fp16, y = linear_108_cast_fp16)[name = string("hidden_states_469_cast_fp16")]; + fp16 var_80_promoted_31_to_fp16 = const()[name = string("op_80_promoted_31_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3036_cast_fp16 = pow(x = hidden_states_469_cast_fp16, y = var_80_promoted_31_to_fp16)[name = string("op_3036_cast_fp16")]; + tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([-1])]; + bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; + tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_3036_cast_fp16)[name = string("variance_63_cast_fp16")]; + fp16 var_3039_to_fp16 = const()[name = string("op_3039_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3040_cast_fp16 = add(x = variance_63_cast_fp16, y = var_3039_to_fp16)[name = string("op_3040_cast_fp16")]; + fp32 var_3041_epsilon_0 = const()[name = string("op_3041_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3041_cast_fp16 = rsqrt(epsilon = var_3041_epsilon_0, x = var_3040_cast_fp16)[name = string("op_3041_cast_fp16")]; + tensor hidden_states_473_cast_fp16 = mul(x = hidden_states_469_cast_fp16, y = var_3041_cast_fp16)[name = string("hidden_states_473_cast_fp16")]; + tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110952128)))]; + tensor input_123_cast_fp16 = mul(x = model_model_layers_15_post_attention_layernorm_weight_to_fp16, y = hidden_states_473_cast_fp16)[name = string("input_123_cast_fp16")]; + tensor model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110954112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112182976))))[name = string("model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_109_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_109_cast_fp16")]; + tensor var_3053_cast_fp16 = silu(x = linear_109_cast_fp16)[name = string("op_3053_cast_fp16")]; + tensor model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112336640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113565504))))[name = string("model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_110_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_110_cast_fp16")]; + tensor input_127_cast_fp16 = mul(x = var_3053_cast_fp16, y = linear_110_cast_fp16)[name = string("input_127_cast_fp16")]; + tensor model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113719168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114948032))))[name = string("model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_111_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized, x = input_127_cast_fp16)[name = string("linear_111_cast_fp16")]; + tensor hidden_states_479_cast_fp16 = add(x = hidden_states_469_cast_fp16, y = linear_111_cast_fp16)[name = string("hidden_states_479_cast_fp16")]; + fp16 var_80_promoted_32_to_fp16 = const()[name = string("op_80_promoted_32_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3066_cast_fp16 = pow(x = hidden_states_479_cast_fp16, y = var_80_promoted_32_to_fp16)[name = string("op_3066_cast_fp16")]; + tensor variance_65_axes_0 = const()[name = string("variance_65_axes_0"), val = tensor([-1])]; + bool variance_65_keep_dims_0 = const()[name = string("variance_65_keep_dims_0"), val = bool(true)]; + tensor variance_65_cast_fp16 = reduce_mean(axes = variance_65_axes_0, keep_dims = variance_65_keep_dims_0, x = var_3066_cast_fp16)[name = string("variance_65_cast_fp16")]; + fp16 var_3069_to_fp16 = const()[name = string("op_3069_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3070_cast_fp16 = add(x = variance_65_cast_fp16, y = var_3069_to_fp16)[name = string("op_3070_cast_fp16")]; + fp32 var_3071_epsilon_0 = const()[name = string("op_3071_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3071_cast_fp16 = rsqrt(epsilon = var_3071_epsilon_0, x = var_3070_cast_fp16)[name = string("op_3071_cast_fp16")]; + tensor hidden_states_483_cast_fp16 = mul(x = hidden_states_479_cast_fp16, y = var_3071_cast_fp16)[name = string("hidden_states_483_cast_fp16")]; + tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115101696)))]; + tensor hidden_states_487_cast_fp16 = mul(x = model_model_layers_16_input_layernorm_weight_to_fp16, y = hidden_states_483_cast_fp16)[name = string("hidden_states_487_cast_fp16")]; + tensor var_3082_shape_cast_fp16 = shape(x = hidden_states_487_cast_fp16)[name = string("op_3082_shape_cast_fp16")]; + int32 gather_292 = const()[name = string("gather_292"), val = int32(1)]; + int32 gather_293_axis_0 = const()[name = string("gather_293_axis_0"), val = int32(0)]; + int32 gather_293_batch_dims_0 = const()[name = string("gather_293_batch_dims_0"), val = int32(0)]; + bool gather_293_validate_indices_0 = const()[name = string("gather_293_validate_indices_0"), val = bool(false)]; + string var_3082_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3082_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_293_to_uint16 = const()[name = string("select_293_to_uint16"), val = uint16(1)]; + tensor var_3082_shape_cast_fp16_to_uint16 = cast(dtype = var_3082_shape_cast_fp16_to_uint16_dtype_0, x = var_3082_shape_cast_fp16)[name = string("cast_127")]; + uint16 gather_293_cast_uint16 = gather(axis = gather_293_axis_0, batch_dims = gather_293_batch_dims_0, indices = select_293_to_uint16, validate_indices = gather_293_validate_indices_0, x = var_3082_shape_cast_fp16_to_uint16)[name = string("gather_293_cast_uint16")]; + string gather_293_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_293_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115103680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115564544))))[name = string("model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_112_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_487_cast_fp16)[name = string("linear_112_cast_fp16")]; + tensor model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115622208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115775872))))[name = string("model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_487_cast_fp16)[name = string("linear_113_cast_fp16")]; + tensor model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115795136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115948800))))[name = string("model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_114_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_487_cast_fp16)[name = string("linear_114_cast_fp16")]; + tensor concat_304x = const()[name = string("concat_304x"), val = tensor([1, -1, 15, 64])]; + tensor var_3091_cast_fp16 = reshape(shape = concat_304x, x = linear_112_cast_fp16)[name = string("op_3091_cast_fp16")]; + tensor q_33_perm_0 = const()[name = string("q_33_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_305x = const()[name = string("concat_305x"), val = tensor([1, -1, 5, 64])]; + tensor var_3094_cast_fp16 = reshape(shape = concat_305x, x = linear_113_cast_fp16)[name = string("op_3094_cast_fp16")]; + tensor k_33_perm_0 = const()[name = string("k_33_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_306x = const()[name = string("concat_306x"), val = tensor([1, -1, 5, 64])]; + tensor var_3097_cast_fp16 = reshape(shape = concat_306x, x = linear_114_cast_fp16)[name = string("op_3097_cast_fp16")]; + tensor v_state_33_perm_0 = const()[name = string("v_state_33_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_33_cast_fp16 = transpose(perm = q_33_perm_0, x = var_3091_cast_fp16)[name = string("transpose_63")]; + tensor var_3101_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3101_cast_fp16")]; + tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; + tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3112_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_3112_cast_fp16")]; + bool var_3114_interleave_0 = const()[name = string("op_3114_interleave_0"), val = bool(false)]; + tensor var_3114_cast_fp16 = concat(axis = var_85, interleave = var_3114_interleave_0, values = (var_3112_cast_fp16, x1_65_cast_fp16))[name = string("op_3114_cast_fp16")]; + tensor var_3115_cast_fp16 = mul(x = var_3114_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3115_cast_fp16")]; + tensor query_states_67_cast_fp16 = add(x = var_3101_cast_fp16, y = var_3115_cast_fp16)[name = string("query_states_67_cast_fp16")]; + tensor k_33_cast_fp16 = transpose(perm = k_33_perm_0, x = var_3094_cast_fp16)[name = string("transpose_62")]; + tensor var_3117_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3117_cast_fp16")]; + tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; + tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; + fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3128_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_3128_cast_fp16")]; + bool var_3130_interleave_0 = const()[name = string("op_3130_interleave_0"), val = bool(false)]; + tensor var_3130_cast_fp16 = concat(axis = var_85, interleave = var_3130_interleave_0, values = (var_3128_cast_fp16, x1_67_cast_fp16))[name = string("op_3130_cast_fp16")]; + tensor var_3131_cast_fp16 = mul(x = var_3130_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3131_cast_fp16")]; + tensor k_state_33_cast_fp16 = add(x = var_3117_cast_fp16, y = var_3131_cast_fp16)[name = string("k_state_33_cast_fp16")]; + tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; + tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; + tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; + tensor concat_309_values0_0 = const()[name = string("concat_309_values0_0"), val = tensor([16])]; + int32 concat_309_axis_0 = const()[name = string("concat_309_axis_0"), val = int32(0)]; + bool concat_309_interleave_0 = const()[name = string("concat_309_interleave_0"), val = bool(false)]; + tensor concat_309 = concat(axis = concat_309_axis_0, interleave = concat_309_interleave_0, values = (concat_309_values0_0, expand_dims_192, expand_dims_193, expand_dims_2, expand_dims_195))[name = string("concat_309")]; + tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_309, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = k_state_33_cast_fp16, x = coreml_update_state_94)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_96_write_state")]; + tensor coreml_update_state_96 = read_state(input = key_cache)[name = string("coreml_update_state_96")]; + tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_33_cast_fp16 = transpose(perm = v_state_33_perm_0, x = var_3097_cast_fp16)[name = string("transpose_61")]; + tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_309, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = v_state_33_cast_fp16, x = coreml_update_state_95)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_97_write_state")]; + tensor coreml_update_state_97 = read_state(input = value_cache)[name = string("coreml_update_state_97")]; + tensor var_3154_begin_0 = const()[name = string("op_3154_begin_0"), val = tensor([16, 0, 0, 0, 0])]; + tensor var_3154_end_0 = const()[name = string("op_3154_end_0"), val = tensor([17, 1, 5, 2048, 64])]; + tensor var_3154_end_mask_0 = const()[name = string("op_3154_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3154_squeeze_mask_0 = const()[name = string("op_3154_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3154_cast_fp16 = slice_by_index(begin = var_3154_begin_0, end = var_3154_end_0, end_mask = var_3154_end_mask_0, squeeze_mask = var_3154_squeeze_mask_0, x = coreml_update_state_96)[name = string("op_3154_cast_fp16")]; + tensor var_3157_begin_0 = const()[name = string("op_3157_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3157_end_mask_0 = const()[name = string("op_3157_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3157_cast_fp16 = slice_by_index(begin = var_3157_begin_0, end = concat_11, end_mask = var_3157_end_mask_0, x = var_3154_cast_fp16)[name = string("op_3157_cast_fp16")]; + tensor var_3159_begin_0 = const()[name = string("op_3159_begin_0"), val = tensor([16, 0, 0, 0, 0])]; + tensor var_3159_end_0 = const()[name = string("op_3159_end_0"), val = tensor([17, 1, 5, 2048, 64])]; + tensor var_3159_end_mask_0 = const()[name = string("op_3159_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3159_squeeze_mask_0 = const()[name = string("op_3159_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3159_cast_fp16 = slice_by_index(begin = var_3159_begin_0, end = var_3159_end_0, end_mask = var_3159_end_mask_0, squeeze_mask = var_3159_squeeze_mask_0, x = coreml_update_state_97)[name = string("op_3159_cast_fp16")]; + tensor var_3162_begin_0 = const()[name = string("op_3162_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3162_end_mask_0 = const()[name = string("op_3162_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3162_cast_fp16 = slice_by_index(begin = var_3162_begin_0, end = concat_11, end_mask = var_3162_end_mask_0, x = var_3159_cast_fp16)[name = string("op_3162_cast_fp16")]; + tensor var_3164_shape_cast_fp16 = shape(x = var_3157_cast_fp16)[name = string("op_3164_shape_cast_fp16")]; + int32 gather_301 = const()[name = string("gather_301"), val = int32(1)]; + int32 gather_302 = const()[name = string("gather_302"), val = int32(5)]; + int32 gather_303_axis_0 = const()[name = string("gather_303_axis_0"), val = int32(0)]; + int32 gather_303_batch_dims_0 = const()[name = string("gather_303_batch_dims_0"), val = int32(0)]; + bool gather_303_validate_indices_0 = const()[name = string("gather_303_validate_indices_0"), val = bool(false)]; + string var_3164_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3164_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_303_to_uint16 = const()[name = string("select_303_to_uint16"), val = uint16(2)]; + tensor var_3164_shape_cast_fp16_to_uint16 = cast(dtype = var_3164_shape_cast_fp16_to_uint16_dtype_0, x = var_3164_shape_cast_fp16)[name = string("cast_126")]; + uint16 gather_303_cast_uint16 = gather(axis = gather_303_axis_0, batch_dims = gather_303_batch_dims_0, indices = select_303_to_uint16, validate_indices = gather_303_validate_indices_0, x = var_3164_shape_cast_fp16_to_uint16)[name = string("gather_303_cast_uint16")]; + string gather_303_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_303_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_304 = const()[name = string("gather_304"), val = int32(64)]; + tensor var_3171_axes_0 = const()[name = string("op_3171_axes_0"), val = tensor([2])]; + tensor var_3171_cast_fp16 = expand_dims(axes = var_3171_axes_0, x = var_3157_cast_fp16)[name = string("op_3171_cast_fp16")]; + tensor shape_337_cast_fp16 = shape(x = var_3171_cast_fp16)[name = string("shape_337_cast_fp16")]; + int32 concat_317_axis_0 = const()[name = string("concat_317_axis_0"), val = int32(0)]; + bool concat_317_interleave_0 = const()[name = string("concat_317_interleave_0"), val = bool(false)]; + int32 gather_303_cast_uint16_to_int32 = cast(dtype = gather_303_cast_uint16_to_int32_dtype_0, x = gather_303_cast_uint16)[name = string("cast_125")]; + tensor concat_317 = concat(axis = concat_317_axis_0, interleave = concat_317_interleave_0, values = (gather_301, gather_302, var_89, gather_303_cast_uint16_to_int32, gather_304))[name = string("concat_317")]; + tensor real_div_32 = real_div(x = concat_317, y = shape_337_cast_fp16)[name = string("real_div_32")]; + tensor hidden_states_491_cast_fp16 = tile(reps = real_div_32, x = var_3171_cast_fp16)[name = string("hidden_states_491_cast_fp16")]; + tensor concat_318x = const()[name = string("concat_318x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_67_cast_fp16 = reshape(shape = concat_318x, x = hidden_states_491_cast_fp16)[name = string("key_states_67_cast_fp16")]; + tensor var_3181_shape_cast_fp16 = shape(x = var_3162_cast_fp16)[name = string("op_3181_shape_cast_fp16")]; + int32 gather_305 = const()[name = string("gather_305"), val = int32(1)]; + int32 gather_306 = const()[name = string("gather_306"), val = int32(5)]; + int32 gather_307_axis_0 = const()[name = string("gather_307_axis_0"), val = int32(0)]; + int32 gather_307_batch_dims_0 = const()[name = string("gather_307_batch_dims_0"), val = int32(0)]; + bool gather_307_validate_indices_0 = const()[name = string("gather_307_validate_indices_0"), val = bool(false)]; + string var_3181_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3181_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_307_to_uint16 = const()[name = string("select_307_to_uint16"), val = uint16(2)]; + tensor var_3181_shape_cast_fp16_to_uint16 = cast(dtype = var_3181_shape_cast_fp16_to_uint16_dtype_0, x = var_3181_shape_cast_fp16)[name = string("cast_124")]; + uint16 gather_307_cast_uint16 = gather(axis = gather_307_axis_0, batch_dims = gather_307_batch_dims_0, indices = select_307_to_uint16, validate_indices = gather_307_validate_indices_0, x = var_3181_shape_cast_fp16_to_uint16)[name = string("gather_307_cast_uint16")]; + string gather_307_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_307_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_308 = const()[name = string("gather_308"), val = int32(64)]; + tensor var_3188_axes_0 = const()[name = string("op_3188_axes_0"), val = tensor([2])]; + tensor var_3188_cast_fp16 = expand_dims(axes = var_3188_axes_0, x = var_3162_cast_fp16)[name = string("op_3188_cast_fp16")]; + tensor shape_342_cast_fp16 = shape(x = var_3188_cast_fp16)[name = string("shape_342_cast_fp16")]; + int32 concat_319_axis_0 = const()[name = string("concat_319_axis_0"), val = int32(0)]; + bool concat_319_interleave_0 = const()[name = string("concat_319_interleave_0"), val = bool(false)]; + int32 gather_307_cast_uint16_to_int32 = cast(dtype = gather_307_cast_uint16_to_int32_dtype_0, x = gather_307_cast_uint16)[name = string("cast_123")]; + tensor concat_319 = concat(axis = concat_319_axis_0, interleave = concat_319_interleave_0, values = (gather_305, gather_306, var_89, gather_307_cast_uint16_to_int32, gather_308))[name = string("concat_319")]; + tensor real_div_33 = real_div(x = concat_319, y = shape_342_cast_fp16)[name = string("real_div_33")]; + tensor hidden_states_495_cast_fp16 = tile(reps = real_div_33, x = var_3188_cast_fp16)[name = string("hidden_states_495_cast_fp16")]; + tensor concat_320x = const()[name = string("concat_320x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_67_cast_fp16 = reshape(shape = concat_320x, x = hidden_states_495_cast_fp16)[name = string("value_states_67_cast_fp16")]; + tensor var_3198_shape_cast_fp16 = shape(x = key_states_67_cast_fp16)[name = string("op_3198_shape_cast_fp16")]; + int32 gather_309_axis_0 = const()[name = string("gather_309_axis_0"), val = int32(0)]; + int32 gather_309_batch_dims_0 = const()[name = string("gather_309_batch_dims_0"), val = int32(0)]; + bool gather_309_validate_indices_0 = const()[name = string("gather_309_validate_indices_0"), val = bool(false)]; + string var_3198_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3198_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_309_to_uint16 = const()[name = string("select_309_to_uint16"), val = uint16(2)]; + tensor var_3198_shape_cast_fp16_to_uint16 = cast(dtype = var_3198_shape_cast_fp16_to_uint16_dtype_0, x = var_3198_shape_cast_fp16)[name = string("cast_122")]; + uint16 gather_309_cast_uint16 = gather(axis = gather_309_axis_0, batch_dims = gather_309_batch_dims_0, indices = select_309_to_uint16, validate_indices = gather_309_validate_indices_0, x = var_3198_shape_cast_fp16_to_uint16)[name = string("gather_309_cast_uint16")]; + string gather_309_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_309_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_321_values0_0 = const()[name = string("concat_321_values0_0"), val = int32(1)]; + int32 concat_321_values1_0 = const()[name = string("concat_321_values1_0"), val = int32(1)]; + int32 concat_321_values2_0 = const()[name = string("concat_321_values2_0"), val = int32(0)]; + int32 concat_321_axis_0 = const()[name = string("concat_321_axis_0"), val = int32(0)]; + bool concat_321_interleave_0 = const()[name = string("concat_321_interleave_0"), val = bool(false)]; + int32 gather_309_cast_uint16_to_int32 = cast(dtype = gather_309_cast_uint16_to_int32_dtype_0, x = gather_309_cast_uint16)[name = string("cast_121")]; + tensor concat_321 = concat(axis = concat_321_axis_0, interleave = concat_321_interleave_0, values = (concat_321_values0_0, concat_321_values1_0, concat_321_values2_0, gather_309_cast_uint16_to_int32))[name = string("concat_321")]; + tensor causal_mask_35_begin_0 = const()[name = string("causal_mask_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_35_end_mask_0 = const()[name = string("causal_mask_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_35_cast_fp16 = slice_by_index(begin = causal_mask_35_begin_0, end = concat_321, end_mask = causal_mask_35_end_mask_0, x = causal_mask)[name = string("causal_mask_35_cast_fp16")]; + tensor attn_output_65_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_35_cast_fp16, key = key_states_67_cast_fp16, query = query_states_67_cast_fp16, value = value_states_67_cast_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor var_3204_perm_0 = const()[name = string("op_3204_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_322_axis_0 = const()[name = string("concat_322_axis_0"), val = int32(0)]; + bool concat_322_interleave_0 = const()[name = string("concat_322_interleave_0"), val = bool(false)]; + int32 gather_293_cast_uint16_to_int32 = cast(dtype = gather_293_cast_uint16_to_int32_dtype_0, x = gather_293_cast_uint16)[name = string("cast_120")]; + tensor concat_322 = concat(axis = concat_322_axis_0, interleave = concat_322_interleave_0, values = (gather_292, gather_293_cast_uint16_to_int32, var_85))[name = string("concat_322")]; + tensor var_3204_cast_fp16 = transpose(perm = var_3204_perm_0, x = attn_output_65_cast_fp16)[name = string("transpose_60")]; + tensor input_129_cast_fp16 = reshape(shape = concat_322, x = var_3204_cast_fp16)[name = string("input_129_cast_fp16")]; + tensor model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115968064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116428928))))[name = string("model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_115_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized, x = input_129_cast_fp16)[name = string("linear_115_cast_fp16")]; + tensor hidden_states_499_cast_fp16 = add(x = hidden_states_479_cast_fp16, y = linear_115_cast_fp16)[name = string("hidden_states_499_cast_fp16")]; + fp16 var_80_promoted_33_to_fp16 = const()[name = string("op_80_promoted_33_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3213_cast_fp16 = pow(x = hidden_states_499_cast_fp16, y = var_80_promoted_33_to_fp16)[name = string("op_3213_cast_fp16")]; + tensor variance_67_axes_0 = const()[name = string("variance_67_axes_0"), val = tensor([-1])]; + bool variance_67_keep_dims_0 = const()[name = string("variance_67_keep_dims_0"), val = bool(true)]; + tensor variance_67_cast_fp16 = reduce_mean(axes = variance_67_axes_0, keep_dims = variance_67_keep_dims_0, x = var_3213_cast_fp16)[name = string("variance_67_cast_fp16")]; + fp16 var_3216_to_fp16 = const()[name = string("op_3216_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3217_cast_fp16 = add(x = variance_67_cast_fp16, y = var_3216_to_fp16)[name = string("op_3217_cast_fp16")]; + fp32 var_3218_epsilon_0 = const()[name = string("op_3218_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3218_cast_fp16 = rsqrt(epsilon = var_3218_epsilon_0, x = var_3217_cast_fp16)[name = string("op_3218_cast_fp16")]; + tensor hidden_states_503_cast_fp16 = mul(x = hidden_states_499_cast_fp16, y = var_3218_cast_fp16)[name = string("hidden_states_503_cast_fp16")]; + tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116486592)))]; + tensor input_131_cast_fp16 = mul(x = model_model_layers_16_post_attention_layernorm_weight_to_fp16, y = hidden_states_503_cast_fp16)[name = string("input_131_cast_fp16")]; + tensor model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116488576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117717440))))[name = string("model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_116_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = string("linear_116_cast_fp16")]; + tensor var_3230_cast_fp16 = silu(x = linear_116_cast_fp16)[name = string("op_3230_cast_fp16")]; + tensor model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117871104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119099968))))[name = string("model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_117_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = string("linear_117_cast_fp16")]; + tensor input_135_cast_fp16 = mul(x = var_3230_cast_fp16, y = linear_117_cast_fp16)[name = string("input_135_cast_fp16")]; + tensor model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119253632))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120482496))))[name = string("model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_118_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized, x = input_135_cast_fp16)[name = string("linear_118_cast_fp16")]; + tensor hidden_states_509_cast_fp16 = add(x = hidden_states_499_cast_fp16, y = linear_118_cast_fp16)[name = string("hidden_states_509_cast_fp16")]; + fp16 var_80_promoted_34_to_fp16 = const()[name = string("op_80_promoted_34_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3243_cast_fp16 = pow(x = hidden_states_509_cast_fp16, y = var_80_promoted_34_to_fp16)[name = string("op_3243_cast_fp16")]; + tensor variance_69_axes_0 = const()[name = string("variance_69_axes_0"), val = tensor([-1])]; + bool variance_69_keep_dims_0 = const()[name = string("variance_69_keep_dims_0"), val = bool(true)]; + tensor variance_69_cast_fp16 = reduce_mean(axes = variance_69_axes_0, keep_dims = variance_69_keep_dims_0, x = var_3243_cast_fp16)[name = string("variance_69_cast_fp16")]; + fp16 var_3246_to_fp16 = const()[name = string("op_3246_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3247_cast_fp16 = add(x = variance_69_cast_fp16, y = var_3246_to_fp16)[name = string("op_3247_cast_fp16")]; + fp32 var_3248_epsilon_0 = const()[name = string("op_3248_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3248_cast_fp16 = rsqrt(epsilon = var_3248_epsilon_0, x = var_3247_cast_fp16)[name = string("op_3248_cast_fp16")]; + tensor hidden_states_513_cast_fp16 = mul(x = hidden_states_509_cast_fp16, y = var_3248_cast_fp16)[name = string("hidden_states_513_cast_fp16")]; + tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120636160)))]; + tensor hidden_states_517_cast_fp16 = mul(x = model_model_layers_17_input_layernorm_weight_to_fp16, y = hidden_states_513_cast_fp16)[name = string("hidden_states_517_cast_fp16")]; + tensor var_3259_shape_cast_fp16 = shape(x = hidden_states_517_cast_fp16)[name = string("op_3259_shape_cast_fp16")]; + int32 gather_310 = const()[name = string("gather_310"), val = int32(1)]; + int32 gather_311_axis_0 = const()[name = string("gather_311_axis_0"), val = int32(0)]; + int32 gather_311_batch_dims_0 = const()[name = string("gather_311_batch_dims_0"), val = int32(0)]; + bool gather_311_validate_indices_0 = const()[name = string("gather_311_validate_indices_0"), val = bool(false)]; + string var_3259_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3259_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_311_to_uint16 = const()[name = string("select_311_to_uint16"), val = uint16(1)]; + tensor var_3259_shape_cast_fp16_to_uint16 = cast(dtype = var_3259_shape_cast_fp16_to_uint16_dtype_0, x = var_3259_shape_cast_fp16)[name = string("cast_119")]; + uint16 gather_311_cast_uint16 = gather(axis = gather_311_axis_0, batch_dims = gather_311_batch_dims_0, indices = select_311_to_uint16, validate_indices = gather_311_validate_indices_0, x = var_3259_shape_cast_fp16_to_uint16)[name = string("gather_311_cast_uint16")]; + string gather_311_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_311_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120638144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121099008))))[name = string("model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_119_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_517_cast_fp16)[name = string("linear_119_cast_fp16")]; + tensor model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121156672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121310336))))[name = string("model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_120_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_517_cast_fp16)[name = string("linear_120_cast_fp16")]; + tensor model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121329600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121483264))))[name = string("model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_517_cast_fp16)[name = string("linear_121_cast_fp16")]; + tensor concat_323x = const()[name = string("concat_323x"), val = tensor([1, -1, 15, 64])]; + tensor var_3268_cast_fp16 = reshape(shape = concat_323x, x = linear_119_cast_fp16)[name = string("op_3268_cast_fp16")]; + tensor q_35_perm_0 = const()[name = string("q_35_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_324x = const()[name = string("concat_324x"), val = tensor([1, -1, 5, 64])]; + tensor var_3271_cast_fp16 = reshape(shape = concat_324x, x = linear_120_cast_fp16)[name = string("op_3271_cast_fp16")]; + tensor k_35_perm_0 = const()[name = string("k_35_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_325x = const()[name = string("concat_325x"), val = tensor([1, -1, 5, 64])]; + tensor var_3274_cast_fp16 = reshape(shape = concat_325x, x = linear_121_cast_fp16)[name = string("op_3274_cast_fp16")]; + tensor v_state_35_perm_0 = const()[name = string("v_state_35_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_35_cast_fp16 = transpose(perm = q_35_perm_0, x = var_3268_cast_fp16)[name = string("transpose_59")]; + tensor var_3278_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3278_cast_fp16")]; + tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; + tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3289_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_3289_cast_fp16")]; + bool var_3291_interleave_0 = const()[name = string("op_3291_interleave_0"), val = bool(false)]; + tensor var_3291_cast_fp16 = concat(axis = var_85, interleave = var_3291_interleave_0, values = (var_3289_cast_fp16, x1_69_cast_fp16))[name = string("op_3291_cast_fp16")]; + tensor var_3292_cast_fp16 = mul(x = var_3291_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3292_cast_fp16")]; + tensor query_states_71_cast_fp16 = add(x = var_3278_cast_fp16, y = var_3292_cast_fp16)[name = string("query_states_71_cast_fp16")]; + tensor k_35_cast_fp16 = transpose(perm = k_35_perm_0, x = var_3271_cast_fp16)[name = string("transpose_58")]; + tensor var_3294_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3294_cast_fp16")]; + tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; + tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3305_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_3305_cast_fp16")]; + bool var_3307_interleave_0 = const()[name = string("op_3307_interleave_0"), val = bool(false)]; + tensor var_3307_cast_fp16 = concat(axis = var_85, interleave = var_3307_interleave_0, values = (var_3305_cast_fp16, x1_71_cast_fp16))[name = string("op_3307_cast_fp16")]; + tensor var_3308_cast_fp16 = mul(x = var_3307_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3308_cast_fp16")]; + tensor k_state_35_cast_fp16 = add(x = var_3294_cast_fp16, y = var_3308_cast_fp16)[name = string("k_state_35_cast_fp16")]; + tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([0])]; + tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; + tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; + tensor concat_328_values0_0 = const()[name = string("concat_328_values0_0"), val = tensor([17])]; + int32 concat_328_axis_0 = const()[name = string("concat_328_axis_0"), val = int32(0)]; + bool concat_328_interleave_0 = const()[name = string("concat_328_interleave_0"), val = bool(false)]; + tensor concat_328 = concat(axis = concat_328_axis_0, interleave = concat_328_interleave_0, values = (concat_328_values0_0, expand_dims_204, expand_dims_205, expand_dims_2, expand_dims_207))[name = string("concat_328")]; + tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_328, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = k_state_35_cast_fp16, x = coreml_update_state_96)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_98_write_state")]; + tensor coreml_update_state_98 = read_state(input = key_cache)[name = string("coreml_update_state_98")]; + tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_35_cast_fp16 = transpose(perm = v_state_35_perm_0, x = var_3274_cast_fp16)[name = string("transpose_57")]; + tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_328, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = v_state_35_cast_fp16, x = coreml_update_state_97)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_99_write_state")]; + tensor coreml_update_state_99 = read_state(input = value_cache)[name = string("coreml_update_state_99")]; + tensor var_3331_begin_0 = const()[name = string("op_3331_begin_0"), val = tensor([17, 0, 0, 0, 0])]; + tensor var_3331_end_0 = const()[name = string("op_3331_end_0"), val = tensor([18, 1, 5, 2048, 64])]; + tensor var_3331_end_mask_0 = const()[name = string("op_3331_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3331_squeeze_mask_0 = const()[name = string("op_3331_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3331_cast_fp16 = slice_by_index(begin = var_3331_begin_0, end = var_3331_end_0, end_mask = var_3331_end_mask_0, squeeze_mask = var_3331_squeeze_mask_0, x = coreml_update_state_98)[name = string("op_3331_cast_fp16")]; + tensor var_3334_begin_0 = const()[name = string("op_3334_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3334_end_mask_0 = const()[name = string("op_3334_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3334_cast_fp16 = slice_by_index(begin = var_3334_begin_0, end = concat_11, end_mask = var_3334_end_mask_0, x = var_3331_cast_fp16)[name = string("op_3334_cast_fp16")]; + tensor var_3336_begin_0 = const()[name = string("op_3336_begin_0"), val = tensor([17, 0, 0, 0, 0])]; + tensor var_3336_end_0 = const()[name = string("op_3336_end_0"), val = tensor([18, 1, 5, 2048, 64])]; + tensor var_3336_end_mask_0 = const()[name = string("op_3336_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3336_squeeze_mask_0 = const()[name = string("op_3336_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3336_cast_fp16 = slice_by_index(begin = var_3336_begin_0, end = var_3336_end_0, end_mask = var_3336_end_mask_0, squeeze_mask = var_3336_squeeze_mask_0, x = coreml_update_state_99)[name = string("op_3336_cast_fp16")]; + tensor var_3339_begin_0 = const()[name = string("op_3339_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3339_end_mask_0 = const()[name = string("op_3339_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3339_cast_fp16 = slice_by_index(begin = var_3339_begin_0, end = concat_11, end_mask = var_3339_end_mask_0, x = var_3336_cast_fp16)[name = string("op_3339_cast_fp16")]; + tensor var_3341_shape_cast_fp16 = shape(x = var_3334_cast_fp16)[name = string("op_3341_shape_cast_fp16")]; + int32 gather_319 = const()[name = string("gather_319"), val = int32(1)]; + int32 gather_320 = const()[name = string("gather_320"), val = int32(5)]; + int32 gather_321_axis_0 = const()[name = string("gather_321_axis_0"), val = int32(0)]; + int32 gather_321_batch_dims_0 = const()[name = string("gather_321_batch_dims_0"), val = int32(0)]; + bool gather_321_validate_indices_0 = const()[name = string("gather_321_validate_indices_0"), val = bool(false)]; + string var_3341_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3341_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_321_to_uint16 = const()[name = string("select_321_to_uint16"), val = uint16(2)]; + tensor var_3341_shape_cast_fp16_to_uint16 = cast(dtype = var_3341_shape_cast_fp16_to_uint16_dtype_0, x = var_3341_shape_cast_fp16)[name = string("cast_118")]; + uint16 gather_321_cast_uint16 = gather(axis = gather_321_axis_0, batch_dims = gather_321_batch_dims_0, indices = select_321_to_uint16, validate_indices = gather_321_validate_indices_0, x = var_3341_shape_cast_fp16_to_uint16)[name = string("gather_321_cast_uint16")]; + string gather_321_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_321_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_322 = const()[name = string("gather_322"), val = int32(64)]; + tensor var_3348_axes_0 = const()[name = string("op_3348_axes_0"), val = tensor([2])]; + tensor var_3348_cast_fp16 = expand_dims(axes = var_3348_axes_0, x = var_3334_cast_fp16)[name = string("op_3348_cast_fp16")]; + tensor shape_357_cast_fp16 = shape(x = var_3348_cast_fp16)[name = string("shape_357_cast_fp16")]; + int32 concat_336_axis_0 = const()[name = string("concat_336_axis_0"), val = int32(0)]; + bool concat_336_interleave_0 = const()[name = string("concat_336_interleave_0"), val = bool(false)]; + int32 gather_321_cast_uint16_to_int32 = cast(dtype = gather_321_cast_uint16_to_int32_dtype_0, x = gather_321_cast_uint16)[name = string("cast_117")]; + tensor concat_336 = concat(axis = concat_336_axis_0, interleave = concat_336_interleave_0, values = (gather_319, gather_320, var_89, gather_321_cast_uint16_to_int32, gather_322))[name = string("concat_336")]; + tensor real_div_34 = real_div(x = concat_336, y = shape_357_cast_fp16)[name = string("real_div_34")]; + tensor hidden_states_521_cast_fp16 = tile(reps = real_div_34, x = var_3348_cast_fp16)[name = string("hidden_states_521_cast_fp16")]; + tensor concat_337x = const()[name = string("concat_337x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_71_cast_fp16 = reshape(shape = concat_337x, x = hidden_states_521_cast_fp16)[name = string("key_states_71_cast_fp16")]; + tensor var_3358_shape_cast_fp16 = shape(x = var_3339_cast_fp16)[name = string("op_3358_shape_cast_fp16")]; + int32 gather_323 = const()[name = string("gather_323"), val = int32(1)]; + int32 gather_324 = const()[name = string("gather_324"), val = int32(5)]; + int32 gather_325_axis_0 = const()[name = string("gather_325_axis_0"), val = int32(0)]; + int32 gather_325_batch_dims_0 = const()[name = string("gather_325_batch_dims_0"), val = int32(0)]; + bool gather_325_validate_indices_0 = const()[name = string("gather_325_validate_indices_0"), val = bool(false)]; + string var_3358_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3358_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_325_to_uint16 = const()[name = string("select_325_to_uint16"), val = uint16(2)]; + tensor var_3358_shape_cast_fp16_to_uint16 = cast(dtype = var_3358_shape_cast_fp16_to_uint16_dtype_0, x = var_3358_shape_cast_fp16)[name = string("cast_116")]; + uint16 gather_325_cast_uint16 = gather(axis = gather_325_axis_0, batch_dims = gather_325_batch_dims_0, indices = select_325_to_uint16, validate_indices = gather_325_validate_indices_0, x = var_3358_shape_cast_fp16_to_uint16)[name = string("gather_325_cast_uint16")]; + string gather_325_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_325_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_326 = const()[name = string("gather_326"), val = int32(64)]; + tensor var_3365_axes_0 = const()[name = string("op_3365_axes_0"), val = tensor([2])]; + tensor var_3365_cast_fp16 = expand_dims(axes = var_3365_axes_0, x = var_3339_cast_fp16)[name = string("op_3365_cast_fp16")]; + tensor shape_362_cast_fp16 = shape(x = var_3365_cast_fp16)[name = string("shape_362_cast_fp16")]; + int32 concat_338_axis_0 = const()[name = string("concat_338_axis_0"), val = int32(0)]; + bool concat_338_interleave_0 = const()[name = string("concat_338_interleave_0"), val = bool(false)]; + int32 gather_325_cast_uint16_to_int32 = cast(dtype = gather_325_cast_uint16_to_int32_dtype_0, x = gather_325_cast_uint16)[name = string("cast_115")]; + tensor concat_338 = concat(axis = concat_338_axis_0, interleave = concat_338_interleave_0, values = (gather_323, gather_324, var_89, gather_325_cast_uint16_to_int32, gather_326))[name = string("concat_338")]; + tensor real_div_35 = real_div(x = concat_338, y = shape_362_cast_fp16)[name = string("real_div_35")]; + tensor hidden_states_525_cast_fp16 = tile(reps = real_div_35, x = var_3365_cast_fp16)[name = string("hidden_states_525_cast_fp16")]; + tensor concat_339x = const()[name = string("concat_339x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_71_cast_fp16 = reshape(shape = concat_339x, x = hidden_states_525_cast_fp16)[name = string("value_states_71_cast_fp16")]; + tensor var_3375_shape_cast_fp16 = shape(x = key_states_71_cast_fp16)[name = string("op_3375_shape_cast_fp16")]; + int32 gather_327_axis_0 = const()[name = string("gather_327_axis_0"), val = int32(0)]; + int32 gather_327_batch_dims_0 = const()[name = string("gather_327_batch_dims_0"), val = int32(0)]; + bool gather_327_validate_indices_0 = const()[name = string("gather_327_validate_indices_0"), val = bool(false)]; + string var_3375_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3375_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_327_to_uint16 = const()[name = string("select_327_to_uint16"), val = uint16(2)]; + tensor var_3375_shape_cast_fp16_to_uint16 = cast(dtype = var_3375_shape_cast_fp16_to_uint16_dtype_0, x = var_3375_shape_cast_fp16)[name = string("cast_114")]; + uint16 gather_327_cast_uint16 = gather(axis = gather_327_axis_0, batch_dims = gather_327_batch_dims_0, indices = select_327_to_uint16, validate_indices = gather_327_validate_indices_0, x = var_3375_shape_cast_fp16_to_uint16)[name = string("gather_327_cast_uint16")]; + string gather_327_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_327_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)]; + int32 concat_340_values1_0 = const()[name = string("concat_340_values1_0"), val = int32(1)]; + int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(0)]; + int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)]; + bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)]; + int32 gather_327_cast_uint16_to_int32 = cast(dtype = gather_327_cast_uint16_to_int32_dtype_0, x = gather_327_cast_uint16)[name = string("cast_113")]; + tensor concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, concat_340_values1_0, concat_340_values2_0, gather_327_cast_uint16_to_int32))[name = string("concat_340")]; + tensor causal_mask_37_begin_0 = const()[name = string("causal_mask_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_37_end_mask_0 = const()[name = string("causal_mask_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_37_cast_fp16 = slice_by_index(begin = causal_mask_37_begin_0, end = concat_340, end_mask = causal_mask_37_end_mask_0, x = causal_mask)[name = string("causal_mask_37_cast_fp16")]; + tensor attn_output_69_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_37_cast_fp16, key = key_states_71_cast_fp16, query = query_states_71_cast_fp16, value = value_states_71_cast_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor var_3381_perm_0 = const()[name = string("op_3381_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_341_axis_0 = const()[name = string("concat_341_axis_0"), val = int32(0)]; + bool concat_341_interleave_0 = const()[name = string("concat_341_interleave_0"), val = bool(false)]; + int32 gather_311_cast_uint16_to_int32 = cast(dtype = gather_311_cast_uint16_to_int32_dtype_0, x = gather_311_cast_uint16)[name = string("cast_112")]; + tensor concat_341 = concat(axis = concat_341_axis_0, interleave = concat_341_interleave_0, values = (gather_310, gather_311_cast_uint16_to_int32, var_85))[name = string("concat_341")]; + tensor var_3381_cast_fp16 = transpose(perm = var_3381_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_56")]; + tensor input_137_cast_fp16 = reshape(shape = concat_341, x = var_3381_cast_fp16)[name = string("input_137_cast_fp16")]; + tensor model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121502528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121963392))))[name = string("model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_122_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = string("linear_122_cast_fp16")]; + tensor hidden_states_529_cast_fp16 = add(x = hidden_states_509_cast_fp16, y = linear_122_cast_fp16)[name = string("hidden_states_529_cast_fp16")]; + fp16 var_80_promoted_35_to_fp16 = const()[name = string("op_80_promoted_35_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3390_cast_fp16 = pow(x = hidden_states_529_cast_fp16, y = var_80_promoted_35_to_fp16)[name = string("op_3390_cast_fp16")]; + tensor variance_71_axes_0 = const()[name = string("variance_71_axes_0"), val = tensor([-1])]; + bool variance_71_keep_dims_0 = const()[name = string("variance_71_keep_dims_0"), val = bool(true)]; + tensor variance_71_cast_fp16 = reduce_mean(axes = variance_71_axes_0, keep_dims = variance_71_keep_dims_0, x = var_3390_cast_fp16)[name = string("variance_71_cast_fp16")]; + fp16 var_3393_to_fp16 = const()[name = string("op_3393_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3394_cast_fp16 = add(x = variance_71_cast_fp16, y = var_3393_to_fp16)[name = string("op_3394_cast_fp16")]; + fp32 var_3395_epsilon_0 = const()[name = string("op_3395_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3395_cast_fp16 = rsqrt(epsilon = var_3395_epsilon_0, x = var_3394_cast_fp16)[name = string("op_3395_cast_fp16")]; + tensor hidden_states_533_cast_fp16 = mul(x = hidden_states_529_cast_fp16, y = var_3395_cast_fp16)[name = string("hidden_states_533_cast_fp16")]; + tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122021056)))]; + tensor input_139_cast_fp16 = mul(x = model_model_layers_17_post_attention_layernorm_weight_to_fp16, y = hidden_states_533_cast_fp16)[name = string("input_139_cast_fp16")]; + tensor model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122023040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123251904))))[name = string("model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_123_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = string("linear_123_cast_fp16")]; + tensor var_3407_cast_fp16 = silu(x = linear_123_cast_fp16)[name = string("op_3407_cast_fp16")]; + tensor model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123405568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124634432))))[name = string("model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_124_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = string("linear_124_cast_fp16")]; + tensor input_143_cast_fp16 = mul(x = var_3407_cast_fp16, y = linear_124_cast_fp16)[name = string("input_143_cast_fp16")]; + tensor model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124788096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126016960))))[name = string("model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_125_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized, x = input_143_cast_fp16)[name = string("linear_125_cast_fp16")]; + tensor hidden_states_539_cast_fp16 = add(x = hidden_states_529_cast_fp16, y = linear_125_cast_fp16)[name = string("hidden_states_539_cast_fp16")]; + fp16 var_80_promoted_36_to_fp16 = const()[name = string("op_80_promoted_36_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3420_cast_fp16 = pow(x = hidden_states_539_cast_fp16, y = var_80_promoted_36_to_fp16)[name = string("op_3420_cast_fp16")]; + tensor variance_73_axes_0 = const()[name = string("variance_73_axes_0"), val = tensor([-1])]; + bool variance_73_keep_dims_0 = const()[name = string("variance_73_keep_dims_0"), val = bool(true)]; + tensor variance_73_cast_fp16 = reduce_mean(axes = variance_73_axes_0, keep_dims = variance_73_keep_dims_0, x = var_3420_cast_fp16)[name = string("variance_73_cast_fp16")]; + fp16 var_3423_to_fp16 = const()[name = string("op_3423_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3424_cast_fp16 = add(x = variance_73_cast_fp16, y = var_3423_to_fp16)[name = string("op_3424_cast_fp16")]; + fp32 var_3425_epsilon_0 = const()[name = string("op_3425_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3425_cast_fp16 = rsqrt(epsilon = var_3425_epsilon_0, x = var_3424_cast_fp16)[name = string("op_3425_cast_fp16")]; + tensor hidden_states_543_cast_fp16 = mul(x = hidden_states_539_cast_fp16, y = var_3425_cast_fp16)[name = string("hidden_states_543_cast_fp16")]; + tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126170624)))]; + tensor hidden_states_547_cast_fp16 = mul(x = model_model_layers_18_input_layernorm_weight_to_fp16, y = hidden_states_543_cast_fp16)[name = string("hidden_states_547_cast_fp16")]; + tensor var_3436_shape_cast_fp16 = shape(x = hidden_states_547_cast_fp16)[name = string("op_3436_shape_cast_fp16")]; + int32 gather_328 = const()[name = string("gather_328"), val = int32(1)]; + int32 gather_329_axis_0 = const()[name = string("gather_329_axis_0"), val = int32(0)]; + int32 gather_329_batch_dims_0 = const()[name = string("gather_329_batch_dims_0"), val = int32(0)]; + bool gather_329_validate_indices_0 = const()[name = string("gather_329_validate_indices_0"), val = bool(false)]; + string var_3436_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3436_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_329_to_uint16 = const()[name = string("select_329_to_uint16"), val = uint16(1)]; + tensor var_3436_shape_cast_fp16_to_uint16 = cast(dtype = var_3436_shape_cast_fp16_to_uint16_dtype_0, x = var_3436_shape_cast_fp16)[name = string("cast_111")]; + uint16 gather_329_cast_uint16 = gather(axis = gather_329_axis_0, batch_dims = gather_329_batch_dims_0, indices = select_329_to_uint16, validate_indices = gather_329_validate_indices_0, x = var_3436_shape_cast_fp16_to_uint16)[name = string("gather_329_cast_uint16")]; + string gather_329_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_329_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126172608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126633472))))[name = string("model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_126_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_547_cast_fp16)[name = string("linear_126_cast_fp16")]; + tensor model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126691136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126844800))))[name = string("model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_547_cast_fp16)[name = string("linear_127_cast_fp16")]; + tensor model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126864064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127017728))))[name = string("model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_128_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_547_cast_fp16)[name = string("linear_128_cast_fp16")]; + tensor concat_342x = const()[name = string("concat_342x"), val = tensor([1, -1, 15, 64])]; + tensor var_3445_cast_fp16 = reshape(shape = concat_342x, x = linear_126_cast_fp16)[name = string("op_3445_cast_fp16")]; + tensor q_37_perm_0 = const()[name = string("q_37_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_343x = const()[name = string("concat_343x"), val = tensor([1, -1, 5, 64])]; + tensor var_3448_cast_fp16 = reshape(shape = concat_343x, x = linear_127_cast_fp16)[name = string("op_3448_cast_fp16")]; + tensor k_37_perm_0 = const()[name = string("k_37_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_344x = const()[name = string("concat_344x"), val = tensor([1, -1, 5, 64])]; + tensor var_3451_cast_fp16 = reshape(shape = concat_344x, x = linear_128_cast_fp16)[name = string("op_3451_cast_fp16")]; + tensor v_state_37_perm_0 = const()[name = string("v_state_37_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_37_cast_fp16 = transpose(perm = q_37_perm_0, x = var_3445_cast_fp16)[name = string("transpose_55")]; + tensor var_3455_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3455_cast_fp16")]; + tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; + tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; + fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3466_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_3466_cast_fp16")]; + bool var_3468_interleave_0 = const()[name = string("op_3468_interleave_0"), val = bool(false)]; + tensor var_3468_cast_fp16 = concat(axis = var_85, interleave = var_3468_interleave_0, values = (var_3466_cast_fp16, x1_73_cast_fp16))[name = string("op_3468_cast_fp16")]; + tensor var_3469_cast_fp16 = mul(x = var_3468_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3469_cast_fp16")]; + tensor query_states_75_cast_fp16 = add(x = var_3455_cast_fp16, y = var_3469_cast_fp16)[name = string("query_states_75_cast_fp16")]; + tensor k_37_cast_fp16 = transpose(perm = k_37_perm_0, x = var_3448_cast_fp16)[name = string("transpose_54")]; + tensor var_3471_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3471_cast_fp16")]; + tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; + tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3482_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_3482_cast_fp16")]; + bool var_3484_interleave_0 = const()[name = string("op_3484_interleave_0"), val = bool(false)]; + tensor var_3484_cast_fp16 = concat(axis = var_85, interleave = var_3484_interleave_0, values = (var_3482_cast_fp16, x1_75_cast_fp16))[name = string("op_3484_cast_fp16")]; + tensor var_3485_cast_fp16 = mul(x = var_3484_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3485_cast_fp16")]; + tensor k_state_37_cast_fp16 = add(x = var_3471_cast_fp16, y = var_3485_cast_fp16)[name = string("k_state_37_cast_fp16")]; + tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([0])]; + tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; + tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; + tensor concat_347_values0_0 = const()[name = string("concat_347_values0_0"), val = tensor([18])]; + int32 concat_347_axis_0 = const()[name = string("concat_347_axis_0"), val = int32(0)]; + bool concat_347_interleave_0 = const()[name = string("concat_347_interleave_0"), val = bool(false)]; + tensor concat_347 = concat(axis = concat_347_axis_0, interleave = concat_347_interleave_0, values = (concat_347_values0_0, expand_dims_216, expand_dims_217, expand_dims_2, expand_dims_219))[name = string("concat_347")]; + tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_347, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = k_state_37_cast_fp16, x = coreml_update_state_98)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_100_write_state")]; + tensor coreml_update_state_100 = read_state(input = key_cache)[name = string("coreml_update_state_100")]; + tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_37_cast_fp16 = transpose(perm = v_state_37_perm_0, x = var_3451_cast_fp16)[name = string("transpose_53")]; + tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_347, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = v_state_37_cast_fp16, x = coreml_update_state_99)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_101_write_state")]; + tensor coreml_update_state_101 = read_state(input = value_cache)[name = string("coreml_update_state_101")]; + tensor var_3508_begin_0 = const()[name = string("op_3508_begin_0"), val = tensor([18, 0, 0, 0, 0])]; + tensor var_3508_end_0 = const()[name = string("op_3508_end_0"), val = tensor([19, 1, 5, 2048, 64])]; + tensor var_3508_end_mask_0 = const()[name = string("op_3508_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3508_squeeze_mask_0 = const()[name = string("op_3508_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3508_cast_fp16 = slice_by_index(begin = var_3508_begin_0, end = var_3508_end_0, end_mask = var_3508_end_mask_0, squeeze_mask = var_3508_squeeze_mask_0, x = coreml_update_state_100)[name = string("op_3508_cast_fp16")]; + tensor var_3511_begin_0 = const()[name = string("op_3511_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3511_end_mask_0 = const()[name = string("op_3511_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3511_cast_fp16 = slice_by_index(begin = var_3511_begin_0, end = concat_11, end_mask = var_3511_end_mask_0, x = var_3508_cast_fp16)[name = string("op_3511_cast_fp16")]; + tensor var_3513_begin_0 = const()[name = string("op_3513_begin_0"), val = tensor([18, 0, 0, 0, 0])]; + tensor var_3513_end_0 = const()[name = string("op_3513_end_0"), val = tensor([19, 1, 5, 2048, 64])]; + tensor var_3513_end_mask_0 = const()[name = string("op_3513_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3513_squeeze_mask_0 = const()[name = string("op_3513_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3513_cast_fp16 = slice_by_index(begin = var_3513_begin_0, end = var_3513_end_0, end_mask = var_3513_end_mask_0, squeeze_mask = var_3513_squeeze_mask_0, x = coreml_update_state_101)[name = string("op_3513_cast_fp16")]; + tensor var_3516_begin_0 = const()[name = string("op_3516_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3516_end_mask_0 = const()[name = string("op_3516_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3516_cast_fp16 = slice_by_index(begin = var_3516_begin_0, end = concat_11, end_mask = var_3516_end_mask_0, x = var_3513_cast_fp16)[name = string("op_3516_cast_fp16")]; + tensor var_3518_shape_cast_fp16 = shape(x = var_3511_cast_fp16)[name = string("op_3518_shape_cast_fp16")]; + int32 gather_337 = const()[name = string("gather_337"), val = int32(1)]; + int32 gather_338 = const()[name = string("gather_338"), val = int32(5)]; + int32 gather_339_axis_0 = const()[name = string("gather_339_axis_0"), val = int32(0)]; + int32 gather_339_batch_dims_0 = const()[name = string("gather_339_batch_dims_0"), val = int32(0)]; + bool gather_339_validate_indices_0 = const()[name = string("gather_339_validate_indices_0"), val = bool(false)]; + string var_3518_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3518_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_339_to_uint16 = const()[name = string("select_339_to_uint16"), val = uint16(2)]; + tensor var_3518_shape_cast_fp16_to_uint16 = cast(dtype = var_3518_shape_cast_fp16_to_uint16_dtype_0, x = var_3518_shape_cast_fp16)[name = string("cast_110")]; + uint16 gather_339_cast_uint16 = gather(axis = gather_339_axis_0, batch_dims = gather_339_batch_dims_0, indices = select_339_to_uint16, validate_indices = gather_339_validate_indices_0, x = var_3518_shape_cast_fp16_to_uint16)[name = string("gather_339_cast_uint16")]; + string gather_339_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_339_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_340 = const()[name = string("gather_340"), val = int32(64)]; + tensor var_3525_axes_0 = const()[name = string("op_3525_axes_0"), val = tensor([2])]; + tensor var_3525_cast_fp16 = expand_dims(axes = var_3525_axes_0, x = var_3511_cast_fp16)[name = string("op_3525_cast_fp16")]; + tensor shape_377_cast_fp16 = shape(x = var_3525_cast_fp16)[name = string("shape_377_cast_fp16")]; + int32 concat_355_axis_0 = const()[name = string("concat_355_axis_0"), val = int32(0)]; + bool concat_355_interleave_0 = const()[name = string("concat_355_interleave_0"), val = bool(false)]; + int32 gather_339_cast_uint16_to_int32 = cast(dtype = gather_339_cast_uint16_to_int32_dtype_0, x = gather_339_cast_uint16)[name = string("cast_109")]; + tensor concat_355 = concat(axis = concat_355_axis_0, interleave = concat_355_interleave_0, values = (gather_337, gather_338, var_89, gather_339_cast_uint16_to_int32, gather_340))[name = string("concat_355")]; + tensor real_div_36 = real_div(x = concat_355, y = shape_377_cast_fp16)[name = string("real_div_36")]; + tensor hidden_states_551_cast_fp16 = tile(reps = real_div_36, x = var_3525_cast_fp16)[name = string("hidden_states_551_cast_fp16")]; + tensor concat_356x = const()[name = string("concat_356x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_75_cast_fp16 = reshape(shape = concat_356x, x = hidden_states_551_cast_fp16)[name = string("key_states_75_cast_fp16")]; + tensor var_3535_shape_cast_fp16 = shape(x = var_3516_cast_fp16)[name = string("op_3535_shape_cast_fp16")]; + int32 gather_341 = const()[name = string("gather_341"), val = int32(1)]; + int32 gather_342 = const()[name = string("gather_342"), val = int32(5)]; + int32 gather_343_axis_0 = const()[name = string("gather_343_axis_0"), val = int32(0)]; + int32 gather_343_batch_dims_0 = const()[name = string("gather_343_batch_dims_0"), val = int32(0)]; + bool gather_343_validate_indices_0 = const()[name = string("gather_343_validate_indices_0"), val = bool(false)]; + string var_3535_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3535_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_343_to_uint16 = const()[name = string("select_343_to_uint16"), val = uint16(2)]; + tensor var_3535_shape_cast_fp16_to_uint16 = cast(dtype = var_3535_shape_cast_fp16_to_uint16_dtype_0, x = var_3535_shape_cast_fp16)[name = string("cast_108")]; + uint16 gather_343_cast_uint16 = gather(axis = gather_343_axis_0, batch_dims = gather_343_batch_dims_0, indices = select_343_to_uint16, validate_indices = gather_343_validate_indices_0, x = var_3535_shape_cast_fp16_to_uint16)[name = string("gather_343_cast_uint16")]; + string gather_343_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_343_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_344 = const()[name = string("gather_344"), val = int32(64)]; + tensor var_3542_axes_0 = const()[name = string("op_3542_axes_0"), val = tensor([2])]; + tensor var_3542_cast_fp16 = expand_dims(axes = var_3542_axes_0, x = var_3516_cast_fp16)[name = string("op_3542_cast_fp16")]; + tensor shape_382_cast_fp16 = shape(x = var_3542_cast_fp16)[name = string("shape_382_cast_fp16")]; + int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)]; + bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)]; + int32 gather_343_cast_uint16_to_int32 = cast(dtype = gather_343_cast_uint16_to_int32_dtype_0, x = gather_343_cast_uint16)[name = string("cast_107")]; + tensor concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (gather_341, gather_342, var_89, gather_343_cast_uint16_to_int32, gather_344))[name = string("concat_357")]; + tensor real_div_37 = real_div(x = concat_357, y = shape_382_cast_fp16)[name = string("real_div_37")]; + tensor hidden_states_555_cast_fp16 = tile(reps = real_div_37, x = var_3542_cast_fp16)[name = string("hidden_states_555_cast_fp16")]; + tensor concat_358x = const()[name = string("concat_358x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_75_cast_fp16 = reshape(shape = concat_358x, x = hidden_states_555_cast_fp16)[name = string("value_states_75_cast_fp16")]; + tensor var_3552_shape_cast_fp16 = shape(x = key_states_75_cast_fp16)[name = string("op_3552_shape_cast_fp16")]; + int32 gather_345_axis_0 = const()[name = string("gather_345_axis_0"), val = int32(0)]; + int32 gather_345_batch_dims_0 = const()[name = string("gather_345_batch_dims_0"), val = int32(0)]; + bool gather_345_validate_indices_0 = const()[name = string("gather_345_validate_indices_0"), val = bool(false)]; + string var_3552_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3552_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_345_to_uint16 = const()[name = string("select_345_to_uint16"), val = uint16(2)]; + tensor var_3552_shape_cast_fp16_to_uint16 = cast(dtype = var_3552_shape_cast_fp16_to_uint16_dtype_0, x = var_3552_shape_cast_fp16)[name = string("cast_106")]; + uint16 gather_345_cast_uint16 = gather(axis = gather_345_axis_0, batch_dims = gather_345_batch_dims_0, indices = select_345_to_uint16, validate_indices = gather_345_validate_indices_0, x = var_3552_shape_cast_fp16_to_uint16)[name = string("gather_345_cast_uint16")]; + string gather_345_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_345_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_359_values0_0 = const()[name = string("concat_359_values0_0"), val = int32(1)]; + int32 concat_359_values1_0 = const()[name = string("concat_359_values1_0"), val = int32(1)]; + int32 concat_359_values2_0 = const()[name = string("concat_359_values2_0"), val = int32(0)]; + int32 concat_359_axis_0 = const()[name = string("concat_359_axis_0"), val = int32(0)]; + bool concat_359_interleave_0 = const()[name = string("concat_359_interleave_0"), val = bool(false)]; + int32 gather_345_cast_uint16_to_int32 = cast(dtype = gather_345_cast_uint16_to_int32_dtype_0, x = gather_345_cast_uint16)[name = string("cast_105")]; + tensor concat_359 = concat(axis = concat_359_axis_0, interleave = concat_359_interleave_0, values = (concat_359_values0_0, concat_359_values1_0, concat_359_values2_0, gather_345_cast_uint16_to_int32))[name = string("concat_359")]; + tensor causal_mask_39_begin_0 = const()[name = string("causal_mask_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_39_end_mask_0 = const()[name = string("causal_mask_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_39_cast_fp16 = slice_by_index(begin = causal_mask_39_begin_0, end = concat_359, end_mask = causal_mask_39_end_mask_0, x = causal_mask)[name = string("causal_mask_39_cast_fp16")]; + tensor attn_output_73_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_39_cast_fp16, key = key_states_75_cast_fp16, query = query_states_75_cast_fp16, value = value_states_75_cast_fp16)[name = string("attn_output_73_cast_fp16")]; + tensor var_3558_perm_0 = const()[name = string("op_3558_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_360_axis_0 = const()[name = string("concat_360_axis_0"), val = int32(0)]; + bool concat_360_interleave_0 = const()[name = string("concat_360_interleave_0"), val = bool(false)]; + int32 gather_329_cast_uint16_to_int32 = cast(dtype = gather_329_cast_uint16_to_int32_dtype_0, x = gather_329_cast_uint16)[name = string("cast_104")]; + tensor concat_360 = concat(axis = concat_360_axis_0, interleave = concat_360_interleave_0, values = (gather_328, gather_329_cast_uint16_to_int32, var_85))[name = string("concat_360")]; + tensor var_3558_cast_fp16 = transpose(perm = var_3558_perm_0, x = attn_output_73_cast_fp16)[name = string("transpose_52")]; + tensor input_145_cast_fp16 = reshape(shape = concat_360, x = var_3558_cast_fp16)[name = string("input_145_cast_fp16")]; + tensor model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127036992))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127497856))))[name = string("model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_129_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized, x = input_145_cast_fp16)[name = string("linear_129_cast_fp16")]; + tensor hidden_states_559_cast_fp16 = add(x = hidden_states_539_cast_fp16, y = linear_129_cast_fp16)[name = string("hidden_states_559_cast_fp16")]; + fp16 var_80_promoted_37_to_fp16 = const()[name = string("op_80_promoted_37_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3567_cast_fp16 = pow(x = hidden_states_559_cast_fp16, y = var_80_promoted_37_to_fp16)[name = string("op_3567_cast_fp16")]; + tensor variance_75_axes_0 = const()[name = string("variance_75_axes_0"), val = tensor([-1])]; + bool variance_75_keep_dims_0 = const()[name = string("variance_75_keep_dims_0"), val = bool(true)]; + tensor variance_75_cast_fp16 = reduce_mean(axes = variance_75_axes_0, keep_dims = variance_75_keep_dims_0, x = var_3567_cast_fp16)[name = string("variance_75_cast_fp16")]; + fp16 var_3570_to_fp16 = const()[name = string("op_3570_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3571_cast_fp16 = add(x = variance_75_cast_fp16, y = var_3570_to_fp16)[name = string("op_3571_cast_fp16")]; + fp32 var_3572_epsilon_0 = const()[name = string("op_3572_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3572_cast_fp16 = rsqrt(epsilon = var_3572_epsilon_0, x = var_3571_cast_fp16)[name = string("op_3572_cast_fp16")]; + tensor hidden_states_563_cast_fp16 = mul(x = hidden_states_559_cast_fp16, y = var_3572_cast_fp16)[name = string("hidden_states_563_cast_fp16")]; + tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127555520)))]; + tensor input_147_cast_fp16 = mul(x = model_model_layers_18_post_attention_layernorm_weight_to_fp16, y = hidden_states_563_cast_fp16)[name = string("input_147_cast_fp16")]; + tensor model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127557504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128786368))))[name = string("model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_130_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = string("linear_130_cast_fp16")]; + tensor var_3584_cast_fp16 = silu(x = linear_130_cast_fp16)[name = string("op_3584_cast_fp16")]; + tensor model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128940032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130168896))))[name = string("model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_131_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = string("linear_131_cast_fp16")]; + tensor input_151_cast_fp16 = mul(x = var_3584_cast_fp16, y = linear_131_cast_fp16)[name = string("input_151_cast_fp16")]; + tensor model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130322560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131551424))))[name = string("model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_132_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized, x = input_151_cast_fp16)[name = string("linear_132_cast_fp16")]; + tensor hidden_states_569_cast_fp16 = add(x = hidden_states_559_cast_fp16, y = linear_132_cast_fp16)[name = string("hidden_states_569_cast_fp16")]; + fp16 var_80_promoted_38_to_fp16 = const()[name = string("op_80_promoted_38_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3597_cast_fp16 = pow(x = hidden_states_569_cast_fp16, y = var_80_promoted_38_to_fp16)[name = string("op_3597_cast_fp16")]; + tensor variance_77_axes_0 = const()[name = string("variance_77_axes_0"), val = tensor([-1])]; + bool variance_77_keep_dims_0 = const()[name = string("variance_77_keep_dims_0"), val = bool(true)]; + tensor variance_77_cast_fp16 = reduce_mean(axes = variance_77_axes_0, keep_dims = variance_77_keep_dims_0, x = var_3597_cast_fp16)[name = string("variance_77_cast_fp16")]; + fp16 var_3600_to_fp16 = const()[name = string("op_3600_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3601_cast_fp16 = add(x = variance_77_cast_fp16, y = var_3600_to_fp16)[name = string("op_3601_cast_fp16")]; + fp32 var_3602_epsilon_0 = const()[name = string("op_3602_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3602_cast_fp16 = rsqrt(epsilon = var_3602_epsilon_0, x = var_3601_cast_fp16)[name = string("op_3602_cast_fp16")]; + tensor hidden_states_573_cast_fp16 = mul(x = hidden_states_569_cast_fp16, y = var_3602_cast_fp16)[name = string("hidden_states_573_cast_fp16")]; + tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131705088)))]; + tensor hidden_states_577_cast_fp16 = mul(x = model_model_layers_19_input_layernorm_weight_to_fp16, y = hidden_states_573_cast_fp16)[name = string("hidden_states_577_cast_fp16")]; + tensor var_3613_shape_cast_fp16 = shape(x = hidden_states_577_cast_fp16)[name = string("op_3613_shape_cast_fp16")]; + int32 gather_346 = const()[name = string("gather_346"), val = int32(1)]; + int32 gather_347_axis_0 = const()[name = string("gather_347_axis_0"), val = int32(0)]; + int32 gather_347_batch_dims_0 = const()[name = string("gather_347_batch_dims_0"), val = int32(0)]; + bool gather_347_validate_indices_0 = const()[name = string("gather_347_validate_indices_0"), val = bool(false)]; + string var_3613_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3613_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_347_to_uint16 = const()[name = string("select_347_to_uint16"), val = uint16(1)]; + tensor var_3613_shape_cast_fp16_to_uint16 = cast(dtype = var_3613_shape_cast_fp16_to_uint16_dtype_0, x = var_3613_shape_cast_fp16)[name = string("cast_103")]; + uint16 gather_347_cast_uint16 = gather(axis = gather_347_axis_0, batch_dims = gather_347_batch_dims_0, indices = select_347_to_uint16, validate_indices = gather_347_validate_indices_0, x = var_3613_shape_cast_fp16_to_uint16)[name = string("gather_347_cast_uint16")]; + string gather_347_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_347_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131707072))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132167936))))[name = string("model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_133_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_577_cast_fp16)[name = string("linear_133_cast_fp16")]; + tensor model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132225600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132379264))))[name = string("model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_577_cast_fp16)[name = string("linear_134_cast_fp16")]; + tensor model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132398528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132552192))))[name = string("model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_135_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_577_cast_fp16)[name = string("linear_135_cast_fp16")]; + tensor concat_361x = const()[name = string("concat_361x"), val = tensor([1, -1, 15, 64])]; + tensor var_3622_cast_fp16 = reshape(shape = concat_361x, x = linear_133_cast_fp16)[name = string("op_3622_cast_fp16")]; + tensor q_39_perm_0 = const()[name = string("q_39_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_362x = const()[name = string("concat_362x"), val = tensor([1, -1, 5, 64])]; + tensor var_3625_cast_fp16 = reshape(shape = concat_362x, x = linear_134_cast_fp16)[name = string("op_3625_cast_fp16")]; + tensor k_39_perm_0 = const()[name = string("k_39_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_363x = const()[name = string("concat_363x"), val = tensor([1, -1, 5, 64])]; + tensor var_3628_cast_fp16 = reshape(shape = concat_363x, x = linear_135_cast_fp16)[name = string("op_3628_cast_fp16")]; + tensor v_state_39_perm_0 = const()[name = string("v_state_39_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_39_cast_fp16 = transpose(perm = q_39_perm_0, x = var_3622_cast_fp16)[name = string("transpose_51")]; + tensor var_3632_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3632_cast_fp16")]; + tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; + tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; + fp16 const_41_promoted_to_fp16 = const()[name = string("const_41_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3643_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_41_promoted_to_fp16)[name = string("op_3643_cast_fp16")]; + bool var_3645_interleave_0 = const()[name = string("op_3645_interleave_0"), val = bool(false)]; + tensor var_3645_cast_fp16 = concat(axis = var_85, interleave = var_3645_interleave_0, values = (var_3643_cast_fp16, x1_77_cast_fp16))[name = string("op_3645_cast_fp16")]; + tensor var_3646_cast_fp16 = mul(x = var_3645_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3646_cast_fp16")]; + tensor query_states_79_cast_fp16 = add(x = var_3632_cast_fp16, y = var_3646_cast_fp16)[name = string("query_states_79_cast_fp16")]; + tensor k_39_cast_fp16 = transpose(perm = k_39_perm_0, x = var_3625_cast_fp16)[name = string("transpose_50")]; + tensor var_3648_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3648_cast_fp16")]; + tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; + tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; + fp16 const_42_promoted_to_fp16 = const()[name = string("const_42_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3659_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_42_promoted_to_fp16)[name = string("op_3659_cast_fp16")]; + bool var_3661_interleave_0 = const()[name = string("op_3661_interleave_0"), val = bool(false)]; + tensor var_3661_cast_fp16 = concat(axis = var_85, interleave = var_3661_interleave_0, values = (var_3659_cast_fp16, x1_79_cast_fp16))[name = string("op_3661_cast_fp16")]; + tensor var_3662_cast_fp16 = mul(x = var_3661_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3662_cast_fp16")]; + tensor k_state_39_cast_fp16 = add(x = var_3648_cast_fp16, y = var_3662_cast_fp16)[name = string("k_state_39_cast_fp16")]; + tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([0])]; + tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; + tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; + tensor concat_366_values0_0 = const()[name = string("concat_366_values0_0"), val = tensor([19])]; + int32 concat_366_axis_0 = const()[name = string("concat_366_axis_0"), val = int32(0)]; + bool concat_366_interleave_0 = const()[name = string("concat_366_interleave_0"), val = bool(false)]; + tensor concat_366 = concat(axis = concat_366_axis_0, interleave = concat_366_interleave_0, values = (concat_366_values0_0, expand_dims_228, expand_dims_229, expand_dims_2, expand_dims_231))[name = string("concat_366")]; + tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_366, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = k_state_39_cast_fp16, x = coreml_update_state_100)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_102_write_state")]; + tensor coreml_update_state_102 = read_state(input = key_cache)[name = string("coreml_update_state_102")]; + tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_39_cast_fp16 = transpose(perm = v_state_39_perm_0, x = var_3628_cast_fp16)[name = string("transpose_49")]; + tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_366, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = v_state_39_cast_fp16, x = coreml_update_state_101)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_103_write_state")]; + tensor coreml_update_state_103 = read_state(input = value_cache)[name = string("coreml_update_state_103")]; + tensor var_3685_begin_0 = const()[name = string("op_3685_begin_0"), val = tensor([19, 0, 0, 0, 0])]; + tensor var_3685_end_0 = const()[name = string("op_3685_end_0"), val = tensor([20, 1, 5, 2048, 64])]; + tensor var_3685_end_mask_0 = const()[name = string("op_3685_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3685_squeeze_mask_0 = const()[name = string("op_3685_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3685_cast_fp16 = slice_by_index(begin = var_3685_begin_0, end = var_3685_end_0, end_mask = var_3685_end_mask_0, squeeze_mask = var_3685_squeeze_mask_0, x = coreml_update_state_102)[name = string("op_3685_cast_fp16")]; + tensor var_3688_begin_0 = const()[name = string("op_3688_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3688_end_mask_0 = const()[name = string("op_3688_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3688_cast_fp16 = slice_by_index(begin = var_3688_begin_0, end = concat_11, end_mask = var_3688_end_mask_0, x = var_3685_cast_fp16)[name = string("op_3688_cast_fp16")]; + tensor var_3690_begin_0 = const()[name = string("op_3690_begin_0"), val = tensor([19, 0, 0, 0, 0])]; + tensor var_3690_end_0 = const()[name = string("op_3690_end_0"), val = tensor([20, 1, 5, 2048, 64])]; + tensor var_3690_end_mask_0 = const()[name = string("op_3690_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3690_squeeze_mask_0 = const()[name = string("op_3690_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3690_cast_fp16 = slice_by_index(begin = var_3690_begin_0, end = var_3690_end_0, end_mask = var_3690_end_mask_0, squeeze_mask = var_3690_squeeze_mask_0, x = coreml_update_state_103)[name = string("op_3690_cast_fp16")]; + tensor var_3693_begin_0 = const()[name = string("op_3693_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3693_end_mask_0 = const()[name = string("op_3693_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3693_cast_fp16 = slice_by_index(begin = var_3693_begin_0, end = concat_11, end_mask = var_3693_end_mask_0, x = var_3690_cast_fp16)[name = string("op_3693_cast_fp16")]; + tensor var_3695_shape_cast_fp16 = shape(x = var_3688_cast_fp16)[name = string("op_3695_shape_cast_fp16")]; + int32 gather_355 = const()[name = string("gather_355"), val = int32(1)]; + int32 gather_356 = const()[name = string("gather_356"), val = int32(5)]; + int32 gather_357_axis_0 = const()[name = string("gather_357_axis_0"), val = int32(0)]; + int32 gather_357_batch_dims_0 = const()[name = string("gather_357_batch_dims_0"), val = int32(0)]; + bool gather_357_validate_indices_0 = const()[name = string("gather_357_validate_indices_0"), val = bool(false)]; + string var_3695_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3695_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_357_to_uint16 = const()[name = string("select_357_to_uint16"), val = uint16(2)]; + tensor var_3695_shape_cast_fp16_to_uint16 = cast(dtype = var_3695_shape_cast_fp16_to_uint16_dtype_0, x = var_3695_shape_cast_fp16)[name = string("cast_102")]; + uint16 gather_357_cast_uint16 = gather(axis = gather_357_axis_0, batch_dims = gather_357_batch_dims_0, indices = select_357_to_uint16, validate_indices = gather_357_validate_indices_0, x = var_3695_shape_cast_fp16_to_uint16)[name = string("gather_357_cast_uint16")]; + string gather_357_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_357_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_358 = const()[name = string("gather_358"), val = int32(64)]; + tensor var_3702_axes_0 = const()[name = string("op_3702_axes_0"), val = tensor([2])]; + tensor var_3702_cast_fp16 = expand_dims(axes = var_3702_axes_0, x = var_3688_cast_fp16)[name = string("op_3702_cast_fp16")]; + tensor shape_397_cast_fp16 = shape(x = var_3702_cast_fp16)[name = string("shape_397_cast_fp16")]; + int32 concat_374_axis_0 = const()[name = string("concat_374_axis_0"), val = int32(0)]; + bool concat_374_interleave_0 = const()[name = string("concat_374_interleave_0"), val = bool(false)]; + int32 gather_357_cast_uint16_to_int32 = cast(dtype = gather_357_cast_uint16_to_int32_dtype_0, x = gather_357_cast_uint16)[name = string("cast_101")]; + tensor concat_374 = concat(axis = concat_374_axis_0, interleave = concat_374_interleave_0, values = (gather_355, gather_356, var_89, gather_357_cast_uint16_to_int32, gather_358))[name = string("concat_374")]; + tensor real_div_38 = real_div(x = concat_374, y = shape_397_cast_fp16)[name = string("real_div_38")]; + tensor hidden_states_581_cast_fp16 = tile(reps = real_div_38, x = var_3702_cast_fp16)[name = string("hidden_states_581_cast_fp16")]; + tensor concat_375x = const()[name = string("concat_375x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_79_cast_fp16 = reshape(shape = concat_375x, x = hidden_states_581_cast_fp16)[name = string("key_states_79_cast_fp16")]; + tensor var_3712_shape_cast_fp16 = shape(x = var_3693_cast_fp16)[name = string("op_3712_shape_cast_fp16")]; + int32 gather_359 = const()[name = string("gather_359"), val = int32(1)]; + int32 gather_360 = const()[name = string("gather_360"), val = int32(5)]; + int32 gather_361_axis_0 = const()[name = string("gather_361_axis_0"), val = int32(0)]; + int32 gather_361_batch_dims_0 = const()[name = string("gather_361_batch_dims_0"), val = int32(0)]; + bool gather_361_validate_indices_0 = const()[name = string("gather_361_validate_indices_0"), val = bool(false)]; + string var_3712_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3712_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_361_to_uint16 = const()[name = string("select_361_to_uint16"), val = uint16(2)]; + tensor var_3712_shape_cast_fp16_to_uint16 = cast(dtype = var_3712_shape_cast_fp16_to_uint16_dtype_0, x = var_3712_shape_cast_fp16)[name = string("cast_100")]; + uint16 gather_361_cast_uint16 = gather(axis = gather_361_axis_0, batch_dims = gather_361_batch_dims_0, indices = select_361_to_uint16, validate_indices = gather_361_validate_indices_0, x = var_3712_shape_cast_fp16_to_uint16)[name = string("gather_361_cast_uint16")]; + string gather_361_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_361_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_362 = const()[name = string("gather_362"), val = int32(64)]; + tensor var_3719_axes_0 = const()[name = string("op_3719_axes_0"), val = tensor([2])]; + tensor var_3719_cast_fp16 = expand_dims(axes = var_3719_axes_0, x = var_3693_cast_fp16)[name = string("op_3719_cast_fp16")]; + tensor shape_402_cast_fp16 = shape(x = var_3719_cast_fp16)[name = string("shape_402_cast_fp16")]; + int32 concat_376_axis_0 = const()[name = string("concat_376_axis_0"), val = int32(0)]; + bool concat_376_interleave_0 = const()[name = string("concat_376_interleave_0"), val = bool(false)]; + int32 gather_361_cast_uint16_to_int32 = cast(dtype = gather_361_cast_uint16_to_int32_dtype_0, x = gather_361_cast_uint16)[name = string("cast_99")]; + tensor concat_376 = concat(axis = concat_376_axis_0, interleave = concat_376_interleave_0, values = (gather_359, gather_360, var_89, gather_361_cast_uint16_to_int32, gather_362))[name = string("concat_376")]; + tensor real_div_39 = real_div(x = concat_376, y = shape_402_cast_fp16)[name = string("real_div_39")]; + tensor hidden_states_585_cast_fp16 = tile(reps = real_div_39, x = var_3719_cast_fp16)[name = string("hidden_states_585_cast_fp16")]; + tensor concat_377x = const()[name = string("concat_377x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_79_cast_fp16 = reshape(shape = concat_377x, x = hidden_states_585_cast_fp16)[name = string("value_states_79_cast_fp16")]; + tensor var_3729_shape_cast_fp16 = shape(x = key_states_79_cast_fp16)[name = string("op_3729_shape_cast_fp16")]; + int32 gather_363_axis_0 = const()[name = string("gather_363_axis_0"), val = int32(0)]; + int32 gather_363_batch_dims_0 = const()[name = string("gather_363_batch_dims_0"), val = int32(0)]; + bool gather_363_validate_indices_0 = const()[name = string("gather_363_validate_indices_0"), val = bool(false)]; + string var_3729_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3729_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_363_to_uint16 = const()[name = string("select_363_to_uint16"), val = uint16(2)]; + tensor var_3729_shape_cast_fp16_to_uint16 = cast(dtype = var_3729_shape_cast_fp16_to_uint16_dtype_0, x = var_3729_shape_cast_fp16)[name = string("cast_98")]; + uint16 gather_363_cast_uint16 = gather(axis = gather_363_axis_0, batch_dims = gather_363_batch_dims_0, indices = select_363_to_uint16, validate_indices = gather_363_validate_indices_0, x = var_3729_shape_cast_fp16_to_uint16)[name = string("gather_363_cast_uint16")]; + string gather_363_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_363_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = int32(1)]; + int32 concat_378_values1_0 = const()[name = string("concat_378_values1_0"), val = int32(1)]; + int32 concat_378_values2_0 = const()[name = string("concat_378_values2_0"), val = int32(0)]; + int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)]; + bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)]; + int32 gather_363_cast_uint16_to_int32 = cast(dtype = gather_363_cast_uint16_to_int32_dtype_0, x = gather_363_cast_uint16)[name = string("cast_97")]; + tensor concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, concat_378_values1_0, concat_378_values2_0, gather_363_cast_uint16_to_int32))[name = string("concat_378")]; + tensor causal_mask_41_begin_0 = const()[name = string("causal_mask_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_41_end_mask_0 = const()[name = string("causal_mask_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_41_cast_fp16 = slice_by_index(begin = causal_mask_41_begin_0, end = concat_378, end_mask = causal_mask_41_end_mask_0, x = causal_mask)[name = string("causal_mask_41_cast_fp16")]; + tensor attn_output_77_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_41_cast_fp16, key = key_states_79_cast_fp16, query = query_states_79_cast_fp16, value = value_states_79_cast_fp16)[name = string("attn_output_77_cast_fp16")]; + tensor var_3735_perm_0 = const()[name = string("op_3735_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)]; + bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)]; + int32 gather_347_cast_uint16_to_int32 = cast(dtype = gather_347_cast_uint16_to_int32_dtype_0, x = gather_347_cast_uint16)[name = string("cast_96")]; + tensor concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (gather_346, gather_347_cast_uint16_to_int32, var_85))[name = string("concat_379")]; + tensor var_3735_cast_fp16 = transpose(perm = var_3735_perm_0, x = attn_output_77_cast_fp16)[name = string("transpose_48")]; + tensor input_153_cast_fp16 = reshape(shape = concat_379, x = var_3735_cast_fp16)[name = string("input_153_cast_fp16")]; + tensor model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132571456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133032320))))[name = string("model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_136_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = string("linear_136_cast_fp16")]; + tensor hidden_states_589_cast_fp16 = add(x = hidden_states_569_cast_fp16, y = linear_136_cast_fp16)[name = string("hidden_states_589_cast_fp16")]; + fp16 var_80_promoted_39_to_fp16 = const()[name = string("op_80_promoted_39_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3744_cast_fp16 = pow(x = hidden_states_589_cast_fp16, y = var_80_promoted_39_to_fp16)[name = string("op_3744_cast_fp16")]; + tensor variance_79_axes_0 = const()[name = string("variance_79_axes_0"), val = tensor([-1])]; + bool variance_79_keep_dims_0 = const()[name = string("variance_79_keep_dims_0"), val = bool(true)]; + tensor variance_79_cast_fp16 = reduce_mean(axes = variance_79_axes_0, keep_dims = variance_79_keep_dims_0, x = var_3744_cast_fp16)[name = string("variance_79_cast_fp16")]; + fp16 var_3747_to_fp16 = const()[name = string("op_3747_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3748_cast_fp16 = add(x = variance_79_cast_fp16, y = var_3747_to_fp16)[name = string("op_3748_cast_fp16")]; + fp32 var_3749_epsilon_0 = const()[name = string("op_3749_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3749_cast_fp16 = rsqrt(epsilon = var_3749_epsilon_0, x = var_3748_cast_fp16)[name = string("op_3749_cast_fp16")]; + tensor hidden_states_593_cast_fp16 = mul(x = hidden_states_589_cast_fp16, y = var_3749_cast_fp16)[name = string("hidden_states_593_cast_fp16")]; + tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133089984)))]; + tensor input_155_cast_fp16 = mul(x = model_model_layers_19_post_attention_layernorm_weight_to_fp16, y = hidden_states_593_cast_fp16)[name = string("input_155_cast_fp16")]; + tensor model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133091968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134320832))))[name = string("model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_137_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized, x = input_155_cast_fp16)[name = string("linear_137_cast_fp16")]; + tensor var_3761_cast_fp16 = silu(x = linear_137_cast_fp16)[name = string("op_3761_cast_fp16")]; + tensor model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134474496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135703360))))[name = string("model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_138_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized, x = input_155_cast_fp16)[name = string("linear_138_cast_fp16")]; + tensor input_159_cast_fp16 = mul(x = var_3761_cast_fp16, y = linear_138_cast_fp16)[name = string("input_159_cast_fp16")]; + tensor model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135857024))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137085888))))[name = string("model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_139_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = string("linear_139_cast_fp16")]; + tensor hidden_states_599_cast_fp16 = add(x = hidden_states_589_cast_fp16, y = linear_139_cast_fp16)[name = string("hidden_states_599_cast_fp16")]; + fp16 var_80_promoted_40_to_fp16 = const()[name = string("op_80_promoted_40_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3774_cast_fp16 = pow(x = hidden_states_599_cast_fp16, y = var_80_promoted_40_to_fp16)[name = string("op_3774_cast_fp16")]; + tensor variance_81_axes_0 = const()[name = string("variance_81_axes_0"), val = tensor([-1])]; + bool variance_81_keep_dims_0 = const()[name = string("variance_81_keep_dims_0"), val = bool(true)]; + tensor variance_81_cast_fp16 = reduce_mean(axes = variance_81_axes_0, keep_dims = variance_81_keep_dims_0, x = var_3774_cast_fp16)[name = string("variance_81_cast_fp16")]; + fp16 var_3777_to_fp16 = const()[name = string("op_3777_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3778_cast_fp16 = add(x = variance_81_cast_fp16, y = var_3777_to_fp16)[name = string("op_3778_cast_fp16")]; + fp32 var_3779_epsilon_0 = const()[name = string("op_3779_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3779_cast_fp16 = rsqrt(epsilon = var_3779_epsilon_0, x = var_3778_cast_fp16)[name = string("op_3779_cast_fp16")]; + tensor hidden_states_603_cast_fp16 = mul(x = hidden_states_599_cast_fp16, y = var_3779_cast_fp16)[name = string("hidden_states_603_cast_fp16")]; + tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137239552)))]; + tensor hidden_states_607_cast_fp16 = mul(x = model_model_layers_20_input_layernorm_weight_to_fp16, y = hidden_states_603_cast_fp16)[name = string("hidden_states_607_cast_fp16")]; + tensor var_3790_shape_cast_fp16 = shape(x = hidden_states_607_cast_fp16)[name = string("op_3790_shape_cast_fp16")]; + int32 gather_364 = const()[name = string("gather_364"), val = int32(1)]; + int32 gather_365_axis_0 = const()[name = string("gather_365_axis_0"), val = int32(0)]; + int32 gather_365_batch_dims_0 = const()[name = string("gather_365_batch_dims_0"), val = int32(0)]; + bool gather_365_validate_indices_0 = const()[name = string("gather_365_validate_indices_0"), val = bool(false)]; + string var_3790_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3790_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_365_to_uint16 = const()[name = string("select_365_to_uint16"), val = uint16(1)]; + tensor var_3790_shape_cast_fp16_to_uint16 = cast(dtype = var_3790_shape_cast_fp16_to_uint16_dtype_0, x = var_3790_shape_cast_fp16)[name = string("cast_95")]; + uint16 gather_365_cast_uint16 = gather(axis = gather_365_axis_0, batch_dims = gather_365_batch_dims_0, indices = select_365_to_uint16, validate_indices = gather_365_validate_indices_0, x = var_3790_shape_cast_fp16_to_uint16)[name = string("gather_365_cast_uint16")]; + string gather_365_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_365_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137241536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137702400))))[name = string("model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_140_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_607_cast_fp16)[name = string("linear_140_cast_fp16")]; + tensor model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137760064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137913728))))[name = string("model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_141_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_607_cast_fp16)[name = string("linear_141_cast_fp16")]; + tensor model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137932992))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138086656))))[name = string("model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_142_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_607_cast_fp16)[name = string("linear_142_cast_fp16")]; + tensor concat_380x = const()[name = string("concat_380x"), val = tensor([1, -1, 15, 64])]; + tensor var_3799_cast_fp16 = reshape(shape = concat_380x, x = linear_140_cast_fp16)[name = string("op_3799_cast_fp16")]; + tensor q_41_perm_0 = const()[name = string("q_41_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_381x = const()[name = string("concat_381x"), val = tensor([1, -1, 5, 64])]; + tensor var_3802_cast_fp16 = reshape(shape = concat_381x, x = linear_141_cast_fp16)[name = string("op_3802_cast_fp16")]; + tensor k_41_perm_0 = const()[name = string("k_41_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_382x = const()[name = string("concat_382x"), val = tensor([1, -1, 5, 64])]; + tensor var_3805_cast_fp16 = reshape(shape = concat_382x, x = linear_142_cast_fp16)[name = string("op_3805_cast_fp16")]; + tensor v_state_41_perm_0 = const()[name = string("v_state_41_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_41_cast_fp16 = transpose(perm = q_41_perm_0, x = var_3799_cast_fp16)[name = string("transpose_47")]; + tensor var_3809_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3809_cast_fp16")]; + tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; + tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3820_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_3820_cast_fp16")]; + bool var_3822_interleave_0 = const()[name = string("op_3822_interleave_0"), val = bool(false)]; + tensor var_3822_cast_fp16 = concat(axis = var_85, interleave = var_3822_interleave_0, values = (var_3820_cast_fp16, x1_81_cast_fp16))[name = string("op_3822_cast_fp16")]; + tensor var_3823_cast_fp16 = mul(x = var_3822_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3823_cast_fp16")]; + tensor query_states_83_cast_fp16 = add(x = var_3809_cast_fp16, y = var_3823_cast_fp16)[name = string("query_states_83_cast_fp16")]; + tensor k_41_cast_fp16 = transpose(perm = k_41_perm_0, x = var_3802_cast_fp16)[name = string("transpose_46")]; + tensor var_3825_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3825_cast_fp16")]; + tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; + tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3836_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_3836_cast_fp16")]; + bool var_3838_interleave_0 = const()[name = string("op_3838_interleave_0"), val = bool(false)]; + tensor var_3838_cast_fp16 = concat(axis = var_85, interleave = var_3838_interleave_0, values = (var_3836_cast_fp16, x1_83_cast_fp16))[name = string("op_3838_cast_fp16")]; + tensor var_3839_cast_fp16 = mul(x = var_3838_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3839_cast_fp16")]; + tensor k_state_41_cast_fp16 = add(x = var_3825_cast_fp16, y = var_3839_cast_fp16)[name = string("k_state_41_cast_fp16")]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; + tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; + tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; + tensor concat_385_values0_0 = const()[name = string("concat_385_values0_0"), val = tensor([20])]; + int32 concat_385_axis_0 = const()[name = string("concat_385_axis_0"), val = int32(0)]; + bool concat_385_interleave_0 = const()[name = string("concat_385_interleave_0"), val = bool(false)]; + tensor concat_385 = concat(axis = concat_385_axis_0, interleave = concat_385_interleave_0, values = (concat_385_values0_0, expand_dims_240, expand_dims_241, expand_dims_2, expand_dims_243))[name = string("concat_385")]; + tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_385, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = k_state_41_cast_fp16, x = coreml_update_state_102)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_104_write_state")]; + tensor coreml_update_state_104 = read_state(input = key_cache)[name = string("coreml_update_state_104")]; + tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_41_cast_fp16 = transpose(perm = v_state_41_perm_0, x = var_3805_cast_fp16)[name = string("transpose_45")]; + tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_385, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = v_state_41_cast_fp16, x = coreml_update_state_103)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_105_write_state")]; + tensor coreml_update_state_105 = read_state(input = value_cache)[name = string("coreml_update_state_105")]; + tensor var_3862_begin_0 = const()[name = string("op_3862_begin_0"), val = tensor([20, 0, 0, 0, 0])]; + tensor var_3862_end_0 = const()[name = string("op_3862_end_0"), val = tensor([21, 1, 5, 2048, 64])]; + tensor var_3862_end_mask_0 = const()[name = string("op_3862_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3862_squeeze_mask_0 = const()[name = string("op_3862_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3862_cast_fp16 = slice_by_index(begin = var_3862_begin_0, end = var_3862_end_0, end_mask = var_3862_end_mask_0, squeeze_mask = var_3862_squeeze_mask_0, x = coreml_update_state_104)[name = string("op_3862_cast_fp16")]; + tensor var_3865_begin_0 = const()[name = string("op_3865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3865_end_mask_0 = const()[name = string("op_3865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3865_cast_fp16 = slice_by_index(begin = var_3865_begin_0, end = concat_11, end_mask = var_3865_end_mask_0, x = var_3862_cast_fp16)[name = string("op_3865_cast_fp16")]; + tensor var_3867_begin_0 = const()[name = string("op_3867_begin_0"), val = tensor([20, 0, 0, 0, 0])]; + tensor var_3867_end_0 = const()[name = string("op_3867_end_0"), val = tensor([21, 1, 5, 2048, 64])]; + tensor var_3867_end_mask_0 = const()[name = string("op_3867_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3867_squeeze_mask_0 = const()[name = string("op_3867_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3867_cast_fp16 = slice_by_index(begin = var_3867_begin_0, end = var_3867_end_0, end_mask = var_3867_end_mask_0, squeeze_mask = var_3867_squeeze_mask_0, x = coreml_update_state_105)[name = string("op_3867_cast_fp16")]; + tensor var_3870_begin_0 = const()[name = string("op_3870_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3870_end_mask_0 = const()[name = string("op_3870_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3870_cast_fp16 = slice_by_index(begin = var_3870_begin_0, end = concat_11, end_mask = var_3870_end_mask_0, x = var_3867_cast_fp16)[name = string("op_3870_cast_fp16")]; + tensor var_3872_shape_cast_fp16 = shape(x = var_3865_cast_fp16)[name = string("op_3872_shape_cast_fp16")]; + int32 gather_373 = const()[name = string("gather_373"), val = int32(1)]; + int32 gather_374 = const()[name = string("gather_374"), val = int32(5)]; + int32 gather_375_axis_0 = const()[name = string("gather_375_axis_0"), val = int32(0)]; + int32 gather_375_batch_dims_0 = const()[name = string("gather_375_batch_dims_0"), val = int32(0)]; + bool gather_375_validate_indices_0 = const()[name = string("gather_375_validate_indices_0"), val = bool(false)]; + string var_3872_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3872_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_375_to_uint16 = const()[name = string("select_375_to_uint16"), val = uint16(2)]; + tensor var_3872_shape_cast_fp16_to_uint16 = cast(dtype = var_3872_shape_cast_fp16_to_uint16_dtype_0, x = var_3872_shape_cast_fp16)[name = string("cast_94")]; + uint16 gather_375_cast_uint16 = gather(axis = gather_375_axis_0, batch_dims = gather_375_batch_dims_0, indices = select_375_to_uint16, validate_indices = gather_375_validate_indices_0, x = var_3872_shape_cast_fp16_to_uint16)[name = string("gather_375_cast_uint16")]; + string gather_375_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_375_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_376 = const()[name = string("gather_376"), val = int32(64)]; + tensor var_3879_axes_0 = const()[name = string("op_3879_axes_0"), val = tensor([2])]; + tensor var_3879_cast_fp16 = expand_dims(axes = var_3879_axes_0, x = var_3865_cast_fp16)[name = string("op_3879_cast_fp16")]; + tensor shape_417_cast_fp16 = shape(x = var_3879_cast_fp16)[name = string("shape_417_cast_fp16")]; + int32 concat_393_axis_0 = const()[name = string("concat_393_axis_0"), val = int32(0)]; + bool concat_393_interleave_0 = const()[name = string("concat_393_interleave_0"), val = bool(false)]; + int32 gather_375_cast_uint16_to_int32 = cast(dtype = gather_375_cast_uint16_to_int32_dtype_0, x = gather_375_cast_uint16)[name = string("cast_93")]; + tensor concat_393 = concat(axis = concat_393_axis_0, interleave = concat_393_interleave_0, values = (gather_373, gather_374, var_89, gather_375_cast_uint16_to_int32, gather_376))[name = string("concat_393")]; + tensor real_div_40 = real_div(x = concat_393, y = shape_417_cast_fp16)[name = string("real_div_40")]; + tensor hidden_states_611_cast_fp16 = tile(reps = real_div_40, x = var_3879_cast_fp16)[name = string("hidden_states_611_cast_fp16")]; + tensor concat_394x = const()[name = string("concat_394x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_83_cast_fp16 = reshape(shape = concat_394x, x = hidden_states_611_cast_fp16)[name = string("key_states_83_cast_fp16")]; + tensor var_3889_shape_cast_fp16 = shape(x = var_3870_cast_fp16)[name = string("op_3889_shape_cast_fp16")]; + int32 gather_377 = const()[name = string("gather_377"), val = int32(1)]; + int32 gather_378 = const()[name = string("gather_378"), val = int32(5)]; + int32 gather_379_axis_0 = const()[name = string("gather_379_axis_0"), val = int32(0)]; + int32 gather_379_batch_dims_0 = const()[name = string("gather_379_batch_dims_0"), val = int32(0)]; + bool gather_379_validate_indices_0 = const()[name = string("gather_379_validate_indices_0"), val = bool(false)]; + string var_3889_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3889_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_379_to_uint16 = const()[name = string("select_379_to_uint16"), val = uint16(2)]; + tensor var_3889_shape_cast_fp16_to_uint16 = cast(dtype = var_3889_shape_cast_fp16_to_uint16_dtype_0, x = var_3889_shape_cast_fp16)[name = string("cast_92")]; + uint16 gather_379_cast_uint16 = gather(axis = gather_379_axis_0, batch_dims = gather_379_batch_dims_0, indices = select_379_to_uint16, validate_indices = gather_379_validate_indices_0, x = var_3889_shape_cast_fp16_to_uint16)[name = string("gather_379_cast_uint16")]; + string gather_379_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_379_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_380 = const()[name = string("gather_380"), val = int32(64)]; + tensor var_3896_axes_0 = const()[name = string("op_3896_axes_0"), val = tensor([2])]; + tensor var_3896_cast_fp16 = expand_dims(axes = var_3896_axes_0, x = var_3870_cast_fp16)[name = string("op_3896_cast_fp16")]; + tensor shape_422_cast_fp16 = shape(x = var_3896_cast_fp16)[name = string("shape_422_cast_fp16")]; + int32 concat_395_axis_0 = const()[name = string("concat_395_axis_0"), val = int32(0)]; + bool concat_395_interleave_0 = const()[name = string("concat_395_interleave_0"), val = bool(false)]; + int32 gather_379_cast_uint16_to_int32 = cast(dtype = gather_379_cast_uint16_to_int32_dtype_0, x = gather_379_cast_uint16)[name = string("cast_91")]; + tensor concat_395 = concat(axis = concat_395_axis_0, interleave = concat_395_interleave_0, values = (gather_377, gather_378, var_89, gather_379_cast_uint16_to_int32, gather_380))[name = string("concat_395")]; + tensor real_div_41 = real_div(x = concat_395, y = shape_422_cast_fp16)[name = string("real_div_41")]; + tensor hidden_states_615_cast_fp16 = tile(reps = real_div_41, x = var_3896_cast_fp16)[name = string("hidden_states_615_cast_fp16")]; + tensor concat_396x = const()[name = string("concat_396x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_83_cast_fp16 = reshape(shape = concat_396x, x = hidden_states_615_cast_fp16)[name = string("value_states_83_cast_fp16")]; + tensor var_3906_shape_cast_fp16 = shape(x = key_states_83_cast_fp16)[name = string("op_3906_shape_cast_fp16")]; + int32 gather_381_axis_0 = const()[name = string("gather_381_axis_0"), val = int32(0)]; + int32 gather_381_batch_dims_0 = const()[name = string("gather_381_batch_dims_0"), val = int32(0)]; + bool gather_381_validate_indices_0 = const()[name = string("gather_381_validate_indices_0"), val = bool(false)]; + string var_3906_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3906_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_381_to_uint16 = const()[name = string("select_381_to_uint16"), val = uint16(2)]; + tensor var_3906_shape_cast_fp16_to_uint16 = cast(dtype = var_3906_shape_cast_fp16_to_uint16_dtype_0, x = var_3906_shape_cast_fp16)[name = string("cast_90")]; + uint16 gather_381_cast_uint16 = gather(axis = gather_381_axis_0, batch_dims = gather_381_batch_dims_0, indices = select_381_to_uint16, validate_indices = gather_381_validate_indices_0, x = var_3906_shape_cast_fp16_to_uint16)[name = string("gather_381_cast_uint16")]; + string gather_381_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_381_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_397_values0_0 = const()[name = string("concat_397_values0_0"), val = int32(1)]; + int32 concat_397_values1_0 = const()[name = string("concat_397_values1_0"), val = int32(1)]; + int32 concat_397_values2_0 = const()[name = string("concat_397_values2_0"), val = int32(0)]; + int32 concat_397_axis_0 = const()[name = string("concat_397_axis_0"), val = int32(0)]; + bool concat_397_interleave_0 = const()[name = string("concat_397_interleave_0"), val = bool(false)]; + int32 gather_381_cast_uint16_to_int32 = cast(dtype = gather_381_cast_uint16_to_int32_dtype_0, x = gather_381_cast_uint16)[name = string("cast_89")]; + tensor concat_397 = concat(axis = concat_397_axis_0, interleave = concat_397_interleave_0, values = (concat_397_values0_0, concat_397_values1_0, concat_397_values2_0, gather_381_cast_uint16_to_int32))[name = string("concat_397")]; + tensor causal_mask_43_begin_0 = const()[name = string("causal_mask_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_43_end_mask_0 = const()[name = string("causal_mask_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_43_cast_fp16 = slice_by_index(begin = causal_mask_43_begin_0, end = concat_397, end_mask = causal_mask_43_end_mask_0, x = causal_mask)[name = string("causal_mask_43_cast_fp16")]; + tensor attn_output_81_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_43_cast_fp16, key = key_states_83_cast_fp16, query = query_states_83_cast_fp16, value = value_states_83_cast_fp16)[name = string("attn_output_81_cast_fp16")]; + tensor var_3912_perm_0 = const()[name = string("op_3912_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_398_axis_0 = const()[name = string("concat_398_axis_0"), val = int32(0)]; + bool concat_398_interleave_0 = const()[name = string("concat_398_interleave_0"), val = bool(false)]; + int32 gather_365_cast_uint16_to_int32 = cast(dtype = gather_365_cast_uint16_to_int32_dtype_0, x = gather_365_cast_uint16)[name = string("cast_88")]; + tensor concat_398 = concat(axis = concat_398_axis_0, interleave = concat_398_interleave_0, values = (gather_364, gather_365_cast_uint16_to_int32, var_85))[name = string("concat_398")]; + tensor var_3912_cast_fp16 = transpose(perm = var_3912_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_44")]; + tensor input_161_cast_fp16 = reshape(shape = concat_398, x = var_3912_cast_fp16)[name = string("input_161_cast_fp16")]; + tensor model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138105920))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138566784))))[name = string("model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_143_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized, x = input_161_cast_fp16)[name = string("linear_143_cast_fp16")]; + tensor hidden_states_619_cast_fp16 = add(x = hidden_states_599_cast_fp16, y = linear_143_cast_fp16)[name = string("hidden_states_619_cast_fp16")]; + fp16 var_80_promoted_41_to_fp16 = const()[name = string("op_80_promoted_41_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3921_cast_fp16 = pow(x = hidden_states_619_cast_fp16, y = var_80_promoted_41_to_fp16)[name = string("op_3921_cast_fp16")]; + tensor variance_83_axes_0 = const()[name = string("variance_83_axes_0"), val = tensor([-1])]; + bool variance_83_keep_dims_0 = const()[name = string("variance_83_keep_dims_0"), val = bool(true)]; + tensor variance_83_cast_fp16 = reduce_mean(axes = variance_83_axes_0, keep_dims = variance_83_keep_dims_0, x = var_3921_cast_fp16)[name = string("variance_83_cast_fp16")]; + fp16 var_3924_to_fp16 = const()[name = string("op_3924_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3925_cast_fp16 = add(x = variance_83_cast_fp16, y = var_3924_to_fp16)[name = string("op_3925_cast_fp16")]; + fp32 var_3926_epsilon_0 = const()[name = string("op_3926_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3926_cast_fp16 = rsqrt(epsilon = var_3926_epsilon_0, x = var_3925_cast_fp16)[name = string("op_3926_cast_fp16")]; + tensor hidden_states_623_cast_fp16 = mul(x = hidden_states_619_cast_fp16, y = var_3926_cast_fp16)[name = string("hidden_states_623_cast_fp16")]; + tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138624448)))]; + tensor input_163_cast_fp16 = mul(x = model_model_layers_20_post_attention_layernorm_weight_to_fp16, y = hidden_states_623_cast_fp16)[name = string("input_163_cast_fp16")]; + tensor model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138626432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139855296))))[name = string("model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_144_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized, x = input_163_cast_fp16)[name = string("linear_144_cast_fp16")]; + tensor var_3938_cast_fp16 = silu(x = linear_144_cast_fp16)[name = string("op_3938_cast_fp16")]; + tensor model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140008960))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141237824))))[name = string("model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_145_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized, x = input_163_cast_fp16)[name = string("linear_145_cast_fp16")]; + tensor input_167_cast_fp16 = mul(x = var_3938_cast_fp16, y = linear_145_cast_fp16)[name = string("input_167_cast_fp16")]; + tensor model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141391488))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142620352))))[name = string("model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_146_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized, x = input_167_cast_fp16)[name = string("linear_146_cast_fp16")]; + tensor hidden_states_629_cast_fp16 = add(x = hidden_states_619_cast_fp16, y = linear_146_cast_fp16)[name = string("hidden_states_629_cast_fp16")]; + fp16 var_80_promoted_42_to_fp16 = const()[name = string("op_80_promoted_42_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3951_cast_fp16 = pow(x = hidden_states_629_cast_fp16, y = var_80_promoted_42_to_fp16)[name = string("op_3951_cast_fp16")]; + tensor variance_85_axes_0 = const()[name = string("variance_85_axes_0"), val = tensor([-1])]; + bool variance_85_keep_dims_0 = const()[name = string("variance_85_keep_dims_0"), val = bool(true)]; + tensor variance_85_cast_fp16 = reduce_mean(axes = variance_85_axes_0, keep_dims = variance_85_keep_dims_0, x = var_3951_cast_fp16)[name = string("variance_85_cast_fp16")]; + fp16 var_3954_to_fp16 = const()[name = string("op_3954_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3955_cast_fp16 = add(x = variance_85_cast_fp16, y = var_3954_to_fp16)[name = string("op_3955_cast_fp16")]; + fp32 var_3956_epsilon_0 = const()[name = string("op_3956_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3956_cast_fp16 = rsqrt(epsilon = var_3956_epsilon_0, x = var_3955_cast_fp16)[name = string("op_3956_cast_fp16")]; + tensor hidden_states_633_cast_fp16 = mul(x = hidden_states_629_cast_fp16, y = var_3956_cast_fp16)[name = string("hidden_states_633_cast_fp16")]; + tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142774016)))]; + tensor hidden_states_637_cast_fp16 = mul(x = model_model_layers_21_input_layernorm_weight_to_fp16, y = hidden_states_633_cast_fp16)[name = string("hidden_states_637_cast_fp16")]; + tensor var_3967_shape_cast_fp16 = shape(x = hidden_states_637_cast_fp16)[name = string("op_3967_shape_cast_fp16")]; + int32 gather_382 = const()[name = string("gather_382"), val = int32(1)]; + int32 gather_383_axis_0 = const()[name = string("gather_383_axis_0"), val = int32(0)]; + int32 gather_383_batch_dims_0 = const()[name = string("gather_383_batch_dims_0"), val = int32(0)]; + bool gather_383_validate_indices_0 = const()[name = string("gather_383_validate_indices_0"), val = bool(false)]; + string var_3967_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3967_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_383_to_uint16 = const()[name = string("select_383_to_uint16"), val = uint16(1)]; + tensor var_3967_shape_cast_fp16_to_uint16 = cast(dtype = var_3967_shape_cast_fp16_to_uint16_dtype_0, x = var_3967_shape_cast_fp16)[name = string("cast_87")]; + uint16 gather_383_cast_uint16 = gather(axis = gather_383_axis_0, batch_dims = gather_383_batch_dims_0, indices = select_383_to_uint16, validate_indices = gather_383_validate_indices_0, x = var_3967_shape_cast_fp16_to_uint16)[name = string("gather_383_cast_uint16")]; + string gather_383_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_383_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142776000))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143236864))))[name = string("model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_147_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_637_cast_fp16)[name = string("linear_147_cast_fp16")]; + tensor model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143294528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143448192))))[name = string("model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_148_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_637_cast_fp16)[name = string("linear_148_cast_fp16")]; + tensor model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143467456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143621120))))[name = string("model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_149_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_637_cast_fp16)[name = string("linear_149_cast_fp16")]; + tensor concat_399x = const()[name = string("concat_399x"), val = tensor([1, -1, 15, 64])]; + tensor var_3976_cast_fp16 = reshape(shape = concat_399x, x = linear_147_cast_fp16)[name = string("op_3976_cast_fp16")]; + tensor q_43_perm_0 = const()[name = string("q_43_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_400x = const()[name = string("concat_400x"), val = tensor([1, -1, 5, 64])]; + tensor var_3979_cast_fp16 = reshape(shape = concat_400x, x = linear_148_cast_fp16)[name = string("op_3979_cast_fp16")]; + tensor k_43_perm_0 = const()[name = string("k_43_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_401x = const()[name = string("concat_401x"), val = tensor([1, -1, 5, 64])]; + tensor var_3982_cast_fp16 = reshape(shape = concat_401x, x = linear_149_cast_fp16)[name = string("op_3982_cast_fp16")]; + tensor v_state_43_perm_0 = const()[name = string("v_state_43_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_43_cast_fp16 = transpose(perm = q_43_perm_0, x = var_3976_cast_fp16)[name = string("transpose_43")]; + tensor var_3986_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3986_cast_fp16")]; + tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; + tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; + fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3997_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_45_promoted_to_fp16)[name = string("op_3997_cast_fp16")]; + bool var_3999_interleave_0 = const()[name = string("op_3999_interleave_0"), val = bool(false)]; + tensor var_3999_cast_fp16 = concat(axis = var_85, interleave = var_3999_interleave_0, values = (var_3997_cast_fp16, x1_85_cast_fp16))[name = string("op_3999_cast_fp16")]; + tensor var_4000_cast_fp16 = mul(x = var_3999_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4000_cast_fp16")]; + tensor query_states_87_cast_fp16 = add(x = var_3986_cast_fp16, y = var_4000_cast_fp16)[name = string("query_states_87_cast_fp16")]; + tensor k_43_cast_fp16 = transpose(perm = k_43_perm_0, x = var_3979_cast_fp16)[name = string("transpose_42")]; + tensor var_4002_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4002_cast_fp16")]; + tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; + tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; + fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4013_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_4013_cast_fp16")]; + bool var_4015_interleave_0 = const()[name = string("op_4015_interleave_0"), val = bool(false)]; + tensor var_4015_cast_fp16 = concat(axis = var_85, interleave = var_4015_interleave_0, values = (var_4013_cast_fp16, x1_87_cast_fp16))[name = string("op_4015_cast_fp16")]; + tensor var_4016_cast_fp16 = mul(x = var_4015_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4016_cast_fp16")]; + tensor k_state_43_cast_fp16 = add(x = var_4002_cast_fp16, y = var_4016_cast_fp16)[name = string("k_state_43_cast_fp16")]; + tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([0])]; + tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; + tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; + tensor concat_404_values0_0 = const()[name = string("concat_404_values0_0"), val = tensor([21])]; + int32 concat_404_axis_0 = const()[name = string("concat_404_axis_0"), val = int32(0)]; + bool concat_404_interleave_0 = const()[name = string("concat_404_interleave_0"), val = bool(false)]; + tensor concat_404 = concat(axis = concat_404_axis_0, interleave = concat_404_interleave_0, values = (concat_404_values0_0, expand_dims_252, expand_dims_253, expand_dims_2, expand_dims_255))[name = string("concat_404")]; + tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_404, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = k_state_43_cast_fp16, x = coreml_update_state_104)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_106_write_state")]; + tensor coreml_update_state_106 = read_state(input = key_cache)[name = string("coreml_update_state_106")]; + tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_43_cast_fp16 = transpose(perm = v_state_43_perm_0, x = var_3982_cast_fp16)[name = string("transpose_41")]; + tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_404, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = v_state_43_cast_fp16, x = coreml_update_state_105)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_107_write_state")]; + tensor coreml_update_state_107 = read_state(input = value_cache)[name = string("coreml_update_state_107")]; + tensor var_4039_begin_0 = const()[name = string("op_4039_begin_0"), val = tensor([21, 0, 0, 0, 0])]; + tensor var_4039_end_0 = const()[name = string("op_4039_end_0"), val = tensor([22, 1, 5, 2048, 64])]; + tensor var_4039_end_mask_0 = const()[name = string("op_4039_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4039_squeeze_mask_0 = const()[name = string("op_4039_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4039_cast_fp16 = slice_by_index(begin = var_4039_begin_0, end = var_4039_end_0, end_mask = var_4039_end_mask_0, squeeze_mask = var_4039_squeeze_mask_0, x = coreml_update_state_106)[name = string("op_4039_cast_fp16")]; + tensor var_4042_begin_0 = const()[name = string("op_4042_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4042_end_mask_0 = const()[name = string("op_4042_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4042_cast_fp16 = slice_by_index(begin = var_4042_begin_0, end = concat_11, end_mask = var_4042_end_mask_0, x = var_4039_cast_fp16)[name = string("op_4042_cast_fp16")]; + tensor var_4044_begin_0 = const()[name = string("op_4044_begin_0"), val = tensor([21, 0, 0, 0, 0])]; + tensor var_4044_end_0 = const()[name = string("op_4044_end_0"), val = tensor([22, 1, 5, 2048, 64])]; + tensor var_4044_end_mask_0 = const()[name = string("op_4044_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4044_squeeze_mask_0 = const()[name = string("op_4044_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4044_cast_fp16 = slice_by_index(begin = var_4044_begin_0, end = var_4044_end_0, end_mask = var_4044_end_mask_0, squeeze_mask = var_4044_squeeze_mask_0, x = coreml_update_state_107)[name = string("op_4044_cast_fp16")]; + tensor var_4047_begin_0 = const()[name = string("op_4047_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4047_end_mask_0 = const()[name = string("op_4047_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4047_cast_fp16 = slice_by_index(begin = var_4047_begin_0, end = concat_11, end_mask = var_4047_end_mask_0, x = var_4044_cast_fp16)[name = string("op_4047_cast_fp16")]; + tensor var_4049_shape_cast_fp16 = shape(x = var_4042_cast_fp16)[name = string("op_4049_shape_cast_fp16")]; + int32 gather_391 = const()[name = string("gather_391"), val = int32(1)]; + int32 gather_392 = const()[name = string("gather_392"), val = int32(5)]; + int32 gather_393_axis_0 = const()[name = string("gather_393_axis_0"), val = int32(0)]; + int32 gather_393_batch_dims_0 = const()[name = string("gather_393_batch_dims_0"), val = int32(0)]; + bool gather_393_validate_indices_0 = const()[name = string("gather_393_validate_indices_0"), val = bool(false)]; + string var_4049_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4049_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_393_to_uint16 = const()[name = string("select_393_to_uint16"), val = uint16(2)]; + tensor var_4049_shape_cast_fp16_to_uint16 = cast(dtype = var_4049_shape_cast_fp16_to_uint16_dtype_0, x = var_4049_shape_cast_fp16)[name = string("cast_86")]; + uint16 gather_393_cast_uint16 = gather(axis = gather_393_axis_0, batch_dims = gather_393_batch_dims_0, indices = select_393_to_uint16, validate_indices = gather_393_validate_indices_0, x = var_4049_shape_cast_fp16_to_uint16)[name = string("gather_393_cast_uint16")]; + string gather_393_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_393_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_394 = const()[name = string("gather_394"), val = int32(64)]; + tensor var_4056_axes_0 = const()[name = string("op_4056_axes_0"), val = tensor([2])]; + tensor var_4056_cast_fp16 = expand_dims(axes = var_4056_axes_0, x = var_4042_cast_fp16)[name = string("op_4056_cast_fp16")]; + tensor shape_437_cast_fp16 = shape(x = var_4056_cast_fp16)[name = string("shape_437_cast_fp16")]; + int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)]; + bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)]; + int32 gather_393_cast_uint16_to_int32 = cast(dtype = gather_393_cast_uint16_to_int32_dtype_0, x = gather_393_cast_uint16)[name = string("cast_85")]; + tensor concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (gather_391, gather_392, var_89, gather_393_cast_uint16_to_int32, gather_394))[name = string("concat_412")]; + tensor real_div_42 = real_div(x = concat_412, y = shape_437_cast_fp16)[name = string("real_div_42")]; + tensor hidden_states_641_cast_fp16 = tile(reps = real_div_42, x = var_4056_cast_fp16)[name = string("hidden_states_641_cast_fp16")]; + tensor concat_413x = const()[name = string("concat_413x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_87_cast_fp16 = reshape(shape = concat_413x, x = hidden_states_641_cast_fp16)[name = string("key_states_87_cast_fp16")]; + tensor var_4066_shape_cast_fp16 = shape(x = var_4047_cast_fp16)[name = string("op_4066_shape_cast_fp16")]; + int32 gather_395 = const()[name = string("gather_395"), val = int32(1)]; + int32 gather_396 = const()[name = string("gather_396"), val = int32(5)]; + int32 gather_397_axis_0 = const()[name = string("gather_397_axis_0"), val = int32(0)]; + int32 gather_397_batch_dims_0 = const()[name = string("gather_397_batch_dims_0"), val = int32(0)]; + bool gather_397_validate_indices_0 = const()[name = string("gather_397_validate_indices_0"), val = bool(false)]; + string var_4066_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4066_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_397_to_uint16 = const()[name = string("select_397_to_uint16"), val = uint16(2)]; + tensor var_4066_shape_cast_fp16_to_uint16 = cast(dtype = var_4066_shape_cast_fp16_to_uint16_dtype_0, x = var_4066_shape_cast_fp16)[name = string("cast_84")]; + uint16 gather_397_cast_uint16 = gather(axis = gather_397_axis_0, batch_dims = gather_397_batch_dims_0, indices = select_397_to_uint16, validate_indices = gather_397_validate_indices_0, x = var_4066_shape_cast_fp16_to_uint16)[name = string("gather_397_cast_uint16")]; + string gather_397_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_397_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_398 = const()[name = string("gather_398"), val = int32(64)]; + tensor var_4073_axes_0 = const()[name = string("op_4073_axes_0"), val = tensor([2])]; + tensor var_4073_cast_fp16 = expand_dims(axes = var_4073_axes_0, x = var_4047_cast_fp16)[name = string("op_4073_cast_fp16")]; + tensor shape_442_cast_fp16 = shape(x = var_4073_cast_fp16)[name = string("shape_442_cast_fp16")]; + int32 concat_414_axis_0 = const()[name = string("concat_414_axis_0"), val = int32(0)]; + bool concat_414_interleave_0 = const()[name = string("concat_414_interleave_0"), val = bool(false)]; + int32 gather_397_cast_uint16_to_int32 = cast(dtype = gather_397_cast_uint16_to_int32_dtype_0, x = gather_397_cast_uint16)[name = string("cast_83")]; + tensor concat_414 = concat(axis = concat_414_axis_0, interleave = concat_414_interleave_0, values = (gather_395, gather_396, var_89, gather_397_cast_uint16_to_int32, gather_398))[name = string("concat_414")]; + tensor real_div_43 = real_div(x = concat_414, y = shape_442_cast_fp16)[name = string("real_div_43")]; + tensor hidden_states_645_cast_fp16 = tile(reps = real_div_43, x = var_4073_cast_fp16)[name = string("hidden_states_645_cast_fp16")]; + tensor concat_415x = const()[name = string("concat_415x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_87_cast_fp16 = reshape(shape = concat_415x, x = hidden_states_645_cast_fp16)[name = string("value_states_87_cast_fp16")]; + tensor var_4083_shape_cast_fp16 = shape(x = key_states_87_cast_fp16)[name = string("op_4083_shape_cast_fp16")]; + int32 gather_399_axis_0 = const()[name = string("gather_399_axis_0"), val = int32(0)]; + int32 gather_399_batch_dims_0 = const()[name = string("gather_399_batch_dims_0"), val = int32(0)]; + bool gather_399_validate_indices_0 = const()[name = string("gather_399_validate_indices_0"), val = bool(false)]; + string var_4083_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4083_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_399_to_uint16 = const()[name = string("select_399_to_uint16"), val = uint16(2)]; + tensor var_4083_shape_cast_fp16_to_uint16 = cast(dtype = var_4083_shape_cast_fp16_to_uint16_dtype_0, x = var_4083_shape_cast_fp16)[name = string("cast_82")]; + uint16 gather_399_cast_uint16 = gather(axis = gather_399_axis_0, batch_dims = gather_399_batch_dims_0, indices = select_399_to_uint16, validate_indices = gather_399_validate_indices_0, x = var_4083_shape_cast_fp16_to_uint16)[name = string("gather_399_cast_uint16")]; + string gather_399_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_399_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_416_values0_0 = const()[name = string("concat_416_values0_0"), val = int32(1)]; + int32 concat_416_values1_0 = const()[name = string("concat_416_values1_0"), val = int32(1)]; + int32 concat_416_values2_0 = const()[name = string("concat_416_values2_0"), val = int32(0)]; + int32 concat_416_axis_0 = const()[name = string("concat_416_axis_0"), val = int32(0)]; + bool concat_416_interleave_0 = const()[name = string("concat_416_interleave_0"), val = bool(false)]; + int32 gather_399_cast_uint16_to_int32 = cast(dtype = gather_399_cast_uint16_to_int32_dtype_0, x = gather_399_cast_uint16)[name = string("cast_81")]; + tensor concat_416 = concat(axis = concat_416_axis_0, interleave = concat_416_interleave_0, values = (concat_416_values0_0, concat_416_values1_0, concat_416_values2_0, gather_399_cast_uint16_to_int32))[name = string("concat_416")]; + tensor causal_mask_45_begin_0 = const()[name = string("causal_mask_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_45_end_mask_0 = const()[name = string("causal_mask_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_45_cast_fp16 = slice_by_index(begin = causal_mask_45_begin_0, end = concat_416, end_mask = causal_mask_45_end_mask_0, x = causal_mask)[name = string("causal_mask_45_cast_fp16")]; + tensor attn_output_85_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_45_cast_fp16, key = key_states_87_cast_fp16, query = query_states_87_cast_fp16, value = value_states_87_cast_fp16)[name = string("attn_output_85_cast_fp16")]; + tensor var_4089_perm_0 = const()[name = string("op_4089_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_417_axis_0 = const()[name = string("concat_417_axis_0"), val = int32(0)]; + bool concat_417_interleave_0 = const()[name = string("concat_417_interleave_0"), val = bool(false)]; + int32 gather_383_cast_uint16_to_int32 = cast(dtype = gather_383_cast_uint16_to_int32_dtype_0, x = gather_383_cast_uint16)[name = string("cast_80")]; + tensor concat_417 = concat(axis = concat_417_axis_0, interleave = concat_417_interleave_0, values = (gather_382, gather_383_cast_uint16_to_int32, var_85))[name = string("concat_417")]; + tensor var_4089_cast_fp16 = transpose(perm = var_4089_perm_0, x = attn_output_85_cast_fp16)[name = string("transpose_40")]; + tensor input_169_cast_fp16 = reshape(shape = concat_417, x = var_4089_cast_fp16)[name = string("input_169_cast_fp16")]; + tensor model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143640384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144101248))))[name = string("model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_150_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized, x = input_169_cast_fp16)[name = string("linear_150_cast_fp16")]; + tensor hidden_states_649_cast_fp16 = add(x = hidden_states_629_cast_fp16, y = linear_150_cast_fp16)[name = string("hidden_states_649_cast_fp16")]; + fp16 var_80_promoted_43_to_fp16 = const()[name = string("op_80_promoted_43_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4098_cast_fp16 = pow(x = hidden_states_649_cast_fp16, y = var_80_promoted_43_to_fp16)[name = string("op_4098_cast_fp16")]; + tensor variance_87_axes_0 = const()[name = string("variance_87_axes_0"), val = tensor([-1])]; + bool variance_87_keep_dims_0 = const()[name = string("variance_87_keep_dims_0"), val = bool(true)]; + tensor variance_87_cast_fp16 = reduce_mean(axes = variance_87_axes_0, keep_dims = variance_87_keep_dims_0, x = var_4098_cast_fp16)[name = string("variance_87_cast_fp16")]; + fp16 var_4101_to_fp16 = const()[name = string("op_4101_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4102_cast_fp16 = add(x = variance_87_cast_fp16, y = var_4101_to_fp16)[name = string("op_4102_cast_fp16")]; + fp32 var_4103_epsilon_0 = const()[name = string("op_4103_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4103_cast_fp16 = rsqrt(epsilon = var_4103_epsilon_0, x = var_4102_cast_fp16)[name = string("op_4103_cast_fp16")]; + tensor hidden_states_653_cast_fp16 = mul(x = hidden_states_649_cast_fp16, y = var_4103_cast_fp16)[name = string("hidden_states_653_cast_fp16")]; + tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144158912)))]; + tensor input_171_cast_fp16 = mul(x = model_model_layers_21_post_attention_layernorm_weight_to_fp16, y = hidden_states_653_cast_fp16)[name = string("input_171_cast_fp16")]; + tensor model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144160896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145389760))))[name = string("model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_151_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized, x = input_171_cast_fp16)[name = string("linear_151_cast_fp16")]; + tensor var_4115_cast_fp16 = silu(x = linear_151_cast_fp16)[name = string("op_4115_cast_fp16")]; + tensor model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145543424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146772288))))[name = string("model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_152_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized, x = input_171_cast_fp16)[name = string("linear_152_cast_fp16")]; + tensor input_175_cast_fp16 = mul(x = var_4115_cast_fp16, y = linear_152_cast_fp16)[name = string("input_175_cast_fp16")]; + tensor model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146925952))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148154816))))[name = string("model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_153_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized, x = input_175_cast_fp16)[name = string("linear_153_cast_fp16")]; + tensor hidden_states_659_cast_fp16 = add(x = hidden_states_649_cast_fp16, y = linear_153_cast_fp16)[name = string("hidden_states_659_cast_fp16")]; + fp16 var_80_promoted_44_to_fp16 = const()[name = string("op_80_promoted_44_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4128_cast_fp16 = pow(x = hidden_states_659_cast_fp16, y = var_80_promoted_44_to_fp16)[name = string("op_4128_cast_fp16")]; + tensor variance_89_axes_0 = const()[name = string("variance_89_axes_0"), val = tensor([-1])]; + bool variance_89_keep_dims_0 = const()[name = string("variance_89_keep_dims_0"), val = bool(true)]; + tensor variance_89_cast_fp16 = reduce_mean(axes = variance_89_axes_0, keep_dims = variance_89_keep_dims_0, x = var_4128_cast_fp16)[name = string("variance_89_cast_fp16")]; + fp16 var_4131_to_fp16 = const()[name = string("op_4131_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4132_cast_fp16 = add(x = variance_89_cast_fp16, y = var_4131_to_fp16)[name = string("op_4132_cast_fp16")]; + fp32 var_4133_epsilon_0 = const()[name = string("op_4133_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4133_cast_fp16 = rsqrt(epsilon = var_4133_epsilon_0, x = var_4132_cast_fp16)[name = string("op_4133_cast_fp16")]; + tensor hidden_states_663_cast_fp16 = mul(x = hidden_states_659_cast_fp16, y = var_4133_cast_fp16)[name = string("hidden_states_663_cast_fp16")]; + tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148308480)))]; + tensor hidden_states_667_cast_fp16 = mul(x = model_model_layers_22_input_layernorm_weight_to_fp16, y = hidden_states_663_cast_fp16)[name = string("hidden_states_667_cast_fp16")]; + tensor var_4144_shape_cast_fp16 = shape(x = hidden_states_667_cast_fp16)[name = string("op_4144_shape_cast_fp16")]; + int32 gather_400 = const()[name = string("gather_400"), val = int32(1)]; + int32 gather_401_axis_0 = const()[name = string("gather_401_axis_0"), val = int32(0)]; + int32 gather_401_batch_dims_0 = const()[name = string("gather_401_batch_dims_0"), val = int32(0)]; + bool gather_401_validate_indices_0 = const()[name = string("gather_401_validate_indices_0"), val = bool(false)]; + string var_4144_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4144_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_401_to_uint16 = const()[name = string("select_401_to_uint16"), val = uint16(1)]; + tensor var_4144_shape_cast_fp16_to_uint16 = cast(dtype = var_4144_shape_cast_fp16_to_uint16_dtype_0, x = var_4144_shape_cast_fp16)[name = string("cast_79")]; + uint16 gather_401_cast_uint16 = gather(axis = gather_401_axis_0, batch_dims = gather_401_batch_dims_0, indices = select_401_to_uint16, validate_indices = gather_401_validate_indices_0, x = var_4144_shape_cast_fp16_to_uint16)[name = string("gather_401_cast_uint16")]; + string gather_401_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_401_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148310464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148771328))))[name = string("model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_154_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_667_cast_fp16)[name = string("linear_154_cast_fp16")]; + tensor model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148828992))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148982656))))[name = string("model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_155_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_667_cast_fp16)[name = string("linear_155_cast_fp16")]; + tensor model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149001920))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149155584))))[name = string("model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_156_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_667_cast_fp16)[name = string("linear_156_cast_fp16")]; + tensor concat_418x = const()[name = string("concat_418x"), val = tensor([1, -1, 15, 64])]; + tensor var_4153_cast_fp16 = reshape(shape = concat_418x, x = linear_154_cast_fp16)[name = string("op_4153_cast_fp16")]; + tensor q_45_perm_0 = const()[name = string("q_45_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_419x = const()[name = string("concat_419x"), val = tensor([1, -1, 5, 64])]; + tensor var_4156_cast_fp16 = reshape(shape = concat_419x, x = linear_155_cast_fp16)[name = string("op_4156_cast_fp16")]; + tensor k_45_perm_0 = const()[name = string("k_45_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_420x = const()[name = string("concat_420x"), val = tensor([1, -1, 5, 64])]; + tensor var_4159_cast_fp16 = reshape(shape = concat_420x, x = linear_156_cast_fp16)[name = string("op_4159_cast_fp16")]; + tensor v_state_45_perm_0 = const()[name = string("v_state_45_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_45_cast_fp16 = transpose(perm = q_45_perm_0, x = var_4153_cast_fp16)[name = string("transpose_39")]; + tensor var_4163_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4163_cast_fp16")]; + tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; + tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4174_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_4174_cast_fp16")]; + bool var_4176_interleave_0 = const()[name = string("op_4176_interleave_0"), val = bool(false)]; + tensor var_4176_cast_fp16 = concat(axis = var_85, interleave = var_4176_interleave_0, values = (var_4174_cast_fp16, x1_89_cast_fp16))[name = string("op_4176_cast_fp16")]; + tensor var_4177_cast_fp16 = mul(x = var_4176_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4177_cast_fp16")]; + tensor query_states_91_cast_fp16 = add(x = var_4163_cast_fp16, y = var_4177_cast_fp16)[name = string("query_states_91_cast_fp16")]; + tensor k_45_cast_fp16 = transpose(perm = k_45_perm_0, x = var_4156_cast_fp16)[name = string("transpose_38")]; + tensor var_4179_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4179_cast_fp16")]; + tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; + tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4190_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_4190_cast_fp16")]; + bool var_4192_interleave_0 = const()[name = string("op_4192_interleave_0"), val = bool(false)]; + tensor var_4192_cast_fp16 = concat(axis = var_85, interleave = var_4192_interleave_0, values = (var_4190_cast_fp16, x1_91_cast_fp16))[name = string("op_4192_cast_fp16")]; + tensor var_4193_cast_fp16 = mul(x = var_4192_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4193_cast_fp16")]; + tensor k_state_45_cast_fp16 = add(x = var_4179_cast_fp16, y = var_4193_cast_fp16)[name = string("k_state_45_cast_fp16")]; + tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([0])]; + tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; + tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; + tensor concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor([22])]; + int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)]; + bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)]; + tensor concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, expand_dims_264, expand_dims_265, expand_dims_2, expand_dims_267))[name = string("concat_423")]; + tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_423, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = k_state_45_cast_fp16, x = coreml_update_state_106)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_108_write_state")]; + tensor coreml_update_state_108 = read_state(input = key_cache)[name = string("coreml_update_state_108")]; + tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_45_cast_fp16 = transpose(perm = v_state_45_perm_0, x = var_4159_cast_fp16)[name = string("transpose_37")]; + tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_423, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = v_state_45_cast_fp16, x = coreml_update_state_107)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_109_write_state")]; + tensor coreml_update_state_109 = read_state(input = value_cache)[name = string("coreml_update_state_109")]; + tensor var_4216_begin_0 = const()[name = string("op_4216_begin_0"), val = tensor([22, 0, 0, 0, 0])]; + tensor var_4216_end_0 = const()[name = string("op_4216_end_0"), val = tensor([23, 1, 5, 2048, 64])]; + tensor var_4216_end_mask_0 = const()[name = string("op_4216_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4216_squeeze_mask_0 = const()[name = string("op_4216_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4216_cast_fp16 = slice_by_index(begin = var_4216_begin_0, end = var_4216_end_0, end_mask = var_4216_end_mask_0, squeeze_mask = var_4216_squeeze_mask_0, x = coreml_update_state_108)[name = string("op_4216_cast_fp16")]; + tensor var_4219_begin_0 = const()[name = string("op_4219_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4219_end_mask_0 = const()[name = string("op_4219_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4219_cast_fp16 = slice_by_index(begin = var_4219_begin_0, end = concat_11, end_mask = var_4219_end_mask_0, x = var_4216_cast_fp16)[name = string("op_4219_cast_fp16")]; + tensor var_4221_begin_0 = const()[name = string("op_4221_begin_0"), val = tensor([22, 0, 0, 0, 0])]; + tensor var_4221_end_0 = const()[name = string("op_4221_end_0"), val = tensor([23, 1, 5, 2048, 64])]; + tensor var_4221_end_mask_0 = const()[name = string("op_4221_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4221_squeeze_mask_0 = const()[name = string("op_4221_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4221_cast_fp16 = slice_by_index(begin = var_4221_begin_0, end = var_4221_end_0, end_mask = var_4221_end_mask_0, squeeze_mask = var_4221_squeeze_mask_0, x = coreml_update_state_109)[name = string("op_4221_cast_fp16")]; + tensor var_4224_begin_0 = const()[name = string("op_4224_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4224_end_mask_0 = const()[name = string("op_4224_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4224_cast_fp16 = slice_by_index(begin = var_4224_begin_0, end = concat_11, end_mask = var_4224_end_mask_0, x = var_4221_cast_fp16)[name = string("op_4224_cast_fp16")]; + tensor var_4226_shape_cast_fp16 = shape(x = var_4219_cast_fp16)[name = string("op_4226_shape_cast_fp16")]; + int32 gather_409 = const()[name = string("gather_409"), val = int32(1)]; + int32 gather_410 = const()[name = string("gather_410"), val = int32(5)]; + int32 gather_411_axis_0 = const()[name = string("gather_411_axis_0"), val = int32(0)]; + int32 gather_411_batch_dims_0 = const()[name = string("gather_411_batch_dims_0"), val = int32(0)]; + bool gather_411_validate_indices_0 = const()[name = string("gather_411_validate_indices_0"), val = bool(false)]; + string var_4226_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4226_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_411_to_uint16 = const()[name = string("select_411_to_uint16"), val = uint16(2)]; + tensor var_4226_shape_cast_fp16_to_uint16 = cast(dtype = var_4226_shape_cast_fp16_to_uint16_dtype_0, x = var_4226_shape_cast_fp16)[name = string("cast_78")]; + uint16 gather_411_cast_uint16 = gather(axis = gather_411_axis_0, batch_dims = gather_411_batch_dims_0, indices = select_411_to_uint16, validate_indices = gather_411_validate_indices_0, x = var_4226_shape_cast_fp16_to_uint16)[name = string("gather_411_cast_uint16")]; + string gather_411_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_411_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_412 = const()[name = string("gather_412"), val = int32(64)]; + tensor var_4233_axes_0 = const()[name = string("op_4233_axes_0"), val = tensor([2])]; + tensor var_4233_cast_fp16 = expand_dims(axes = var_4233_axes_0, x = var_4219_cast_fp16)[name = string("op_4233_cast_fp16")]; + tensor shape_457_cast_fp16 = shape(x = var_4233_cast_fp16)[name = string("shape_457_cast_fp16")]; + int32 concat_431_axis_0 = const()[name = string("concat_431_axis_0"), val = int32(0)]; + bool concat_431_interleave_0 = const()[name = string("concat_431_interleave_0"), val = bool(false)]; + int32 gather_411_cast_uint16_to_int32 = cast(dtype = gather_411_cast_uint16_to_int32_dtype_0, x = gather_411_cast_uint16)[name = string("cast_77")]; + tensor concat_431 = concat(axis = concat_431_axis_0, interleave = concat_431_interleave_0, values = (gather_409, gather_410, var_89, gather_411_cast_uint16_to_int32, gather_412))[name = string("concat_431")]; + tensor real_div_44 = real_div(x = concat_431, y = shape_457_cast_fp16)[name = string("real_div_44")]; + tensor hidden_states_671_cast_fp16 = tile(reps = real_div_44, x = var_4233_cast_fp16)[name = string("hidden_states_671_cast_fp16")]; + tensor concat_432x = const()[name = string("concat_432x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_91_cast_fp16 = reshape(shape = concat_432x, x = hidden_states_671_cast_fp16)[name = string("key_states_91_cast_fp16")]; + tensor var_4243_shape_cast_fp16 = shape(x = var_4224_cast_fp16)[name = string("op_4243_shape_cast_fp16")]; + int32 gather_413 = const()[name = string("gather_413"), val = int32(1)]; + int32 gather_414 = const()[name = string("gather_414"), val = int32(5)]; + int32 gather_415_axis_0 = const()[name = string("gather_415_axis_0"), val = int32(0)]; + int32 gather_415_batch_dims_0 = const()[name = string("gather_415_batch_dims_0"), val = int32(0)]; + bool gather_415_validate_indices_0 = const()[name = string("gather_415_validate_indices_0"), val = bool(false)]; + string var_4243_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4243_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_415_to_uint16 = const()[name = string("select_415_to_uint16"), val = uint16(2)]; + tensor var_4243_shape_cast_fp16_to_uint16 = cast(dtype = var_4243_shape_cast_fp16_to_uint16_dtype_0, x = var_4243_shape_cast_fp16)[name = string("cast_76")]; + uint16 gather_415_cast_uint16 = gather(axis = gather_415_axis_0, batch_dims = gather_415_batch_dims_0, indices = select_415_to_uint16, validate_indices = gather_415_validate_indices_0, x = var_4243_shape_cast_fp16_to_uint16)[name = string("gather_415_cast_uint16")]; + string gather_415_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_415_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_416 = const()[name = string("gather_416"), val = int32(64)]; + tensor var_4250_axes_0 = const()[name = string("op_4250_axes_0"), val = tensor([2])]; + tensor var_4250_cast_fp16 = expand_dims(axes = var_4250_axes_0, x = var_4224_cast_fp16)[name = string("op_4250_cast_fp16")]; + tensor shape_462_cast_fp16 = shape(x = var_4250_cast_fp16)[name = string("shape_462_cast_fp16")]; + int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)]; + bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)]; + int32 gather_415_cast_uint16_to_int32 = cast(dtype = gather_415_cast_uint16_to_int32_dtype_0, x = gather_415_cast_uint16)[name = string("cast_75")]; + tensor concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_413, gather_414, var_89, gather_415_cast_uint16_to_int32, gather_416))[name = string("concat_433")]; + tensor real_div_45 = real_div(x = concat_433, y = shape_462_cast_fp16)[name = string("real_div_45")]; + tensor hidden_states_675_cast_fp16 = tile(reps = real_div_45, x = var_4250_cast_fp16)[name = string("hidden_states_675_cast_fp16")]; + tensor concat_434x = const()[name = string("concat_434x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_91_cast_fp16 = reshape(shape = concat_434x, x = hidden_states_675_cast_fp16)[name = string("value_states_91_cast_fp16")]; + tensor var_4260_shape_cast_fp16 = shape(x = key_states_91_cast_fp16)[name = string("op_4260_shape_cast_fp16")]; + int32 gather_417_axis_0 = const()[name = string("gather_417_axis_0"), val = int32(0)]; + int32 gather_417_batch_dims_0 = const()[name = string("gather_417_batch_dims_0"), val = int32(0)]; + bool gather_417_validate_indices_0 = const()[name = string("gather_417_validate_indices_0"), val = bool(false)]; + string var_4260_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4260_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_417_to_uint16 = const()[name = string("select_417_to_uint16"), val = uint16(2)]; + tensor var_4260_shape_cast_fp16_to_uint16 = cast(dtype = var_4260_shape_cast_fp16_to_uint16_dtype_0, x = var_4260_shape_cast_fp16)[name = string("cast_74")]; + uint16 gather_417_cast_uint16 = gather(axis = gather_417_axis_0, batch_dims = gather_417_batch_dims_0, indices = select_417_to_uint16, validate_indices = gather_417_validate_indices_0, x = var_4260_shape_cast_fp16_to_uint16)[name = string("gather_417_cast_uint16")]; + string gather_417_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_417_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_435_values0_0 = const()[name = string("concat_435_values0_0"), val = int32(1)]; + int32 concat_435_values1_0 = const()[name = string("concat_435_values1_0"), val = int32(1)]; + int32 concat_435_values2_0 = const()[name = string("concat_435_values2_0"), val = int32(0)]; + int32 concat_435_axis_0 = const()[name = string("concat_435_axis_0"), val = int32(0)]; + bool concat_435_interleave_0 = const()[name = string("concat_435_interleave_0"), val = bool(false)]; + int32 gather_417_cast_uint16_to_int32 = cast(dtype = gather_417_cast_uint16_to_int32_dtype_0, x = gather_417_cast_uint16)[name = string("cast_73")]; + tensor concat_435 = concat(axis = concat_435_axis_0, interleave = concat_435_interleave_0, values = (concat_435_values0_0, concat_435_values1_0, concat_435_values2_0, gather_417_cast_uint16_to_int32))[name = string("concat_435")]; + tensor causal_mask_47_begin_0 = const()[name = string("causal_mask_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_47_end_mask_0 = const()[name = string("causal_mask_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_47_cast_fp16 = slice_by_index(begin = causal_mask_47_begin_0, end = concat_435, end_mask = causal_mask_47_end_mask_0, x = causal_mask)[name = string("causal_mask_47_cast_fp16")]; + tensor attn_output_89_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_47_cast_fp16, key = key_states_91_cast_fp16, query = query_states_91_cast_fp16, value = value_states_91_cast_fp16)[name = string("attn_output_89_cast_fp16")]; + tensor var_4266_perm_0 = const()[name = string("op_4266_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_436_axis_0 = const()[name = string("concat_436_axis_0"), val = int32(0)]; + bool concat_436_interleave_0 = const()[name = string("concat_436_interleave_0"), val = bool(false)]; + int32 gather_401_cast_uint16_to_int32 = cast(dtype = gather_401_cast_uint16_to_int32_dtype_0, x = gather_401_cast_uint16)[name = string("cast_72")]; + tensor concat_436 = concat(axis = concat_436_axis_0, interleave = concat_436_interleave_0, values = (gather_400, gather_401_cast_uint16_to_int32, var_85))[name = string("concat_436")]; + tensor var_4266_cast_fp16 = transpose(perm = var_4266_perm_0, x = attn_output_89_cast_fp16)[name = string("transpose_36")]; + tensor input_177_cast_fp16 = reshape(shape = concat_436, x = var_4266_cast_fp16)[name = string("input_177_cast_fp16")]; + tensor model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149174848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149635712))))[name = string("model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_157_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized, x = input_177_cast_fp16)[name = string("linear_157_cast_fp16")]; + tensor hidden_states_679_cast_fp16 = add(x = hidden_states_659_cast_fp16, y = linear_157_cast_fp16)[name = string("hidden_states_679_cast_fp16")]; + fp16 var_80_promoted_45_to_fp16 = const()[name = string("op_80_promoted_45_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4275_cast_fp16 = pow(x = hidden_states_679_cast_fp16, y = var_80_promoted_45_to_fp16)[name = string("op_4275_cast_fp16")]; + tensor variance_91_axes_0 = const()[name = string("variance_91_axes_0"), val = tensor([-1])]; + bool variance_91_keep_dims_0 = const()[name = string("variance_91_keep_dims_0"), val = bool(true)]; + tensor variance_91_cast_fp16 = reduce_mean(axes = variance_91_axes_0, keep_dims = variance_91_keep_dims_0, x = var_4275_cast_fp16)[name = string("variance_91_cast_fp16")]; + fp16 var_4278_to_fp16 = const()[name = string("op_4278_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4279_cast_fp16 = add(x = variance_91_cast_fp16, y = var_4278_to_fp16)[name = string("op_4279_cast_fp16")]; + fp32 var_4280_epsilon_0 = const()[name = string("op_4280_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4280_cast_fp16 = rsqrt(epsilon = var_4280_epsilon_0, x = var_4279_cast_fp16)[name = string("op_4280_cast_fp16")]; + tensor hidden_states_683_cast_fp16 = mul(x = hidden_states_679_cast_fp16, y = var_4280_cast_fp16)[name = string("hidden_states_683_cast_fp16")]; + tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149693376)))]; + tensor input_179_cast_fp16 = mul(x = model_model_layers_22_post_attention_layernorm_weight_to_fp16, y = hidden_states_683_cast_fp16)[name = string("input_179_cast_fp16")]; + tensor model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149695360))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150924224))))[name = string("model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_158_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized, x = input_179_cast_fp16)[name = string("linear_158_cast_fp16")]; + tensor var_4292_cast_fp16 = silu(x = linear_158_cast_fp16)[name = string("op_4292_cast_fp16")]; + tensor model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151077888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152306752))))[name = string("model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_159_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized, x = input_179_cast_fp16)[name = string("linear_159_cast_fp16")]; + tensor input_183_cast_fp16 = mul(x = var_4292_cast_fp16, y = linear_159_cast_fp16)[name = string("input_183_cast_fp16")]; + tensor model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152460416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153689280))))[name = string("model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_160_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized, x = input_183_cast_fp16)[name = string("linear_160_cast_fp16")]; + tensor hidden_states_689_cast_fp16 = add(x = hidden_states_679_cast_fp16, y = linear_160_cast_fp16)[name = string("hidden_states_689_cast_fp16")]; + fp16 var_80_promoted_46_to_fp16 = const()[name = string("op_80_promoted_46_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4305_cast_fp16 = pow(x = hidden_states_689_cast_fp16, y = var_80_promoted_46_to_fp16)[name = string("op_4305_cast_fp16")]; + tensor variance_93_axes_0 = const()[name = string("variance_93_axes_0"), val = tensor([-1])]; + bool variance_93_keep_dims_0 = const()[name = string("variance_93_keep_dims_0"), val = bool(true)]; + tensor variance_93_cast_fp16 = reduce_mean(axes = variance_93_axes_0, keep_dims = variance_93_keep_dims_0, x = var_4305_cast_fp16)[name = string("variance_93_cast_fp16")]; + fp16 var_4308_to_fp16 = const()[name = string("op_4308_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4309_cast_fp16 = add(x = variance_93_cast_fp16, y = var_4308_to_fp16)[name = string("op_4309_cast_fp16")]; + fp32 var_4310_epsilon_0 = const()[name = string("op_4310_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4310_cast_fp16 = rsqrt(epsilon = var_4310_epsilon_0, x = var_4309_cast_fp16)[name = string("op_4310_cast_fp16")]; + tensor hidden_states_693_cast_fp16 = mul(x = hidden_states_689_cast_fp16, y = var_4310_cast_fp16)[name = string("hidden_states_693_cast_fp16")]; + tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153842944)))]; + tensor hidden_states_697_cast_fp16 = mul(x = model_model_layers_23_input_layernorm_weight_to_fp16, y = hidden_states_693_cast_fp16)[name = string("hidden_states_697_cast_fp16")]; + tensor var_4321_shape_cast_fp16 = shape(x = hidden_states_697_cast_fp16)[name = string("op_4321_shape_cast_fp16")]; + int32 gather_418 = const()[name = string("gather_418"), val = int32(1)]; + int32 gather_419_axis_0 = const()[name = string("gather_419_axis_0"), val = int32(0)]; + int32 gather_419_batch_dims_0 = const()[name = string("gather_419_batch_dims_0"), val = int32(0)]; + bool gather_419_validate_indices_0 = const()[name = string("gather_419_validate_indices_0"), val = bool(false)]; + string var_4321_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4321_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_419_to_uint16 = const()[name = string("select_419_to_uint16"), val = uint16(1)]; + tensor var_4321_shape_cast_fp16_to_uint16 = cast(dtype = var_4321_shape_cast_fp16_to_uint16_dtype_0, x = var_4321_shape_cast_fp16)[name = string("cast_71")]; + uint16 gather_419_cast_uint16 = gather(axis = gather_419_axis_0, batch_dims = gather_419_batch_dims_0, indices = select_419_to_uint16, validate_indices = gather_419_validate_indices_0, x = var_4321_shape_cast_fp16_to_uint16)[name = string("gather_419_cast_uint16")]; + string gather_419_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_419_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153844928))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154305792))))[name = string("model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_161_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_697_cast_fp16)[name = string("linear_161_cast_fp16")]; + tensor model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154363456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154517120))))[name = string("model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_162_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_697_cast_fp16)[name = string("linear_162_cast_fp16")]; + tensor model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154536384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154690048))))[name = string("model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_697_cast_fp16)[name = string("linear_163_cast_fp16")]; + tensor concat_437x = const()[name = string("concat_437x"), val = tensor([1, -1, 15, 64])]; + tensor var_4330_cast_fp16 = reshape(shape = concat_437x, x = linear_161_cast_fp16)[name = string("op_4330_cast_fp16")]; + tensor q_47_perm_0 = const()[name = string("q_47_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_438x = const()[name = string("concat_438x"), val = tensor([1, -1, 5, 64])]; + tensor var_4333_cast_fp16 = reshape(shape = concat_438x, x = linear_162_cast_fp16)[name = string("op_4333_cast_fp16")]; + tensor k_47_perm_0 = const()[name = string("k_47_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_439x = const()[name = string("concat_439x"), val = tensor([1, -1, 5, 64])]; + tensor var_4336_cast_fp16 = reshape(shape = concat_439x, x = linear_163_cast_fp16)[name = string("op_4336_cast_fp16")]; + tensor v_state_47_perm_0 = const()[name = string("v_state_47_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_47_cast_fp16 = transpose(perm = q_47_perm_0, x = var_4330_cast_fp16)[name = string("transpose_35")]; + tensor var_4340_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4340_cast_fp16")]; + tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; + tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; + fp16 const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4351_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_49_promoted_to_fp16)[name = string("op_4351_cast_fp16")]; + bool var_4353_interleave_0 = const()[name = string("op_4353_interleave_0"), val = bool(false)]; + tensor var_4353_cast_fp16 = concat(axis = var_85, interleave = var_4353_interleave_0, values = (var_4351_cast_fp16, x1_93_cast_fp16))[name = string("op_4353_cast_fp16")]; + tensor var_4354_cast_fp16 = mul(x = var_4353_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4354_cast_fp16")]; + tensor query_states_95_cast_fp16 = add(x = var_4340_cast_fp16, y = var_4354_cast_fp16)[name = string("query_states_95_cast_fp16")]; + tensor k_47_cast_fp16 = transpose(perm = k_47_perm_0, x = var_4333_cast_fp16)[name = string("transpose_34")]; + tensor var_4356_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4356_cast_fp16")]; + tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; + tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; + fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4367_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_50_promoted_to_fp16)[name = string("op_4367_cast_fp16")]; + bool var_4369_interleave_0 = const()[name = string("op_4369_interleave_0"), val = bool(false)]; + tensor var_4369_cast_fp16 = concat(axis = var_85, interleave = var_4369_interleave_0, values = (var_4367_cast_fp16, x1_95_cast_fp16))[name = string("op_4369_cast_fp16")]; + tensor var_4370_cast_fp16 = mul(x = var_4369_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4370_cast_fp16")]; + tensor k_state_47_cast_fp16 = add(x = var_4356_cast_fp16, y = var_4370_cast_fp16)[name = string("k_state_47_cast_fp16")]; + tensor expand_dims_276 = const()[name = string("expand_dims_276"), val = tensor([0])]; + tensor expand_dims_277 = const()[name = string("expand_dims_277"), val = tensor([0])]; + tensor expand_dims_279 = const()[name = string("expand_dims_279"), val = tensor([0])]; + tensor concat_442_values0_0 = const()[name = string("concat_442_values0_0"), val = tensor([23])]; + int32 concat_442_axis_0 = const()[name = string("concat_442_axis_0"), val = int32(0)]; + bool concat_442_interleave_0 = const()[name = string("concat_442_interleave_0"), val = bool(false)]; + tensor concat_442 = concat(axis = concat_442_axis_0, interleave = concat_442_interleave_0, values = (concat_442_values0_0, expand_dims_276, expand_dims_277, expand_dims_2, expand_dims_279))[name = string("concat_442")]; + tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_442, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = k_state_47_cast_fp16, x = coreml_update_state_108)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_110_write_state")]; + tensor coreml_update_state_110 = read_state(input = key_cache)[name = string("coreml_update_state_110")]; + tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_47_cast_fp16 = transpose(perm = v_state_47_perm_0, x = var_4336_cast_fp16)[name = string("transpose_33")]; + tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_442, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = v_state_47_cast_fp16, x = coreml_update_state_109)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_111_write_state")]; + tensor coreml_update_state_111 = read_state(input = value_cache)[name = string("coreml_update_state_111")]; + tensor var_4393_begin_0 = const()[name = string("op_4393_begin_0"), val = tensor([23, 0, 0, 0, 0])]; + tensor var_4393_end_0 = const()[name = string("op_4393_end_0"), val = tensor([24, 1, 5, 2048, 64])]; + tensor var_4393_end_mask_0 = const()[name = string("op_4393_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4393_squeeze_mask_0 = const()[name = string("op_4393_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4393_cast_fp16 = slice_by_index(begin = var_4393_begin_0, end = var_4393_end_0, end_mask = var_4393_end_mask_0, squeeze_mask = var_4393_squeeze_mask_0, x = coreml_update_state_110)[name = string("op_4393_cast_fp16")]; + tensor var_4396_begin_0 = const()[name = string("op_4396_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4396_end_mask_0 = const()[name = string("op_4396_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4396_cast_fp16 = slice_by_index(begin = var_4396_begin_0, end = concat_11, end_mask = var_4396_end_mask_0, x = var_4393_cast_fp16)[name = string("op_4396_cast_fp16")]; + tensor var_4398_begin_0 = const()[name = string("op_4398_begin_0"), val = tensor([23, 0, 0, 0, 0])]; + tensor var_4398_end_0 = const()[name = string("op_4398_end_0"), val = tensor([24, 1, 5, 2048, 64])]; + tensor var_4398_end_mask_0 = const()[name = string("op_4398_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4398_squeeze_mask_0 = const()[name = string("op_4398_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4398_cast_fp16 = slice_by_index(begin = var_4398_begin_0, end = var_4398_end_0, end_mask = var_4398_end_mask_0, squeeze_mask = var_4398_squeeze_mask_0, x = coreml_update_state_111)[name = string("op_4398_cast_fp16")]; + tensor var_4401_begin_0 = const()[name = string("op_4401_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4401_end_mask_0 = const()[name = string("op_4401_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4401_cast_fp16 = slice_by_index(begin = var_4401_begin_0, end = concat_11, end_mask = var_4401_end_mask_0, x = var_4398_cast_fp16)[name = string("op_4401_cast_fp16")]; + tensor var_4403_shape_cast_fp16 = shape(x = var_4396_cast_fp16)[name = string("op_4403_shape_cast_fp16")]; + int32 gather_427 = const()[name = string("gather_427"), val = int32(1)]; + int32 gather_428 = const()[name = string("gather_428"), val = int32(5)]; + int32 gather_429_axis_0 = const()[name = string("gather_429_axis_0"), val = int32(0)]; + int32 gather_429_batch_dims_0 = const()[name = string("gather_429_batch_dims_0"), val = int32(0)]; + bool gather_429_validate_indices_0 = const()[name = string("gather_429_validate_indices_0"), val = bool(false)]; + string var_4403_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4403_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_429_to_uint16 = const()[name = string("select_429_to_uint16"), val = uint16(2)]; + tensor var_4403_shape_cast_fp16_to_uint16 = cast(dtype = var_4403_shape_cast_fp16_to_uint16_dtype_0, x = var_4403_shape_cast_fp16)[name = string("cast_70")]; + uint16 gather_429_cast_uint16 = gather(axis = gather_429_axis_0, batch_dims = gather_429_batch_dims_0, indices = select_429_to_uint16, validate_indices = gather_429_validate_indices_0, x = var_4403_shape_cast_fp16_to_uint16)[name = string("gather_429_cast_uint16")]; + string gather_429_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_429_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_430 = const()[name = string("gather_430"), val = int32(64)]; + tensor var_4410_axes_0 = const()[name = string("op_4410_axes_0"), val = tensor([2])]; + tensor var_4410_cast_fp16 = expand_dims(axes = var_4410_axes_0, x = var_4396_cast_fp16)[name = string("op_4410_cast_fp16")]; + tensor shape_477_cast_fp16 = shape(x = var_4410_cast_fp16)[name = string("shape_477_cast_fp16")]; + int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)]; + bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)]; + int32 gather_429_cast_uint16_to_int32 = cast(dtype = gather_429_cast_uint16_to_int32_dtype_0, x = gather_429_cast_uint16)[name = string("cast_69")]; + tensor concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (gather_427, gather_428, var_89, gather_429_cast_uint16_to_int32, gather_430))[name = string("concat_450")]; + tensor real_div_46 = real_div(x = concat_450, y = shape_477_cast_fp16)[name = string("real_div_46")]; + tensor hidden_states_701_cast_fp16 = tile(reps = real_div_46, x = var_4410_cast_fp16)[name = string("hidden_states_701_cast_fp16")]; + tensor concat_451x = const()[name = string("concat_451x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_95_cast_fp16 = reshape(shape = concat_451x, x = hidden_states_701_cast_fp16)[name = string("key_states_95_cast_fp16")]; + tensor var_4420_shape_cast_fp16 = shape(x = var_4401_cast_fp16)[name = string("op_4420_shape_cast_fp16")]; + int32 gather_431 = const()[name = string("gather_431"), val = int32(1)]; + int32 gather_432 = const()[name = string("gather_432"), val = int32(5)]; + int32 gather_433_axis_0 = const()[name = string("gather_433_axis_0"), val = int32(0)]; + int32 gather_433_batch_dims_0 = const()[name = string("gather_433_batch_dims_0"), val = int32(0)]; + bool gather_433_validate_indices_0 = const()[name = string("gather_433_validate_indices_0"), val = bool(false)]; + string var_4420_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4420_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_433_to_uint16 = const()[name = string("select_433_to_uint16"), val = uint16(2)]; + tensor var_4420_shape_cast_fp16_to_uint16 = cast(dtype = var_4420_shape_cast_fp16_to_uint16_dtype_0, x = var_4420_shape_cast_fp16)[name = string("cast_68")]; + uint16 gather_433_cast_uint16 = gather(axis = gather_433_axis_0, batch_dims = gather_433_batch_dims_0, indices = select_433_to_uint16, validate_indices = gather_433_validate_indices_0, x = var_4420_shape_cast_fp16_to_uint16)[name = string("gather_433_cast_uint16")]; + string gather_433_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_433_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_434 = const()[name = string("gather_434"), val = int32(64)]; + tensor var_4427_axes_0 = const()[name = string("op_4427_axes_0"), val = tensor([2])]; + tensor var_4427_cast_fp16 = expand_dims(axes = var_4427_axes_0, x = var_4401_cast_fp16)[name = string("op_4427_cast_fp16")]; + tensor shape_482_cast_fp16 = shape(x = var_4427_cast_fp16)[name = string("shape_482_cast_fp16")]; + int32 concat_452_axis_0 = const()[name = string("concat_452_axis_0"), val = int32(0)]; + bool concat_452_interleave_0 = const()[name = string("concat_452_interleave_0"), val = bool(false)]; + int32 gather_433_cast_uint16_to_int32 = cast(dtype = gather_433_cast_uint16_to_int32_dtype_0, x = gather_433_cast_uint16)[name = string("cast_67")]; + tensor concat_452 = concat(axis = concat_452_axis_0, interleave = concat_452_interleave_0, values = (gather_431, gather_432, var_89, gather_433_cast_uint16_to_int32, gather_434))[name = string("concat_452")]; + tensor real_div_47 = real_div(x = concat_452, y = shape_482_cast_fp16)[name = string("real_div_47")]; + tensor hidden_states_705_cast_fp16 = tile(reps = real_div_47, x = var_4427_cast_fp16)[name = string("hidden_states_705_cast_fp16")]; + tensor concat_453x = const()[name = string("concat_453x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_95_cast_fp16 = reshape(shape = concat_453x, x = hidden_states_705_cast_fp16)[name = string("value_states_95_cast_fp16")]; + tensor var_4437_shape_cast_fp16 = shape(x = key_states_95_cast_fp16)[name = string("op_4437_shape_cast_fp16")]; + int32 gather_435_axis_0 = const()[name = string("gather_435_axis_0"), val = int32(0)]; + int32 gather_435_batch_dims_0 = const()[name = string("gather_435_batch_dims_0"), val = int32(0)]; + bool gather_435_validate_indices_0 = const()[name = string("gather_435_validate_indices_0"), val = bool(false)]; + string var_4437_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4437_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_435_to_uint16 = const()[name = string("select_435_to_uint16"), val = uint16(2)]; + tensor var_4437_shape_cast_fp16_to_uint16 = cast(dtype = var_4437_shape_cast_fp16_to_uint16_dtype_0, x = var_4437_shape_cast_fp16)[name = string("cast_66")]; + uint16 gather_435_cast_uint16 = gather(axis = gather_435_axis_0, batch_dims = gather_435_batch_dims_0, indices = select_435_to_uint16, validate_indices = gather_435_validate_indices_0, x = var_4437_shape_cast_fp16_to_uint16)[name = string("gather_435_cast_uint16")]; + string gather_435_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_435_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_454_values0_0 = const()[name = string("concat_454_values0_0"), val = int32(1)]; + int32 concat_454_values1_0 = const()[name = string("concat_454_values1_0"), val = int32(1)]; + int32 concat_454_values2_0 = const()[name = string("concat_454_values2_0"), val = int32(0)]; + int32 concat_454_axis_0 = const()[name = string("concat_454_axis_0"), val = int32(0)]; + bool concat_454_interleave_0 = const()[name = string("concat_454_interleave_0"), val = bool(false)]; + int32 gather_435_cast_uint16_to_int32 = cast(dtype = gather_435_cast_uint16_to_int32_dtype_0, x = gather_435_cast_uint16)[name = string("cast_65")]; + tensor concat_454 = concat(axis = concat_454_axis_0, interleave = concat_454_interleave_0, values = (concat_454_values0_0, concat_454_values1_0, concat_454_values2_0, gather_435_cast_uint16_to_int32))[name = string("concat_454")]; + tensor causal_mask_49_begin_0 = const()[name = string("causal_mask_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_49_end_mask_0 = const()[name = string("causal_mask_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_49_cast_fp16 = slice_by_index(begin = causal_mask_49_begin_0, end = concat_454, end_mask = causal_mask_49_end_mask_0, x = causal_mask)[name = string("causal_mask_49_cast_fp16")]; + tensor attn_output_93_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_49_cast_fp16, key = key_states_95_cast_fp16, query = query_states_95_cast_fp16, value = value_states_95_cast_fp16)[name = string("attn_output_93_cast_fp16")]; + tensor var_4443_perm_0 = const()[name = string("op_4443_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)]; + bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)]; + int32 gather_419_cast_uint16_to_int32 = cast(dtype = gather_419_cast_uint16_to_int32_dtype_0, x = gather_419_cast_uint16)[name = string("cast_64")]; + tensor concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (gather_418, gather_419_cast_uint16_to_int32, var_85))[name = string("concat_455")]; + tensor var_4443_cast_fp16 = transpose(perm = var_4443_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_32")]; + tensor input_185_cast_fp16 = reshape(shape = concat_455, x = var_4443_cast_fp16)[name = string("input_185_cast_fp16")]; + tensor model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154709312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155170176))))[name = string("model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_164_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized, x = input_185_cast_fp16)[name = string("linear_164_cast_fp16")]; + tensor hidden_states_709_cast_fp16 = add(x = hidden_states_689_cast_fp16, y = linear_164_cast_fp16)[name = string("hidden_states_709_cast_fp16")]; + fp16 var_80_promoted_47_to_fp16 = const()[name = string("op_80_promoted_47_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4452_cast_fp16 = pow(x = hidden_states_709_cast_fp16, y = var_80_promoted_47_to_fp16)[name = string("op_4452_cast_fp16")]; + tensor variance_95_axes_0 = const()[name = string("variance_95_axes_0"), val = tensor([-1])]; + bool variance_95_keep_dims_0 = const()[name = string("variance_95_keep_dims_0"), val = bool(true)]; + tensor variance_95_cast_fp16 = reduce_mean(axes = variance_95_axes_0, keep_dims = variance_95_keep_dims_0, x = var_4452_cast_fp16)[name = string("variance_95_cast_fp16")]; + fp16 var_4455_to_fp16 = const()[name = string("op_4455_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4456_cast_fp16 = add(x = variance_95_cast_fp16, y = var_4455_to_fp16)[name = string("op_4456_cast_fp16")]; + fp32 var_4457_epsilon_0 = const()[name = string("op_4457_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4457_cast_fp16 = rsqrt(epsilon = var_4457_epsilon_0, x = var_4456_cast_fp16)[name = string("op_4457_cast_fp16")]; + tensor hidden_states_713_cast_fp16 = mul(x = hidden_states_709_cast_fp16, y = var_4457_cast_fp16)[name = string("hidden_states_713_cast_fp16")]; + tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155227840)))]; + tensor input_187_cast_fp16 = mul(x = model_model_layers_23_post_attention_layernorm_weight_to_fp16, y = hidden_states_713_cast_fp16)[name = string("input_187_cast_fp16")]; + tensor model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155229824))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156458688))))[name = string("model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_165_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized, x = input_187_cast_fp16)[name = string("linear_165_cast_fp16")]; + tensor var_4469_cast_fp16 = silu(x = linear_165_cast_fp16)[name = string("op_4469_cast_fp16")]; + tensor model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156612352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157841216))))[name = string("model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_166_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized, x = input_187_cast_fp16)[name = string("linear_166_cast_fp16")]; + tensor input_191_cast_fp16 = mul(x = var_4469_cast_fp16, y = linear_166_cast_fp16)[name = string("input_191_cast_fp16")]; + tensor model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157994880))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159223744))))[name = string("model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_167_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized, x = input_191_cast_fp16)[name = string("linear_167_cast_fp16")]; + tensor hidden_states_719_cast_fp16 = add(x = hidden_states_709_cast_fp16, y = linear_167_cast_fp16)[name = string("hidden_states_719_cast_fp16")]; + fp16 var_80_promoted_48_to_fp16 = const()[name = string("op_80_promoted_48_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4482_cast_fp16 = pow(x = hidden_states_719_cast_fp16, y = var_80_promoted_48_to_fp16)[name = string("op_4482_cast_fp16")]; + tensor variance_97_axes_0 = const()[name = string("variance_97_axes_0"), val = tensor([-1])]; + bool variance_97_keep_dims_0 = const()[name = string("variance_97_keep_dims_0"), val = bool(true)]; + tensor variance_97_cast_fp16 = reduce_mean(axes = variance_97_axes_0, keep_dims = variance_97_keep_dims_0, x = var_4482_cast_fp16)[name = string("variance_97_cast_fp16")]; + fp16 var_4485_to_fp16 = const()[name = string("op_4485_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4486_cast_fp16 = add(x = variance_97_cast_fp16, y = var_4485_to_fp16)[name = string("op_4486_cast_fp16")]; + fp32 var_4487_epsilon_0 = const()[name = string("op_4487_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4487_cast_fp16 = rsqrt(epsilon = var_4487_epsilon_0, x = var_4486_cast_fp16)[name = string("op_4487_cast_fp16")]; + tensor hidden_states_723_cast_fp16 = mul(x = hidden_states_719_cast_fp16, y = var_4487_cast_fp16)[name = string("hidden_states_723_cast_fp16")]; + tensor model_model_layers_24_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159377408)))]; + tensor hidden_states_727_cast_fp16 = mul(x = model_model_layers_24_input_layernorm_weight_to_fp16, y = hidden_states_723_cast_fp16)[name = string("hidden_states_727_cast_fp16")]; + tensor var_4498_shape_cast_fp16 = shape(x = hidden_states_727_cast_fp16)[name = string("op_4498_shape_cast_fp16")]; + int32 gather_436 = const()[name = string("gather_436"), val = int32(1)]; + int32 gather_437_axis_0 = const()[name = string("gather_437_axis_0"), val = int32(0)]; + int32 gather_437_batch_dims_0 = const()[name = string("gather_437_batch_dims_0"), val = int32(0)]; + bool gather_437_validate_indices_0 = const()[name = string("gather_437_validate_indices_0"), val = bool(false)]; + string var_4498_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4498_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_437_to_uint16 = const()[name = string("select_437_to_uint16"), val = uint16(1)]; + tensor var_4498_shape_cast_fp16_to_uint16 = cast(dtype = var_4498_shape_cast_fp16_to_uint16_dtype_0, x = var_4498_shape_cast_fp16)[name = string("cast_63")]; + uint16 gather_437_cast_uint16 = gather(axis = gather_437_axis_0, batch_dims = gather_437_batch_dims_0, indices = select_437_to_uint16, validate_indices = gather_437_validate_indices_0, x = var_4498_shape_cast_fp16_to_uint16)[name = string("gather_437_cast_uint16")]; + string gather_437_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_437_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_24_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159379392))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159840256))))[name = string("model_model_layers_24_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_168_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_24_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_727_cast_fp16)[name = string("linear_168_cast_fp16")]; + tensor model_model_layers_24_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159897920))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160051584))))[name = string("model_model_layers_24_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_24_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_727_cast_fp16)[name = string("linear_169_cast_fp16")]; + tensor model_model_layers_24_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160070848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160224512))))[name = string("model_model_layers_24_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_24_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_727_cast_fp16)[name = string("linear_170_cast_fp16")]; + tensor concat_456x = const()[name = string("concat_456x"), val = tensor([1, -1, 15, 64])]; + tensor var_4507_cast_fp16 = reshape(shape = concat_456x, x = linear_168_cast_fp16)[name = string("op_4507_cast_fp16")]; + tensor q_49_perm_0 = const()[name = string("q_49_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_457x = const()[name = string("concat_457x"), val = tensor([1, -1, 5, 64])]; + tensor var_4510_cast_fp16 = reshape(shape = concat_457x, x = linear_169_cast_fp16)[name = string("op_4510_cast_fp16")]; + tensor k_49_perm_0 = const()[name = string("k_49_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_458x = const()[name = string("concat_458x"), val = tensor([1, -1, 5, 64])]; + tensor var_4513_cast_fp16 = reshape(shape = concat_458x, x = linear_170_cast_fp16)[name = string("op_4513_cast_fp16")]; + tensor v_state_49_perm_0 = const()[name = string("v_state_49_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_49_cast_fp16 = transpose(perm = q_49_perm_0, x = var_4507_cast_fp16)[name = string("transpose_31")]; + tensor var_4517_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4517_cast_fp16")]; + tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; + tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; + fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4528_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_4528_cast_fp16")]; + bool var_4530_interleave_0 = const()[name = string("op_4530_interleave_0"), val = bool(false)]; + tensor var_4530_cast_fp16 = concat(axis = var_85, interleave = var_4530_interleave_0, values = (var_4528_cast_fp16, x1_97_cast_fp16))[name = string("op_4530_cast_fp16")]; + tensor var_4531_cast_fp16 = mul(x = var_4530_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4531_cast_fp16")]; + tensor query_states_99_cast_fp16 = add(x = var_4517_cast_fp16, y = var_4531_cast_fp16)[name = string("query_states_99_cast_fp16")]; + tensor k_49_cast_fp16 = transpose(perm = k_49_perm_0, x = var_4510_cast_fp16)[name = string("transpose_30")]; + tensor var_4533_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4533_cast_fp16")]; + tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; + tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; + fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4544_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_4544_cast_fp16")]; + bool var_4546_interleave_0 = const()[name = string("op_4546_interleave_0"), val = bool(false)]; + tensor var_4546_cast_fp16 = concat(axis = var_85, interleave = var_4546_interleave_0, values = (var_4544_cast_fp16, x1_99_cast_fp16))[name = string("op_4546_cast_fp16")]; + tensor var_4547_cast_fp16 = mul(x = var_4546_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4547_cast_fp16")]; + tensor k_state_49_cast_fp16 = add(x = var_4533_cast_fp16, y = var_4547_cast_fp16)[name = string("k_state_49_cast_fp16")]; + tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([0])]; + tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; + tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; + tensor concat_461_values0_0 = const()[name = string("concat_461_values0_0"), val = tensor([24])]; + int32 concat_461_axis_0 = const()[name = string("concat_461_axis_0"), val = int32(0)]; + bool concat_461_interleave_0 = const()[name = string("concat_461_interleave_0"), val = bool(false)]; + tensor concat_461 = concat(axis = concat_461_axis_0, interleave = concat_461_interleave_0, values = (concat_461_values0_0, expand_dims_288, expand_dims_289, expand_dims_2, expand_dims_291))[name = string("concat_461")]; + tensor key_cache_internal_tensor_assign_25_stride_0 = const()[name = string("key_cache_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_25_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_25_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_461, begin_mask = key_cache_internal_tensor_assign_25_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_25_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_25_squeeze_mask_0, stride = key_cache_internal_tensor_assign_25_stride_0, update = k_state_49_cast_fp16, x = coreml_update_state_110)[name = string("key_cache_internal_tensor_assign_25_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_25_cast_fp16, input = key_cache)[name = string("coreml_update_state_112_write_state")]; + tensor coreml_update_state_112 = read_state(input = key_cache)[name = string("coreml_update_state_112")]; + tensor value_cache_internal_tensor_assign_25_stride_0 = const()[name = string("value_cache_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_25_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_25_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_49_cast_fp16 = transpose(perm = v_state_49_perm_0, x = var_4513_cast_fp16)[name = string("transpose_29")]; + tensor value_cache_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_461, begin_mask = value_cache_internal_tensor_assign_25_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_25_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_25_squeeze_mask_0, stride = value_cache_internal_tensor_assign_25_stride_0, update = v_state_49_cast_fp16, x = coreml_update_state_111)[name = string("value_cache_internal_tensor_assign_25_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_25_cast_fp16, input = value_cache)[name = string("coreml_update_state_113_write_state")]; + tensor coreml_update_state_113 = read_state(input = value_cache)[name = string("coreml_update_state_113")]; + tensor var_4570_begin_0 = const()[name = string("op_4570_begin_0"), val = tensor([24, 0, 0, 0, 0])]; + tensor var_4570_end_0 = const()[name = string("op_4570_end_0"), val = tensor([25, 1, 5, 2048, 64])]; + tensor var_4570_end_mask_0 = const()[name = string("op_4570_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4570_squeeze_mask_0 = const()[name = string("op_4570_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4570_cast_fp16 = slice_by_index(begin = var_4570_begin_0, end = var_4570_end_0, end_mask = var_4570_end_mask_0, squeeze_mask = var_4570_squeeze_mask_0, x = coreml_update_state_112)[name = string("op_4570_cast_fp16")]; + tensor var_4573_begin_0 = const()[name = string("op_4573_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4573_end_mask_0 = const()[name = string("op_4573_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4573_cast_fp16 = slice_by_index(begin = var_4573_begin_0, end = concat_11, end_mask = var_4573_end_mask_0, x = var_4570_cast_fp16)[name = string("op_4573_cast_fp16")]; + tensor var_4575_begin_0 = const()[name = string("op_4575_begin_0"), val = tensor([24, 0, 0, 0, 0])]; + tensor var_4575_end_0 = const()[name = string("op_4575_end_0"), val = tensor([25, 1, 5, 2048, 64])]; + tensor var_4575_end_mask_0 = const()[name = string("op_4575_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4575_squeeze_mask_0 = const()[name = string("op_4575_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4575_cast_fp16 = slice_by_index(begin = var_4575_begin_0, end = var_4575_end_0, end_mask = var_4575_end_mask_0, squeeze_mask = var_4575_squeeze_mask_0, x = coreml_update_state_113)[name = string("op_4575_cast_fp16")]; + tensor var_4578_begin_0 = const()[name = string("op_4578_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4578_end_mask_0 = const()[name = string("op_4578_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4578_cast_fp16 = slice_by_index(begin = var_4578_begin_0, end = concat_11, end_mask = var_4578_end_mask_0, x = var_4575_cast_fp16)[name = string("op_4578_cast_fp16")]; + tensor var_4580_shape_cast_fp16 = shape(x = var_4573_cast_fp16)[name = string("op_4580_shape_cast_fp16")]; + int32 gather_445 = const()[name = string("gather_445"), val = int32(1)]; + int32 gather_446 = const()[name = string("gather_446"), val = int32(5)]; + int32 gather_447_axis_0 = const()[name = string("gather_447_axis_0"), val = int32(0)]; + int32 gather_447_batch_dims_0 = const()[name = string("gather_447_batch_dims_0"), val = int32(0)]; + bool gather_447_validate_indices_0 = const()[name = string("gather_447_validate_indices_0"), val = bool(false)]; + string var_4580_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4580_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_447_to_uint16 = const()[name = string("select_447_to_uint16"), val = uint16(2)]; + tensor var_4580_shape_cast_fp16_to_uint16 = cast(dtype = var_4580_shape_cast_fp16_to_uint16_dtype_0, x = var_4580_shape_cast_fp16)[name = string("cast_62")]; + uint16 gather_447_cast_uint16 = gather(axis = gather_447_axis_0, batch_dims = gather_447_batch_dims_0, indices = select_447_to_uint16, validate_indices = gather_447_validate_indices_0, x = var_4580_shape_cast_fp16_to_uint16)[name = string("gather_447_cast_uint16")]; + string gather_447_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_447_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_448 = const()[name = string("gather_448"), val = int32(64)]; + tensor var_4587_axes_0 = const()[name = string("op_4587_axes_0"), val = tensor([2])]; + tensor var_4587_cast_fp16 = expand_dims(axes = var_4587_axes_0, x = var_4573_cast_fp16)[name = string("op_4587_cast_fp16")]; + tensor shape_497_cast_fp16 = shape(x = var_4587_cast_fp16)[name = string("shape_497_cast_fp16")]; + int32 concat_469_axis_0 = const()[name = string("concat_469_axis_0"), val = int32(0)]; + bool concat_469_interleave_0 = const()[name = string("concat_469_interleave_0"), val = bool(false)]; + int32 gather_447_cast_uint16_to_int32 = cast(dtype = gather_447_cast_uint16_to_int32_dtype_0, x = gather_447_cast_uint16)[name = string("cast_61")]; + tensor concat_469 = concat(axis = concat_469_axis_0, interleave = concat_469_interleave_0, values = (gather_445, gather_446, var_89, gather_447_cast_uint16_to_int32, gather_448))[name = string("concat_469")]; + tensor real_div_48 = real_div(x = concat_469, y = shape_497_cast_fp16)[name = string("real_div_48")]; + tensor hidden_states_731_cast_fp16 = tile(reps = real_div_48, x = var_4587_cast_fp16)[name = string("hidden_states_731_cast_fp16")]; + tensor concat_470x = const()[name = string("concat_470x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_99_cast_fp16 = reshape(shape = concat_470x, x = hidden_states_731_cast_fp16)[name = string("key_states_99_cast_fp16")]; + tensor var_4597_shape_cast_fp16 = shape(x = var_4578_cast_fp16)[name = string("op_4597_shape_cast_fp16")]; + int32 gather_449 = const()[name = string("gather_449"), val = int32(1)]; + int32 gather_450 = const()[name = string("gather_450"), val = int32(5)]; + int32 gather_451_axis_0 = const()[name = string("gather_451_axis_0"), val = int32(0)]; + int32 gather_451_batch_dims_0 = const()[name = string("gather_451_batch_dims_0"), val = int32(0)]; + bool gather_451_validate_indices_0 = const()[name = string("gather_451_validate_indices_0"), val = bool(false)]; + string var_4597_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4597_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_451_to_uint16 = const()[name = string("select_451_to_uint16"), val = uint16(2)]; + tensor var_4597_shape_cast_fp16_to_uint16 = cast(dtype = var_4597_shape_cast_fp16_to_uint16_dtype_0, x = var_4597_shape_cast_fp16)[name = string("cast_60")]; + uint16 gather_451_cast_uint16 = gather(axis = gather_451_axis_0, batch_dims = gather_451_batch_dims_0, indices = select_451_to_uint16, validate_indices = gather_451_validate_indices_0, x = var_4597_shape_cast_fp16_to_uint16)[name = string("gather_451_cast_uint16")]; + string gather_451_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_451_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_452 = const()[name = string("gather_452"), val = int32(64)]; + tensor var_4604_axes_0 = const()[name = string("op_4604_axes_0"), val = tensor([2])]; + tensor var_4604_cast_fp16 = expand_dims(axes = var_4604_axes_0, x = var_4578_cast_fp16)[name = string("op_4604_cast_fp16")]; + tensor shape_502_cast_fp16 = shape(x = var_4604_cast_fp16)[name = string("shape_502_cast_fp16")]; + int32 concat_471_axis_0 = const()[name = string("concat_471_axis_0"), val = int32(0)]; + bool concat_471_interleave_0 = const()[name = string("concat_471_interleave_0"), val = bool(false)]; + int32 gather_451_cast_uint16_to_int32 = cast(dtype = gather_451_cast_uint16_to_int32_dtype_0, x = gather_451_cast_uint16)[name = string("cast_59")]; + tensor concat_471 = concat(axis = concat_471_axis_0, interleave = concat_471_interleave_0, values = (gather_449, gather_450, var_89, gather_451_cast_uint16_to_int32, gather_452))[name = string("concat_471")]; + tensor real_div_49 = real_div(x = concat_471, y = shape_502_cast_fp16)[name = string("real_div_49")]; + tensor hidden_states_735_cast_fp16 = tile(reps = real_div_49, x = var_4604_cast_fp16)[name = string("hidden_states_735_cast_fp16")]; + tensor concat_472x = const()[name = string("concat_472x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_99_cast_fp16 = reshape(shape = concat_472x, x = hidden_states_735_cast_fp16)[name = string("value_states_99_cast_fp16")]; + tensor var_4614_shape_cast_fp16 = shape(x = key_states_99_cast_fp16)[name = string("op_4614_shape_cast_fp16")]; + int32 gather_453_axis_0 = const()[name = string("gather_453_axis_0"), val = int32(0)]; + int32 gather_453_batch_dims_0 = const()[name = string("gather_453_batch_dims_0"), val = int32(0)]; + bool gather_453_validate_indices_0 = const()[name = string("gather_453_validate_indices_0"), val = bool(false)]; + string var_4614_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4614_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_453_to_uint16 = const()[name = string("select_453_to_uint16"), val = uint16(2)]; + tensor var_4614_shape_cast_fp16_to_uint16 = cast(dtype = var_4614_shape_cast_fp16_to_uint16_dtype_0, x = var_4614_shape_cast_fp16)[name = string("cast_58")]; + uint16 gather_453_cast_uint16 = gather(axis = gather_453_axis_0, batch_dims = gather_453_batch_dims_0, indices = select_453_to_uint16, validate_indices = gather_453_validate_indices_0, x = var_4614_shape_cast_fp16_to_uint16)[name = string("gather_453_cast_uint16")]; + string gather_453_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_453_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_473_values0_0 = const()[name = string("concat_473_values0_0"), val = int32(1)]; + int32 concat_473_values1_0 = const()[name = string("concat_473_values1_0"), val = int32(1)]; + int32 concat_473_values2_0 = const()[name = string("concat_473_values2_0"), val = int32(0)]; + int32 concat_473_axis_0 = const()[name = string("concat_473_axis_0"), val = int32(0)]; + bool concat_473_interleave_0 = const()[name = string("concat_473_interleave_0"), val = bool(false)]; + int32 gather_453_cast_uint16_to_int32 = cast(dtype = gather_453_cast_uint16_to_int32_dtype_0, x = gather_453_cast_uint16)[name = string("cast_57")]; + tensor concat_473 = concat(axis = concat_473_axis_0, interleave = concat_473_interleave_0, values = (concat_473_values0_0, concat_473_values1_0, concat_473_values2_0, gather_453_cast_uint16_to_int32))[name = string("concat_473")]; + tensor causal_mask_51_begin_0 = const()[name = string("causal_mask_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_51_end_mask_0 = const()[name = string("causal_mask_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_51_cast_fp16 = slice_by_index(begin = causal_mask_51_begin_0, end = concat_473, end_mask = causal_mask_51_end_mask_0, x = causal_mask)[name = string("causal_mask_51_cast_fp16")]; + tensor attn_output_97_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_51_cast_fp16, key = key_states_99_cast_fp16, query = query_states_99_cast_fp16, value = value_states_99_cast_fp16)[name = string("attn_output_97_cast_fp16")]; + tensor var_4620_perm_0 = const()[name = string("op_4620_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_474_axis_0 = const()[name = string("concat_474_axis_0"), val = int32(0)]; + bool concat_474_interleave_0 = const()[name = string("concat_474_interleave_0"), val = bool(false)]; + int32 gather_437_cast_uint16_to_int32 = cast(dtype = gather_437_cast_uint16_to_int32_dtype_0, x = gather_437_cast_uint16)[name = string("cast_56")]; + tensor concat_474 = concat(axis = concat_474_axis_0, interleave = concat_474_interleave_0, values = (gather_436, gather_437_cast_uint16_to_int32, var_85))[name = string("concat_474")]; + tensor var_4620_cast_fp16 = transpose(perm = var_4620_perm_0, x = attn_output_97_cast_fp16)[name = string("transpose_28")]; + tensor input_193_cast_fp16 = reshape(shape = concat_474, x = var_4620_cast_fp16)[name = string("input_193_cast_fp16")]; + tensor model_model_layers_24_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160243776))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160704640))))[name = string("model_model_layers_24_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_171_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_24_self_attn_o_proj_weight_to_fp16_quantized, x = input_193_cast_fp16)[name = string("linear_171_cast_fp16")]; + tensor hidden_states_739_cast_fp16 = add(x = hidden_states_719_cast_fp16, y = linear_171_cast_fp16)[name = string("hidden_states_739_cast_fp16")]; + fp16 var_80_promoted_49_to_fp16 = const()[name = string("op_80_promoted_49_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4629_cast_fp16 = pow(x = hidden_states_739_cast_fp16, y = var_80_promoted_49_to_fp16)[name = string("op_4629_cast_fp16")]; + tensor variance_99_axes_0 = const()[name = string("variance_99_axes_0"), val = tensor([-1])]; + bool variance_99_keep_dims_0 = const()[name = string("variance_99_keep_dims_0"), val = bool(true)]; + tensor variance_99_cast_fp16 = reduce_mean(axes = variance_99_axes_0, keep_dims = variance_99_keep_dims_0, x = var_4629_cast_fp16)[name = string("variance_99_cast_fp16")]; + fp16 var_4632_to_fp16 = const()[name = string("op_4632_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4633_cast_fp16 = add(x = variance_99_cast_fp16, y = var_4632_to_fp16)[name = string("op_4633_cast_fp16")]; + fp32 var_4634_epsilon_0 = const()[name = string("op_4634_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4634_cast_fp16 = rsqrt(epsilon = var_4634_epsilon_0, x = var_4633_cast_fp16)[name = string("op_4634_cast_fp16")]; + tensor hidden_states_743_cast_fp16 = mul(x = hidden_states_739_cast_fp16, y = var_4634_cast_fp16)[name = string("hidden_states_743_cast_fp16")]; + tensor model_model_layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160762304)))]; + tensor input_195_cast_fp16 = mul(x = model_model_layers_24_post_attention_layernorm_weight_to_fp16, y = hidden_states_743_cast_fp16)[name = string("input_195_cast_fp16")]; + tensor model_model_layers_24_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160764288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161993152))))[name = string("model_model_layers_24_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_172_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_24_mlp_gate_proj_weight_to_fp16_quantized, x = input_195_cast_fp16)[name = string("linear_172_cast_fp16")]; + tensor var_4646_cast_fp16 = silu(x = linear_172_cast_fp16)[name = string("op_4646_cast_fp16")]; + tensor model_model_layers_24_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162146816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163375680))))[name = string("model_model_layers_24_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_173_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_24_mlp_up_proj_weight_to_fp16_quantized, x = input_195_cast_fp16)[name = string("linear_173_cast_fp16")]; + tensor input_199_cast_fp16 = mul(x = var_4646_cast_fp16, y = linear_173_cast_fp16)[name = string("input_199_cast_fp16")]; + tensor model_model_layers_24_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163529344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164758208))))[name = string("model_model_layers_24_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_174_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_24_mlp_down_proj_weight_to_fp16_quantized, x = input_199_cast_fp16)[name = string("linear_174_cast_fp16")]; + tensor hidden_states_749_cast_fp16 = add(x = hidden_states_739_cast_fp16, y = linear_174_cast_fp16)[name = string("hidden_states_749_cast_fp16")]; + fp16 var_80_promoted_50_to_fp16 = const()[name = string("op_80_promoted_50_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4659_cast_fp16 = pow(x = hidden_states_749_cast_fp16, y = var_80_promoted_50_to_fp16)[name = string("op_4659_cast_fp16")]; + tensor variance_101_axes_0 = const()[name = string("variance_101_axes_0"), val = tensor([-1])]; + bool variance_101_keep_dims_0 = const()[name = string("variance_101_keep_dims_0"), val = bool(true)]; + tensor variance_101_cast_fp16 = reduce_mean(axes = variance_101_axes_0, keep_dims = variance_101_keep_dims_0, x = var_4659_cast_fp16)[name = string("variance_101_cast_fp16")]; + fp16 var_4662_to_fp16 = const()[name = string("op_4662_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4663_cast_fp16 = add(x = variance_101_cast_fp16, y = var_4662_to_fp16)[name = string("op_4663_cast_fp16")]; + fp32 var_4664_epsilon_0 = const()[name = string("op_4664_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4664_cast_fp16 = rsqrt(epsilon = var_4664_epsilon_0, x = var_4663_cast_fp16)[name = string("op_4664_cast_fp16")]; + tensor hidden_states_753_cast_fp16 = mul(x = hidden_states_749_cast_fp16, y = var_4664_cast_fp16)[name = string("hidden_states_753_cast_fp16")]; + tensor model_model_layers_25_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164911872)))]; + tensor hidden_states_757_cast_fp16 = mul(x = model_model_layers_25_input_layernorm_weight_to_fp16, y = hidden_states_753_cast_fp16)[name = string("hidden_states_757_cast_fp16")]; + tensor var_4675_shape_cast_fp16 = shape(x = hidden_states_757_cast_fp16)[name = string("op_4675_shape_cast_fp16")]; + int32 gather_454 = const()[name = string("gather_454"), val = int32(1)]; + int32 gather_455_axis_0 = const()[name = string("gather_455_axis_0"), val = int32(0)]; + int32 gather_455_batch_dims_0 = const()[name = string("gather_455_batch_dims_0"), val = int32(0)]; + bool gather_455_validate_indices_0 = const()[name = string("gather_455_validate_indices_0"), val = bool(false)]; + string var_4675_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4675_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_455_to_uint16 = const()[name = string("select_455_to_uint16"), val = uint16(1)]; + tensor var_4675_shape_cast_fp16_to_uint16 = cast(dtype = var_4675_shape_cast_fp16_to_uint16_dtype_0, x = var_4675_shape_cast_fp16)[name = string("cast_55")]; + uint16 gather_455_cast_uint16 = gather(axis = gather_455_axis_0, batch_dims = gather_455_batch_dims_0, indices = select_455_to_uint16, validate_indices = gather_455_validate_indices_0, x = var_4675_shape_cast_fp16_to_uint16)[name = string("gather_455_cast_uint16")]; + string gather_455_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_455_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_25_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164913856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165374720))))[name = string("model_model_layers_25_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_175_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_25_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_757_cast_fp16)[name = string("linear_175_cast_fp16")]; + tensor model_model_layers_25_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165432384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165586048))))[name = string("model_model_layers_25_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_176_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_25_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_757_cast_fp16)[name = string("linear_176_cast_fp16")]; + tensor model_model_layers_25_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165605312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165758976))))[name = string("model_model_layers_25_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_25_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_757_cast_fp16)[name = string("linear_177_cast_fp16")]; + tensor concat_475x = const()[name = string("concat_475x"), val = tensor([1, -1, 15, 64])]; + tensor var_4684_cast_fp16 = reshape(shape = concat_475x, x = linear_175_cast_fp16)[name = string("op_4684_cast_fp16")]; + tensor q_51_perm_0 = const()[name = string("q_51_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_476x = const()[name = string("concat_476x"), val = tensor([1, -1, 5, 64])]; + tensor var_4687_cast_fp16 = reshape(shape = concat_476x, x = linear_176_cast_fp16)[name = string("op_4687_cast_fp16")]; + tensor k_51_perm_0 = const()[name = string("k_51_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_477x = const()[name = string("concat_477x"), val = tensor([1, -1, 5, 64])]; + tensor var_4690_cast_fp16 = reshape(shape = concat_477x, x = linear_177_cast_fp16)[name = string("op_4690_cast_fp16")]; + tensor v_state_51_perm_0 = const()[name = string("v_state_51_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_51_cast_fp16 = transpose(perm = q_51_perm_0, x = var_4684_cast_fp16)[name = string("transpose_27")]; + tensor var_4694_cast_fp16 = mul(x = q_51_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4694_cast_fp16")]; + tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_51_cast_fp16)[name = string("x1_101_cast_fp16")]; + tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_51_cast_fp16)[name = string("x2_101_cast_fp16")]; + fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4705_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_4705_cast_fp16")]; + bool var_4707_interleave_0 = const()[name = string("op_4707_interleave_0"), val = bool(false)]; + tensor var_4707_cast_fp16 = concat(axis = var_85, interleave = var_4707_interleave_0, values = (var_4705_cast_fp16, x1_101_cast_fp16))[name = string("op_4707_cast_fp16")]; + tensor var_4708_cast_fp16 = mul(x = var_4707_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4708_cast_fp16")]; + tensor query_states_103_cast_fp16 = add(x = var_4694_cast_fp16, y = var_4708_cast_fp16)[name = string("query_states_103_cast_fp16")]; + tensor k_51_cast_fp16 = transpose(perm = k_51_perm_0, x = var_4687_cast_fp16)[name = string("transpose_26")]; + tensor var_4710_cast_fp16 = mul(x = k_51_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4710_cast_fp16")]; + tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_103_cast_fp16 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = k_51_cast_fp16)[name = string("x1_103_cast_fp16")]; + tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_103_cast_fp16 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = k_51_cast_fp16)[name = string("x2_103_cast_fp16")]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4721_cast_fp16 = mul(x = x2_103_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_4721_cast_fp16")]; + bool var_4723_interleave_0 = const()[name = string("op_4723_interleave_0"), val = bool(false)]; + tensor var_4723_cast_fp16 = concat(axis = var_85, interleave = var_4723_interleave_0, values = (var_4721_cast_fp16, x1_103_cast_fp16))[name = string("op_4723_cast_fp16")]; + tensor var_4724_cast_fp16 = mul(x = var_4723_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4724_cast_fp16")]; + tensor k_state_51_cast_fp16 = add(x = var_4710_cast_fp16, y = var_4724_cast_fp16)[name = string("k_state_51_cast_fp16")]; + tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([0])]; + tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; + tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; + tensor concat_480_values0_0 = const()[name = string("concat_480_values0_0"), val = tensor([25])]; + int32 concat_480_axis_0 = const()[name = string("concat_480_axis_0"), val = int32(0)]; + bool concat_480_interleave_0 = const()[name = string("concat_480_interleave_0"), val = bool(false)]; + tensor concat_480 = concat(axis = concat_480_axis_0, interleave = concat_480_interleave_0, values = (concat_480_values0_0, expand_dims_300, expand_dims_301, expand_dims_2, expand_dims_303))[name = string("concat_480")]; + tensor key_cache_internal_tensor_assign_26_stride_0 = const()[name = string("key_cache_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_26_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_26_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_480, begin_mask = key_cache_internal_tensor_assign_26_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_26_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_26_squeeze_mask_0, stride = key_cache_internal_tensor_assign_26_stride_0, update = k_state_51_cast_fp16, x = coreml_update_state_112)[name = string("key_cache_internal_tensor_assign_26_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_26_cast_fp16, input = key_cache)[name = string("coreml_update_state_114_write_state")]; + tensor coreml_update_state_114 = read_state(input = key_cache)[name = string("coreml_update_state_114")]; + tensor value_cache_internal_tensor_assign_26_stride_0 = const()[name = string("value_cache_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_26_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_26_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_51_cast_fp16 = transpose(perm = v_state_51_perm_0, x = var_4690_cast_fp16)[name = string("transpose_25")]; + tensor value_cache_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_480, begin_mask = value_cache_internal_tensor_assign_26_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_26_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_26_squeeze_mask_0, stride = value_cache_internal_tensor_assign_26_stride_0, update = v_state_51_cast_fp16, x = coreml_update_state_113)[name = string("value_cache_internal_tensor_assign_26_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_26_cast_fp16, input = value_cache)[name = string("coreml_update_state_115_write_state")]; + tensor coreml_update_state_115 = read_state(input = value_cache)[name = string("coreml_update_state_115")]; + tensor var_4747_begin_0 = const()[name = string("op_4747_begin_0"), val = tensor([25, 0, 0, 0, 0])]; + tensor var_4747_end_0 = const()[name = string("op_4747_end_0"), val = tensor([26, 1, 5, 2048, 64])]; + tensor var_4747_end_mask_0 = const()[name = string("op_4747_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4747_squeeze_mask_0 = const()[name = string("op_4747_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4747_cast_fp16 = slice_by_index(begin = var_4747_begin_0, end = var_4747_end_0, end_mask = var_4747_end_mask_0, squeeze_mask = var_4747_squeeze_mask_0, x = coreml_update_state_114)[name = string("op_4747_cast_fp16")]; + tensor var_4750_begin_0 = const()[name = string("op_4750_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4750_end_mask_0 = const()[name = string("op_4750_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4750_cast_fp16 = slice_by_index(begin = var_4750_begin_0, end = concat_11, end_mask = var_4750_end_mask_0, x = var_4747_cast_fp16)[name = string("op_4750_cast_fp16")]; + tensor var_4752_begin_0 = const()[name = string("op_4752_begin_0"), val = tensor([25, 0, 0, 0, 0])]; + tensor var_4752_end_0 = const()[name = string("op_4752_end_0"), val = tensor([26, 1, 5, 2048, 64])]; + tensor var_4752_end_mask_0 = const()[name = string("op_4752_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4752_squeeze_mask_0 = const()[name = string("op_4752_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4752_cast_fp16 = slice_by_index(begin = var_4752_begin_0, end = var_4752_end_0, end_mask = var_4752_end_mask_0, squeeze_mask = var_4752_squeeze_mask_0, x = coreml_update_state_115)[name = string("op_4752_cast_fp16")]; + tensor var_4755_begin_0 = const()[name = string("op_4755_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4755_end_mask_0 = const()[name = string("op_4755_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4755_cast_fp16 = slice_by_index(begin = var_4755_begin_0, end = concat_11, end_mask = var_4755_end_mask_0, x = var_4752_cast_fp16)[name = string("op_4755_cast_fp16")]; + tensor var_4757_shape_cast_fp16 = shape(x = var_4750_cast_fp16)[name = string("op_4757_shape_cast_fp16")]; + int32 gather_463 = const()[name = string("gather_463"), val = int32(1)]; + int32 gather_464 = const()[name = string("gather_464"), val = int32(5)]; + int32 gather_465_axis_0 = const()[name = string("gather_465_axis_0"), val = int32(0)]; + int32 gather_465_batch_dims_0 = const()[name = string("gather_465_batch_dims_0"), val = int32(0)]; + bool gather_465_validate_indices_0 = const()[name = string("gather_465_validate_indices_0"), val = bool(false)]; + string var_4757_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4757_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_465_to_uint16 = const()[name = string("select_465_to_uint16"), val = uint16(2)]; + tensor var_4757_shape_cast_fp16_to_uint16 = cast(dtype = var_4757_shape_cast_fp16_to_uint16_dtype_0, x = var_4757_shape_cast_fp16)[name = string("cast_54")]; + uint16 gather_465_cast_uint16 = gather(axis = gather_465_axis_0, batch_dims = gather_465_batch_dims_0, indices = select_465_to_uint16, validate_indices = gather_465_validate_indices_0, x = var_4757_shape_cast_fp16_to_uint16)[name = string("gather_465_cast_uint16")]; + string gather_465_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_465_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_466 = const()[name = string("gather_466"), val = int32(64)]; + tensor var_4764_axes_0 = const()[name = string("op_4764_axes_0"), val = tensor([2])]; + tensor var_4764_cast_fp16 = expand_dims(axes = var_4764_axes_0, x = var_4750_cast_fp16)[name = string("op_4764_cast_fp16")]; + tensor shape_517_cast_fp16 = shape(x = var_4764_cast_fp16)[name = string("shape_517_cast_fp16")]; + int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)]; + bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)]; + int32 gather_465_cast_uint16_to_int32 = cast(dtype = gather_465_cast_uint16_to_int32_dtype_0, x = gather_465_cast_uint16)[name = string("cast_53")]; + tensor concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (gather_463, gather_464, var_89, gather_465_cast_uint16_to_int32, gather_466))[name = string("concat_488")]; + tensor real_div_50 = real_div(x = concat_488, y = shape_517_cast_fp16)[name = string("real_div_50")]; + tensor hidden_states_761_cast_fp16 = tile(reps = real_div_50, x = var_4764_cast_fp16)[name = string("hidden_states_761_cast_fp16")]; + tensor concat_489x = const()[name = string("concat_489x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_103_cast_fp16 = reshape(shape = concat_489x, x = hidden_states_761_cast_fp16)[name = string("key_states_103_cast_fp16")]; + tensor var_4774_shape_cast_fp16 = shape(x = var_4755_cast_fp16)[name = string("op_4774_shape_cast_fp16")]; + int32 gather_467 = const()[name = string("gather_467"), val = int32(1)]; + int32 gather_468 = const()[name = string("gather_468"), val = int32(5)]; + int32 gather_469_axis_0 = const()[name = string("gather_469_axis_0"), val = int32(0)]; + int32 gather_469_batch_dims_0 = const()[name = string("gather_469_batch_dims_0"), val = int32(0)]; + bool gather_469_validate_indices_0 = const()[name = string("gather_469_validate_indices_0"), val = bool(false)]; + string var_4774_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4774_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_469_to_uint16 = const()[name = string("select_469_to_uint16"), val = uint16(2)]; + tensor var_4774_shape_cast_fp16_to_uint16 = cast(dtype = var_4774_shape_cast_fp16_to_uint16_dtype_0, x = var_4774_shape_cast_fp16)[name = string("cast_52")]; + uint16 gather_469_cast_uint16 = gather(axis = gather_469_axis_0, batch_dims = gather_469_batch_dims_0, indices = select_469_to_uint16, validate_indices = gather_469_validate_indices_0, x = var_4774_shape_cast_fp16_to_uint16)[name = string("gather_469_cast_uint16")]; + string gather_469_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_469_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_470 = const()[name = string("gather_470"), val = int32(64)]; + tensor var_4781_axes_0 = const()[name = string("op_4781_axes_0"), val = tensor([2])]; + tensor var_4781_cast_fp16 = expand_dims(axes = var_4781_axes_0, x = var_4755_cast_fp16)[name = string("op_4781_cast_fp16")]; + tensor shape_522_cast_fp16 = shape(x = var_4781_cast_fp16)[name = string("shape_522_cast_fp16")]; + int32 concat_490_axis_0 = const()[name = string("concat_490_axis_0"), val = int32(0)]; + bool concat_490_interleave_0 = const()[name = string("concat_490_interleave_0"), val = bool(false)]; + int32 gather_469_cast_uint16_to_int32 = cast(dtype = gather_469_cast_uint16_to_int32_dtype_0, x = gather_469_cast_uint16)[name = string("cast_51")]; + tensor concat_490 = concat(axis = concat_490_axis_0, interleave = concat_490_interleave_0, values = (gather_467, gather_468, var_89, gather_469_cast_uint16_to_int32, gather_470))[name = string("concat_490")]; + tensor real_div_51 = real_div(x = concat_490, y = shape_522_cast_fp16)[name = string("real_div_51")]; + tensor hidden_states_765_cast_fp16 = tile(reps = real_div_51, x = var_4781_cast_fp16)[name = string("hidden_states_765_cast_fp16")]; + tensor concat_491x = const()[name = string("concat_491x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_103_cast_fp16 = reshape(shape = concat_491x, x = hidden_states_765_cast_fp16)[name = string("value_states_103_cast_fp16")]; + tensor var_4791_shape_cast_fp16 = shape(x = key_states_103_cast_fp16)[name = string("op_4791_shape_cast_fp16")]; + int32 gather_471_axis_0 = const()[name = string("gather_471_axis_0"), val = int32(0)]; + int32 gather_471_batch_dims_0 = const()[name = string("gather_471_batch_dims_0"), val = int32(0)]; + bool gather_471_validate_indices_0 = const()[name = string("gather_471_validate_indices_0"), val = bool(false)]; + string var_4791_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4791_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_471_to_uint16 = const()[name = string("select_471_to_uint16"), val = uint16(2)]; + tensor var_4791_shape_cast_fp16_to_uint16 = cast(dtype = var_4791_shape_cast_fp16_to_uint16_dtype_0, x = var_4791_shape_cast_fp16)[name = string("cast_50")]; + uint16 gather_471_cast_uint16 = gather(axis = gather_471_axis_0, batch_dims = gather_471_batch_dims_0, indices = select_471_to_uint16, validate_indices = gather_471_validate_indices_0, x = var_4791_shape_cast_fp16_to_uint16)[name = string("gather_471_cast_uint16")]; + string gather_471_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_471_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_492_values0_0 = const()[name = string("concat_492_values0_0"), val = int32(1)]; + int32 concat_492_values1_0 = const()[name = string("concat_492_values1_0"), val = int32(1)]; + int32 concat_492_values2_0 = const()[name = string("concat_492_values2_0"), val = int32(0)]; + int32 concat_492_axis_0 = const()[name = string("concat_492_axis_0"), val = int32(0)]; + bool concat_492_interleave_0 = const()[name = string("concat_492_interleave_0"), val = bool(false)]; + int32 gather_471_cast_uint16_to_int32 = cast(dtype = gather_471_cast_uint16_to_int32_dtype_0, x = gather_471_cast_uint16)[name = string("cast_49")]; + tensor concat_492 = concat(axis = concat_492_axis_0, interleave = concat_492_interleave_0, values = (concat_492_values0_0, concat_492_values1_0, concat_492_values2_0, gather_471_cast_uint16_to_int32))[name = string("concat_492")]; + tensor causal_mask_53_begin_0 = const()[name = string("causal_mask_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_53_end_mask_0 = const()[name = string("causal_mask_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_53_cast_fp16 = slice_by_index(begin = causal_mask_53_begin_0, end = concat_492, end_mask = causal_mask_53_end_mask_0, x = causal_mask)[name = string("causal_mask_53_cast_fp16")]; + tensor attn_output_101_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_53_cast_fp16, key = key_states_103_cast_fp16, query = query_states_103_cast_fp16, value = value_states_103_cast_fp16)[name = string("attn_output_101_cast_fp16")]; + tensor var_4797_perm_0 = const()[name = string("op_4797_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_493_axis_0 = const()[name = string("concat_493_axis_0"), val = int32(0)]; + bool concat_493_interleave_0 = const()[name = string("concat_493_interleave_0"), val = bool(false)]; + int32 gather_455_cast_uint16_to_int32 = cast(dtype = gather_455_cast_uint16_to_int32_dtype_0, x = gather_455_cast_uint16)[name = string("cast_48")]; + tensor concat_493 = concat(axis = concat_493_axis_0, interleave = concat_493_interleave_0, values = (gather_454, gather_455_cast_uint16_to_int32, var_85))[name = string("concat_493")]; + tensor var_4797_cast_fp16 = transpose(perm = var_4797_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_24")]; + tensor input_201_cast_fp16 = reshape(shape = concat_493, x = var_4797_cast_fp16)[name = string("input_201_cast_fp16")]; + tensor model_model_layers_25_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165778240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166239104))))[name = string("model_model_layers_25_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_178_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_25_self_attn_o_proj_weight_to_fp16_quantized, x = input_201_cast_fp16)[name = string("linear_178_cast_fp16")]; + tensor hidden_states_769_cast_fp16 = add(x = hidden_states_749_cast_fp16, y = linear_178_cast_fp16)[name = string("hidden_states_769_cast_fp16")]; + fp16 var_80_promoted_51_to_fp16 = const()[name = string("op_80_promoted_51_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4806_cast_fp16 = pow(x = hidden_states_769_cast_fp16, y = var_80_promoted_51_to_fp16)[name = string("op_4806_cast_fp16")]; + tensor variance_103_axes_0 = const()[name = string("variance_103_axes_0"), val = tensor([-1])]; + bool variance_103_keep_dims_0 = const()[name = string("variance_103_keep_dims_0"), val = bool(true)]; + tensor variance_103_cast_fp16 = reduce_mean(axes = variance_103_axes_0, keep_dims = variance_103_keep_dims_0, x = var_4806_cast_fp16)[name = string("variance_103_cast_fp16")]; + fp16 var_4809_to_fp16 = const()[name = string("op_4809_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4810_cast_fp16 = add(x = variance_103_cast_fp16, y = var_4809_to_fp16)[name = string("op_4810_cast_fp16")]; + fp32 var_4811_epsilon_0 = const()[name = string("op_4811_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4811_cast_fp16 = rsqrt(epsilon = var_4811_epsilon_0, x = var_4810_cast_fp16)[name = string("op_4811_cast_fp16")]; + tensor hidden_states_773_cast_fp16 = mul(x = hidden_states_769_cast_fp16, y = var_4811_cast_fp16)[name = string("hidden_states_773_cast_fp16")]; + tensor model_model_layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166296768)))]; + tensor input_203_cast_fp16 = mul(x = model_model_layers_25_post_attention_layernorm_weight_to_fp16, y = hidden_states_773_cast_fp16)[name = string("input_203_cast_fp16")]; + tensor model_model_layers_25_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166298752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167527616))))[name = string("model_model_layers_25_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_179_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_25_mlp_gate_proj_weight_to_fp16_quantized, x = input_203_cast_fp16)[name = string("linear_179_cast_fp16")]; + tensor var_4823_cast_fp16 = silu(x = linear_179_cast_fp16)[name = string("op_4823_cast_fp16")]; + tensor model_model_layers_25_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167681280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168910144))))[name = string("model_model_layers_25_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_180_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_25_mlp_up_proj_weight_to_fp16_quantized, x = input_203_cast_fp16)[name = string("linear_180_cast_fp16")]; + tensor input_207_cast_fp16 = mul(x = var_4823_cast_fp16, y = linear_180_cast_fp16)[name = string("input_207_cast_fp16")]; + tensor model_model_layers_25_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169063808))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170292672))))[name = string("model_model_layers_25_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_181_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_25_mlp_down_proj_weight_to_fp16_quantized, x = input_207_cast_fp16)[name = string("linear_181_cast_fp16")]; + tensor hidden_states_779_cast_fp16 = add(x = hidden_states_769_cast_fp16, y = linear_181_cast_fp16)[name = string("hidden_states_779_cast_fp16")]; + fp16 var_80_promoted_52_to_fp16 = const()[name = string("op_80_promoted_52_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4836_cast_fp16 = pow(x = hidden_states_779_cast_fp16, y = var_80_promoted_52_to_fp16)[name = string("op_4836_cast_fp16")]; + tensor variance_105_axes_0 = const()[name = string("variance_105_axes_0"), val = tensor([-1])]; + bool variance_105_keep_dims_0 = const()[name = string("variance_105_keep_dims_0"), val = bool(true)]; + tensor variance_105_cast_fp16 = reduce_mean(axes = variance_105_axes_0, keep_dims = variance_105_keep_dims_0, x = var_4836_cast_fp16)[name = string("variance_105_cast_fp16")]; + fp16 var_4839_to_fp16 = const()[name = string("op_4839_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4840_cast_fp16 = add(x = variance_105_cast_fp16, y = var_4839_to_fp16)[name = string("op_4840_cast_fp16")]; + fp32 var_4841_epsilon_0 = const()[name = string("op_4841_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4841_cast_fp16 = rsqrt(epsilon = var_4841_epsilon_0, x = var_4840_cast_fp16)[name = string("op_4841_cast_fp16")]; + tensor hidden_states_783_cast_fp16 = mul(x = hidden_states_779_cast_fp16, y = var_4841_cast_fp16)[name = string("hidden_states_783_cast_fp16")]; + tensor model_model_layers_26_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170446336)))]; + tensor hidden_states_787_cast_fp16 = mul(x = model_model_layers_26_input_layernorm_weight_to_fp16, y = hidden_states_783_cast_fp16)[name = string("hidden_states_787_cast_fp16")]; + tensor var_4852_shape_cast_fp16 = shape(x = hidden_states_787_cast_fp16)[name = string("op_4852_shape_cast_fp16")]; + int32 gather_472 = const()[name = string("gather_472"), val = int32(1)]; + int32 gather_473_axis_0 = const()[name = string("gather_473_axis_0"), val = int32(0)]; + int32 gather_473_batch_dims_0 = const()[name = string("gather_473_batch_dims_0"), val = int32(0)]; + bool gather_473_validate_indices_0 = const()[name = string("gather_473_validate_indices_0"), val = bool(false)]; + string var_4852_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4852_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_473_to_uint16 = const()[name = string("select_473_to_uint16"), val = uint16(1)]; + tensor var_4852_shape_cast_fp16_to_uint16 = cast(dtype = var_4852_shape_cast_fp16_to_uint16_dtype_0, x = var_4852_shape_cast_fp16)[name = string("cast_47")]; + uint16 gather_473_cast_uint16 = gather(axis = gather_473_axis_0, batch_dims = gather_473_batch_dims_0, indices = select_473_to_uint16, validate_indices = gather_473_validate_indices_0, x = var_4852_shape_cast_fp16_to_uint16)[name = string("gather_473_cast_uint16")]; + string gather_473_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_473_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_26_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170448320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170909184))))[name = string("model_model_layers_26_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_182_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_26_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_787_cast_fp16)[name = string("linear_182_cast_fp16")]; + tensor model_model_layers_26_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170966848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171120512))))[name = string("model_model_layers_26_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_183_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_26_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_787_cast_fp16)[name = string("linear_183_cast_fp16")]; + tensor model_model_layers_26_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171139776))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171293440))))[name = string("model_model_layers_26_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_184_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_26_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_787_cast_fp16)[name = string("linear_184_cast_fp16")]; + tensor concat_494x = const()[name = string("concat_494x"), val = tensor([1, -1, 15, 64])]; + tensor var_4861_cast_fp16 = reshape(shape = concat_494x, x = linear_182_cast_fp16)[name = string("op_4861_cast_fp16")]; + tensor q_53_perm_0 = const()[name = string("q_53_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_495x = const()[name = string("concat_495x"), val = tensor([1, -1, 5, 64])]; + tensor var_4864_cast_fp16 = reshape(shape = concat_495x, x = linear_183_cast_fp16)[name = string("op_4864_cast_fp16")]; + tensor k_53_perm_0 = const()[name = string("k_53_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_496x = const()[name = string("concat_496x"), val = tensor([1, -1, 5, 64])]; + tensor var_4867_cast_fp16 = reshape(shape = concat_496x, x = linear_184_cast_fp16)[name = string("op_4867_cast_fp16")]; + tensor v_state_53_perm_0 = const()[name = string("v_state_53_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_53_cast_fp16 = transpose(perm = q_53_perm_0, x = var_4861_cast_fp16)[name = string("transpose_23")]; + tensor var_4871_cast_fp16 = mul(x = q_53_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4871_cast_fp16")]; + tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_105_cast_fp16 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = q_53_cast_fp16)[name = string("x1_105_cast_fp16")]; + tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_105_cast_fp16 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = q_53_cast_fp16)[name = string("x2_105_cast_fp16")]; + fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4882_cast_fp16 = mul(x = x2_105_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_4882_cast_fp16")]; + bool var_4884_interleave_0 = const()[name = string("op_4884_interleave_0"), val = bool(false)]; + tensor var_4884_cast_fp16 = concat(axis = var_85, interleave = var_4884_interleave_0, values = (var_4882_cast_fp16, x1_105_cast_fp16))[name = string("op_4884_cast_fp16")]; + tensor var_4885_cast_fp16 = mul(x = var_4884_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4885_cast_fp16")]; + tensor query_states_107_cast_fp16 = add(x = var_4871_cast_fp16, y = var_4885_cast_fp16)[name = string("query_states_107_cast_fp16")]; + tensor k_53_cast_fp16 = transpose(perm = k_53_perm_0, x = var_4864_cast_fp16)[name = string("transpose_22")]; + tensor var_4887_cast_fp16 = mul(x = k_53_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4887_cast_fp16")]; + tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_107_cast_fp16 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = k_53_cast_fp16)[name = string("x1_107_cast_fp16")]; + tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_107_cast_fp16 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = k_53_cast_fp16)[name = string("x2_107_cast_fp16")]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4898_cast_fp16 = mul(x = x2_107_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_4898_cast_fp16")]; + bool var_4900_interleave_0 = const()[name = string("op_4900_interleave_0"), val = bool(false)]; + tensor var_4900_cast_fp16 = concat(axis = var_85, interleave = var_4900_interleave_0, values = (var_4898_cast_fp16, x1_107_cast_fp16))[name = string("op_4900_cast_fp16")]; + tensor var_4901_cast_fp16 = mul(x = var_4900_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4901_cast_fp16")]; + tensor k_state_53_cast_fp16 = add(x = var_4887_cast_fp16, y = var_4901_cast_fp16)[name = string("k_state_53_cast_fp16")]; + tensor expand_dims_312 = const()[name = string("expand_dims_312"), val = tensor([0])]; + tensor expand_dims_313 = const()[name = string("expand_dims_313"), val = tensor([0])]; + tensor expand_dims_315 = const()[name = string("expand_dims_315"), val = tensor([0])]; + tensor concat_499_values0_0 = const()[name = string("concat_499_values0_0"), val = tensor([26])]; + int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)]; + bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)]; + tensor concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (concat_499_values0_0, expand_dims_312, expand_dims_313, expand_dims_2, expand_dims_315))[name = string("concat_499")]; + tensor key_cache_internal_tensor_assign_27_stride_0 = const()[name = string("key_cache_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_27_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_27_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_499, begin_mask = key_cache_internal_tensor_assign_27_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_27_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_27_squeeze_mask_0, stride = key_cache_internal_tensor_assign_27_stride_0, update = k_state_53_cast_fp16, x = coreml_update_state_114)[name = string("key_cache_internal_tensor_assign_27_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_27_cast_fp16, input = key_cache)[name = string("coreml_update_state_116_write_state")]; + tensor coreml_update_state_116 = read_state(input = key_cache)[name = string("coreml_update_state_116")]; + tensor value_cache_internal_tensor_assign_27_stride_0 = const()[name = string("value_cache_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_27_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_27_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_53_cast_fp16 = transpose(perm = v_state_53_perm_0, x = var_4867_cast_fp16)[name = string("transpose_21")]; + tensor value_cache_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_499, begin_mask = value_cache_internal_tensor_assign_27_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_27_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_27_squeeze_mask_0, stride = value_cache_internal_tensor_assign_27_stride_0, update = v_state_53_cast_fp16, x = coreml_update_state_115)[name = string("value_cache_internal_tensor_assign_27_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_27_cast_fp16, input = value_cache)[name = string("coreml_update_state_117_write_state")]; + tensor coreml_update_state_117 = read_state(input = value_cache)[name = string("coreml_update_state_117")]; + tensor var_4924_begin_0 = const()[name = string("op_4924_begin_0"), val = tensor([26, 0, 0, 0, 0])]; + tensor var_4924_end_0 = const()[name = string("op_4924_end_0"), val = tensor([27, 1, 5, 2048, 64])]; + tensor var_4924_end_mask_0 = const()[name = string("op_4924_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4924_squeeze_mask_0 = const()[name = string("op_4924_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4924_cast_fp16 = slice_by_index(begin = var_4924_begin_0, end = var_4924_end_0, end_mask = var_4924_end_mask_0, squeeze_mask = var_4924_squeeze_mask_0, x = coreml_update_state_116)[name = string("op_4924_cast_fp16")]; + tensor var_4927_begin_0 = const()[name = string("op_4927_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4927_end_mask_0 = const()[name = string("op_4927_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4927_cast_fp16 = slice_by_index(begin = var_4927_begin_0, end = concat_11, end_mask = var_4927_end_mask_0, x = var_4924_cast_fp16)[name = string("op_4927_cast_fp16")]; + tensor var_4929_begin_0 = const()[name = string("op_4929_begin_0"), val = tensor([26, 0, 0, 0, 0])]; + tensor var_4929_end_0 = const()[name = string("op_4929_end_0"), val = tensor([27, 1, 5, 2048, 64])]; + tensor var_4929_end_mask_0 = const()[name = string("op_4929_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4929_squeeze_mask_0 = const()[name = string("op_4929_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4929_cast_fp16 = slice_by_index(begin = var_4929_begin_0, end = var_4929_end_0, end_mask = var_4929_end_mask_0, squeeze_mask = var_4929_squeeze_mask_0, x = coreml_update_state_117)[name = string("op_4929_cast_fp16")]; + tensor var_4932_begin_0 = const()[name = string("op_4932_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4932_end_mask_0 = const()[name = string("op_4932_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4932_cast_fp16 = slice_by_index(begin = var_4932_begin_0, end = concat_11, end_mask = var_4932_end_mask_0, x = var_4929_cast_fp16)[name = string("op_4932_cast_fp16")]; + tensor var_4934_shape_cast_fp16 = shape(x = var_4927_cast_fp16)[name = string("op_4934_shape_cast_fp16")]; + int32 gather_481 = const()[name = string("gather_481"), val = int32(1)]; + int32 gather_482 = const()[name = string("gather_482"), val = int32(5)]; + int32 gather_483_axis_0 = const()[name = string("gather_483_axis_0"), val = int32(0)]; + int32 gather_483_batch_dims_0 = const()[name = string("gather_483_batch_dims_0"), val = int32(0)]; + bool gather_483_validate_indices_0 = const()[name = string("gather_483_validate_indices_0"), val = bool(false)]; + string var_4934_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4934_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_483_to_uint16 = const()[name = string("select_483_to_uint16"), val = uint16(2)]; + tensor var_4934_shape_cast_fp16_to_uint16 = cast(dtype = var_4934_shape_cast_fp16_to_uint16_dtype_0, x = var_4934_shape_cast_fp16)[name = string("cast_46")]; + uint16 gather_483_cast_uint16 = gather(axis = gather_483_axis_0, batch_dims = gather_483_batch_dims_0, indices = select_483_to_uint16, validate_indices = gather_483_validate_indices_0, x = var_4934_shape_cast_fp16_to_uint16)[name = string("gather_483_cast_uint16")]; + string gather_483_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_483_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_484 = const()[name = string("gather_484"), val = int32(64)]; + tensor var_4941_axes_0 = const()[name = string("op_4941_axes_0"), val = tensor([2])]; + tensor var_4941_cast_fp16 = expand_dims(axes = var_4941_axes_0, x = var_4927_cast_fp16)[name = string("op_4941_cast_fp16")]; + tensor shape_537_cast_fp16 = shape(x = var_4941_cast_fp16)[name = string("shape_537_cast_fp16")]; + int32 concat_507_axis_0 = const()[name = string("concat_507_axis_0"), val = int32(0)]; + bool concat_507_interleave_0 = const()[name = string("concat_507_interleave_0"), val = bool(false)]; + int32 gather_483_cast_uint16_to_int32 = cast(dtype = gather_483_cast_uint16_to_int32_dtype_0, x = gather_483_cast_uint16)[name = string("cast_45")]; + tensor concat_507 = concat(axis = concat_507_axis_0, interleave = concat_507_interleave_0, values = (gather_481, gather_482, var_89, gather_483_cast_uint16_to_int32, gather_484))[name = string("concat_507")]; + tensor real_div_52 = real_div(x = concat_507, y = shape_537_cast_fp16)[name = string("real_div_52")]; + tensor hidden_states_791_cast_fp16 = tile(reps = real_div_52, x = var_4941_cast_fp16)[name = string("hidden_states_791_cast_fp16")]; + tensor concat_508x = const()[name = string("concat_508x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_107_cast_fp16 = reshape(shape = concat_508x, x = hidden_states_791_cast_fp16)[name = string("key_states_107_cast_fp16")]; + tensor var_4951_shape_cast_fp16 = shape(x = var_4932_cast_fp16)[name = string("op_4951_shape_cast_fp16")]; + int32 gather_485 = const()[name = string("gather_485"), val = int32(1)]; + int32 gather_486 = const()[name = string("gather_486"), val = int32(5)]; + int32 gather_487_axis_0 = const()[name = string("gather_487_axis_0"), val = int32(0)]; + int32 gather_487_batch_dims_0 = const()[name = string("gather_487_batch_dims_0"), val = int32(0)]; + bool gather_487_validate_indices_0 = const()[name = string("gather_487_validate_indices_0"), val = bool(false)]; + string var_4951_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4951_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_487_to_uint16 = const()[name = string("select_487_to_uint16"), val = uint16(2)]; + tensor var_4951_shape_cast_fp16_to_uint16 = cast(dtype = var_4951_shape_cast_fp16_to_uint16_dtype_0, x = var_4951_shape_cast_fp16)[name = string("cast_44")]; + uint16 gather_487_cast_uint16 = gather(axis = gather_487_axis_0, batch_dims = gather_487_batch_dims_0, indices = select_487_to_uint16, validate_indices = gather_487_validate_indices_0, x = var_4951_shape_cast_fp16_to_uint16)[name = string("gather_487_cast_uint16")]; + string gather_487_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_487_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_488 = const()[name = string("gather_488"), val = int32(64)]; + tensor var_4958_axes_0 = const()[name = string("op_4958_axes_0"), val = tensor([2])]; + tensor var_4958_cast_fp16 = expand_dims(axes = var_4958_axes_0, x = var_4932_cast_fp16)[name = string("op_4958_cast_fp16")]; + tensor shape_542_cast_fp16 = shape(x = var_4958_cast_fp16)[name = string("shape_542_cast_fp16")]; + int32 concat_509_axis_0 = const()[name = string("concat_509_axis_0"), val = int32(0)]; + bool concat_509_interleave_0 = const()[name = string("concat_509_interleave_0"), val = bool(false)]; + int32 gather_487_cast_uint16_to_int32 = cast(dtype = gather_487_cast_uint16_to_int32_dtype_0, x = gather_487_cast_uint16)[name = string("cast_43")]; + tensor concat_509 = concat(axis = concat_509_axis_0, interleave = concat_509_interleave_0, values = (gather_485, gather_486, var_89, gather_487_cast_uint16_to_int32, gather_488))[name = string("concat_509")]; + tensor real_div_53 = real_div(x = concat_509, y = shape_542_cast_fp16)[name = string("real_div_53")]; + tensor hidden_states_795_cast_fp16 = tile(reps = real_div_53, x = var_4958_cast_fp16)[name = string("hidden_states_795_cast_fp16")]; + tensor concat_510x = const()[name = string("concat_510x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_107_cast_fp16 = reshape(shape = concat_510x, x = hidden_states_795_cast_fp16)[name = string("value_states_107_cast_fp16")]; + tensor var_4968_shape_cast_fp16 = shape(x = key_states_107_cast_fp16)[name = string("op_4968_shape_cast_fp16")]; + int32 gather_489_axis_0 = const()[name = string("gather_489_axis_0"), val = int32(0)]; + int32 gather_489_batch_dims_0 = const()[name = string("gather_489_batch_dims_0"), val = int32(0)]; + bool gather_489_validate_indices_0 = const()[name = string("gather_489_validate_indices_0"), val = bool(false)]; + string var_4968_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4968_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_489_to_uint16 = const()[name = string("select_489_to_uint16"), val = uint16(2)]; + tensor var_4968_shape_cast_fp16_to_uint16 = cast(dtype = var_4968_shape_cast_fp16_to_uint16_dtype_0, x = var_4968_shape_cast_fp16)[name = string("cast_42")]; + uint16 gather_489_cast_uint16 = gather(axis = gather_489_axis_0, batch_dims = gather_489_batch_dims_0, indices = select_489_to_uint16, validate_indices = gather_489_validate_indices_0, x = var_4968_shape_cast_fp16_to_uint16)[name = string("gather_489_cast_uint16")]; + string gather_489_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_489_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = int32(1)]; + int32 concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = int32(1)]; + int32 concat_511_values2_0 = const()[name = string("concat_511_values2_0"), val = int32(0)]; + int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)]; + bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)]; + int32 gather_489_cast_uint16_to_int32 = cast(dtype = gather_489_cast_uint16_to_int32_dtype_0, x = gather_489_cast_uint16)[name = string("cast_41")]; + tensor concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, concat_511_values2_0, gather_489_cast_uint16_to_int32))[name = string("concat_511")]; + tensor causal_mask_55_begin_0 = const()[name = string("causal_mask_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_55_end_mask_0 = const()[name = string("causal_mask_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_55_cast_fp16 = slice_by_index(begin = causal_mask_55_begin_0, end = concat_511, end_mask = causal_mask_55_end_mask_0, x = causal_mask)[name = string("causal_mask_55_cast_fp16")]; + tensor attn_output_105_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_55_cast_fp16, key = key_states_107_cast_fp16, query = query_states_107_cast_fp16, value = value_states_107_cast_fp16)[name = string("attn_output_105_cast_fp16")]; + tensor var_4974_perm_0 = const()[name = string("op_4974_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_512_axis_0 = const()[name = string("concat_512_axis_0"), val = int32(0)]; + bool concat_512_interleave_0 = const()[name = string("concat_512_interleave_0"), val = bool(false)]; + int32 gather_473_cast_uint16_to_int32 = cast(dtype = gather_473_cast_uint16_to_int32_dtype_0, x = gather_473_cast_uint16)[name = string("cast_40")]; + tensor concat_512 = concat(axis = concat_512_axis_0, interleave = concat_512_interleave_0, values = (gather_472, gather_473_cast_uint16_to_int32, var_85))[name = string("concat_512")]; + tensor var_4974_cast_fp16 = transpose(perm = var_4974_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_20")]; + tensor input_209_cast_fp16 = reshape(shape = concat_512, x = var_4974_cast_fp16)[name = string("input_209_cast_fp16")]; + tensor model_model_layers_26_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171312704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171773568))))[name = string("model_model_layers_26_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_185_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_26_self_attn_o_proj_weight_to_fp16_quantized, x = input_209_cast_fp16)[name = string("linear_185_cast_fp16")]; + tensor hidden_states_799_cast_fp16 = add(x = hidden_states_779_cast_fp16, y = linear_185_cast_fp16)[name = string("hidden_states_799_cast_fp16")]; + fp16 var_80_promoted_53_to_fp16 = const()[name = string("op_80_promoted_53_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4983_cast_fp16 = pow(x = hidden_states_799_cast_fp16, y = var_80_promoted_53_to_fp16)[name = string("op_4983_cast_fp16")]; + tensor variance_107_axes_0 = const()[name = string("variance_107_axes_0"), val = tensor([-1])]; + bool variance_107_keep_dims_0 = const()[name = string("variance_107_keep_dims_0"), val = bool(true)]; + tensor variance_107_cast_fp16 = reduce_mean(axes = variance_107_axes_0, keep_dims = variance_107_keep_dims_0, x = var_4983_cast_fp16)[name = string("variance_107_cast_fp16")]; + fp16 var_4986_to_fp16 = const()[name = string("op_4986_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4987_cast_fp16 = add(x = variance_107_cast_fp16, y = var_4986_to_fp16)[name = string("op_4987_cast_fp16")]; + fp32 var_4988_epsilon_0 = const()[name = string("op_4988_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4988_cast_fp16 = rsqrt(epsilon = var_4988_epsilon_0, x = var_4987_cast_fp16)[name = string("op_4988_cast_fp16")]; + tensor hidden_states_803_cast_fp16 = mul(x = hidden_states_799_cast_fp16, y = var_4988_cast_fp16)[name = string("hidden_states_803_cast_fp16")]; + tensor model_model_layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171831232)))]; + tensor input_211_cast_fp16 = mul(x = model_model_layers_26_post_attention_layernorm_weight_to_fp16, y = hidden_states_803_cast_fp16)[name = string("input_211_cast_fp16")]; + tensor model_model_layers_26_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171833216))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173062080))))[name = string("model_model_layers_26_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_186_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_26_mlp_gate_proj_weight_to_fp16_quantized, x = input_211_cast_fp16)[name = string("linear_186_cast_fp16")]; + tensor var_5000_cast_fp16 = silu(x = linear_186_cast_fp16)[name = string("op_5000_cast_fp16")]; + tensor model_model_layers_26_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173215744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174444608))))[name = string("model_model_layers_26_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_187_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_26_mlp_up_proj_weight_to_fp16_quantized, x = input_211_cast_fp16)[name = string("linear_187_cast_fp16")]; + tensor input_215_cast_fp16 = mul(x = var_5000_cast_fp16, y = linear_187_cast_fp16)[name = string("input_215_cast_fp16")]; + tensor model_model_layers_26_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174598272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175827136))))[name = string("model_model_layers_26_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_188_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_26_mlp_down_proj_weight_to_fp16_quantized, x = input_215_cast_fp16)[name = string("linear_188_cast_fp16")]; + tensor hidden_states_809_cast_fp16 = add(x = hidden_states_799_cast_fp16, y = linear_188_cast_fp16)[name = string("hidden_states_809_cast_fp16")]; + fp16 var_80_promoted_54_to_fp16 = const()[name = string("op_80_promoted_54_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5013_cast_fp16 = pow(x = hidden_states_809_cast_fp16, y = var_80_promoted_54_to_fp16)[name = string("op_5013_cast_fp16")]; + tensor variance_109_axes_0 = const()[name = string("variance_109_axes_0"), val = tensor([-1])]; + bool variance_109_keep_dims_0 = const()[name = string("variance_109_keep_dims_0"), val = bool(true)]; + tensor variance_109_cast_fp16 = reduce_mean(axes = variance_109_axes_0, keep_dims = variance_109_keep_dims_0, x = var_5013_cast_fp16)[name = string("variance_109_cast_fp16")]; + fp16 var_5016_to_fp16 = const()[name = string("op_5016_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5017_cast_fp16 = add(x = variance_109_cast_fp16, y = var_5016_to_fp16)[name = string("op_5017_cast_fp16")]; + fp32 var_5018_epsilon_0 = const()[name = string("op_5018_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5018_cast_fp16 = rsqrt(epsilon = var_5018_epsilon_0, x = var_5017_cast_fp16)[name = string("op_5018_cast_fp16")]; + tensor hidden_states_813_cast_fp16 = mul(x = hidden_states_809_cast_fp16, y = var_5018_cast_fp16)[name = string("hidden_states_813_cast_fp16")]; + tensor model_model_layers_27_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175980800)))]; + tensor hidden_states_817_cast_fp16 = mul(x = model_model_layers_27_input_layernorm_weight_to_fp16, y = hidden_states_813_cast_fp16)[name = string("hidden_states_817_cast_fp16")]; + tensor var_5029_shape_cast_fp16 = shape(x = hidden_states_817_cast_fp16)[name = string("op_5029_shape_cast_fp16")]; + int32 gather_490 = const()[name = string("gather_490"), val = int32(1)]; + int32 gather_491_axis_0 = const()[name = string("gather_491_axis_0"), val = int32(0)]; + int32 gather_491_batch_dims_0 = const()[name = string("gather_491_batch_dims_0"), val = int32(0)]; + bool gather_491_validate_indices_0 = const()[name = string("gather_491_validate_indices_0"), val = bool(false)]; + string var_5029_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5029_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_491_to_uint16 = const()[name = string("select_491_to_uint16"), val = uint16(1)]; + tensor var_5029_shape_cast_fp16_to_uint16 = cast(dtype = var_5029_shape_cast_fp16_to_uint16_dtype_0, x = var_5029_shape_cast_fp16)[name = string("cast_39")]; + uint16 gather_491_cast_uint16 = gather(axis = gather_491_axis_0, batch_dims = gather_491_batch_dims_0, indices = select_491_to_uint16, validate_indices = gather_491_validate_indices_0, x = var_5029_shape_cast_fp16_to_uint16)[name = string("gather_491_cast_uint16")]; + string gather_491_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_491_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_27_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175982784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176443648))))[name = string("model_model_layers_27_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_189_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_27_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_817_cast_fp16)[name = string("linear_189_cast_fp16")]; + tensor model_model_layers_27_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176501312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176654976))))[name = string("model_model_layers_27_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_27_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_817_cast_fp16)[name = string("linear_190_cast_fp16")]; + tensor model_model_layers_27_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176674240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176827904))))[name = string("model_model_layers_27_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_191_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_27_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_817_cast_fp16)[name = string("linear_191_cast_fp16")]; + tensor concat_513x = const()[name = string("concat_513x"), val = tensor([1, -1, 15, 64])]; + tensor var_5038_cast_fp16 = reshape(shape = concat_513x, x = linear_189_cast_fp16)[name = string("op_5038_cast_fp16")]; + tensor q_55_perm_0 = const()[name = string("q_55_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_514x = const()[name = string("concat_514x"), val = tensor([1, -1, 5, 64])]; + tensor var_5041_cast_fp16 = reshape(shape = concat_514x, x = linear_190_cast_fp16)[name = string("op_5041_cast_fp16")]; + tensor k_55_perm_0 = const()[name = string("k_55_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_515x = const()[name = string("concat_515x"), val = tensor([1, -1, 5, 64])]; + tensor var_5044_cast_fp16 = reshape(shape = concat_515x, x = linear_191_cast_fp16)[name = string("op_5044_cast_fp16")]; + tensor v_state_55_perm_0 = const()[name = string("v_state_55_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_55_cast_fp16 = transpose(perm = q_55_perm_0, x = var_5038_cast_fp16)[name = string("transpose_19")]; + tensor var_5048_cast_fp16 = mul(x = q_55_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5048_cast_fp16")]; + tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_109_cast_fp16 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = q_55_cast_fp16)[name = string("x1_109_cast_fp16")]; + tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_109_cast_fp16 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = q_55_cast_fp16)[name = string("x2_109_cast_fp16")]; + fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5059_cast_fp16 = mul(x = x2_109_cast_fp16, y = const_57_promoted_to_fp16)[name = string("op_5059_cast_fp16")]; + bool var_5061_interleave_0 = const()[name = string("op_5061_interleave_0"), val = bool(false)]; + tensor var_5061_cast_fp16 = concat(axis = var_85, interleave = var_5061_interleave_0, values = (var_5059_cast_fp16, x1_109_cast_fp16))[name = string("op_5061_cast_fp16")]; + tensor var_5062_cast_fp16 = mul(x = var_5061_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5062_cast_fp16")]; + tensor query_states_111_cast_fp16 = add(x = var_5048_cast_fp16, y = var_5062_cast_fp16)[name = string("query_states_111_cast_fp16")]; + tensor k_55_cast_fp16 = transpose(perm = k_55_perm_0, x = var_5041_cast_fp16)[name = string("transpose_18")]; + tensor var_5064_cast_fp16 = mul(x = k_55_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5064_cast_fp16")]; + tensor x1_111_begin_0 = const()[name = string("x1_111_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_111_end_0 = const()[name = string("x1_111_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_111_end_mask_0 = const()[name = string("x1_111_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_111_cast_fp16 = slice_by_index(begin = x1_111_begin_0, end = x1_111_end_0, end_mask = x1_111_end_mask_0, x = k_55_cast_fp16)[name = string("x1_111_cast_fp16")]; + tensor x2_111_begin_0 = const()[name = string("x2_111_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_111_end_0 = const()[name = string("x2_111_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_111_end_mask_0 = const()[name = string("x2_111_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_111_cast_fp16 = slice_by_index(begin = x2_111_begin_0, end = x2_111_end_0, end_mask = x2_111_end_mask_0, x = k_55_cast_fp16)[name = string("x2_111_cast_fp16")]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5075_cast_fp16 = mul(x = x2_111_cast_fp16, y = const_58_promoted_to_fp16)[name = string("op_5075_cast_fp16")]; + bool var_5077_interleave_0 = const()[name = string("op_5077_interleave_0"), val = bool(false)]; + tensor var_5077_cast_fp16 = concat(axis = var_85, interleave = var_5077_interleave_0, values = (var_5075_cast_fp16, x1_111_cast_fp16))[name = string("op_5077_cast_fp16")]; + tensor var_5078_cast_fp16 = mul(x = var_5077_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5078_cast_fp16")]; + tensor k_state_55_cast_fp16 = add(x = var_5064_cast_fp16, y = var_5078_cast_fp16)[name = string("k_state_55_cast_fp16")]; + tensor expand_dims_324 = const()[name = string("expand_dims_324"), val = tensor([0])]; + tensor expand_dims_325 = const()[name = string("expand_dims_325"), val = tensor([0])]; + tensor expand_dims_327 = const()[name = string("expand_dims_327"), val = tensor([0])]; + tensor concat_518_values0_0 = const()[name = string("concat_518_values0_0"), val = tensor([27])]; + int32 concat_518_axis_0 = const()[name = string("concat_518_axis_0"), val = int32(0)]; + bool concat_518_interleave_0 = const()[name = string("concat_518_interleave_0"), val = bool(false)]; + tensor concat_518 = concat(axis = concat_518_axis_0, interleave = concat_518_interleave_0, values = (concat_518_values0_0, expand_dims_324, expand_dims_325, expand_dims_2, expand_dims_327))[name = string("concat_518")]; + tensor key_cache_internal_tensor_assign_28_stride_0 = const()[name = string("key_cache_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_28_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_28_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_518, begin_mask = key_cache_internal_tensor_assign_28_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_28_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_28_squeeze_mask_0, stride = key_cache_internal_tensor_assign_28_stride_0, update = k_state_55_cast_fp16, x = coreml_update_state_116)[name = string("key_cache_internal_tensor_assign_28_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_28_cast_fp16, input = key_cache)[name = string("coreml_update_state_118_write_state")]; + tensor coreml_update_state_118 = read_state(input = key_cache)[name = string("coreml_update_state_118")]; + tensor value_cache_internal_tensor_assign_28_stride_0 = const()[name = string("value_cache_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_28_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_28_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_55_cast_fp16 = transpose(perm = v_state_55_perm_0, x = var_5044_cast_fp16)[name = string("transpose_17")]; + tensor value_cache_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_518, begin_mask = value_cache_internal_tensor_assign_28_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_28_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_28_squeeze_mask_0, stride = value_cache_internal_tensor_assign_28_stride_0, update = v_state_55_cast_fp16, x = coreml_update_state_117)[name = string("value_cache_internal_tensor_assign_28_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_28_cast_fp16, input = value_cache)[name = string("coreml_update_state_119_write_state")]; + tensor coreml_update_state_119 = read_state(input = value_cache)[name = string("coreml_update_state_119")]; + tensor var_5101_begin_0 = const()[name = string("op_5101_begin_0"), val = tensor([27, 0, 0, 0, 0])]; + tensor var_5101_end_0 = const()[name = string("op_5101_end_0"), val = tensor([28, 1, 5, 2048, 64])]; + tensor var_5101_end_mask_0 = const()[name = string("op_5101_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5101_squeeze_mask_0 = const()[name = string("op_5101_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5101_cast_fp16 = slice_by_index(begin = var_5101_begin_0, end = var_5101_end_0, end_mask = var_5101_end_mask_0, squeeze_mask = var_5101_squeeze_mask_0, x = coreml_update_state_118)[name = string("op_5101_cast_fp16")]; + tensor var_5104_begin_0 = const()[name = string("op_5104_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5104_end_mask_0 = const()[name = string("op_5104_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5104_cast_fp16 = slice_by_index(begin = var_5104_begin_0, end = concat_11, end_mask = var_5104_end_mask_0, x = var_5101_cast_fp16)[name = string("op_5104_cast_fp16")]; + tensor var_5106_begin_0 = const()[name = string("op_5106_begin_0"), val = tensor([27, 0, 0, 0, 0])]; + tensor var_5106_end_0 = const()[name = string("op_5106_end_0"), val = tensor([28, 1, 5, 2048, 64])]; + tensor var_5106_end_mask_0 = const()[name = string("op_5106_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5106_squeeze_mask_0 = const()[name = string("op_5106_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5106_cast_fp16 = slice_by_index(begin = var_5106_begin_0, end = var_5106_end_0, end_mask = var_5106_end_mask_0, squeeze_mask = var_5106_squeeze_mask_0, x = coreml_update_state_119)[name = string("op_5106_cast_fp16")]; + tensor var_5109_begin_0 = const()[name = string("op_5109_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5109_end_mask_0 = const()[name = string("op_5109_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5109_cast_fp16 = slice_by_index(begin = var_5109_begin_0, end = concat_11, end_mask = var_5109_end_mask_0, x = var_5106_cast_fp16)[name = string("op_5109_cast_fp16")]; + tensor var_5111_shape_cast_fp16 = shape(x = var_5104_cast_fp16)[name = string("op_5111_shape_cast_fp16")]; + int32 gather_499 = const()[name = string("gather_499"), val = int32(1)]; + int32 gather_500 = const()[name = string("gather_500"), val = int32(5)]; + int32 gather_501_axis_0 = const()[name = string("gather_501_axis_0"), val = int32(0)]; + int32 gather_501_batch_dims_0 = const()[name = string("gather_501_batch_dims_0"), val = int32(0)]; + bool gather_501_validate_indices_0 = const()[name = string("gather_501_validate_indices_0"), val = bool(false)]; + string var_5111_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5111_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_501_to_uint16 = const()[name = string("select_501_to_uint16"), val = uint16(2)]; + tensor var_5111_shape_cast_fp16_to_uint16 = cast(dtype = var_5111_shape_cast_fp16_to_uint16_dtype_0, x = var_5111_shape_cast_fp16)[name = string("cast_38")]; + uint16 gather_501_cast_uint16 = gather(axis = gather_501_axis_0, batch_dims = gather_501_batch_dims_0, indices = select_501_to_uint16, validate_indices = gather_501_validate_indices_0, x = var_5111_shape_cast_fp16_to_uint16)[name = string("gather_501_cast_uint16")]; + string gather_501_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_501_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_502 = const()[name = string("gather_502"), val = int32(64)]; + tensor var_5118_axes_0 = const()[name = string("op_5118_axes_0"), val = tensor([2])]; + tensor var_5118_cast_fp16 = expand_dims(axes = var_5118_axes_0, x = var_5104_cast_fp16)[name = string("op_5118_cast_fp16")]; + tensor shape_557_cast_fp16 = shape(x = var_5118_cast_fp16)[name = string("shape_557_cast_fp16")]; + int32 concat_526_axis_0 = const()[name = string("concat_526_axis_0"), val = int32(0)]; + bool concat_526_interleave_0 = const()[name = string("concat_526_interleave_0"), val = bool(false)]; + int32 gather_501_cast_uint16_to_int32 = cast(dtype = gather_501_cast_uint16_to_int32_dtype_0, x = gather_501_cast_uint16)[name = string("cast_37")]; + tensor concat_526 = concat(axis = concat_526_axis_0, interleave = concat_526_interleave_0, values = (gather_499, gather_500, var_89, gather_501_cast_uint16_to_int32, gather_502))[name = string("concat_526")]; + tensor real_div_54 = real_div(x = concat_526, y = shape_557_cast_fp16)[name = string("real_div_54")]; + tensor hidden_states_821_cast_fp16 = tile(reps = real_div_54, x = var_5118_cast_fp16)[name = string("hidden_states_821_cast_fp16")]; + tensor concat_527x = const()[name = string("concat_527x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_111_cast_fp16 = reshape(shape = concat_527x, x = hidden_states_821_cast_fp16)[name = string("key_states_111_cast_fp16")]; + tensor var_5128_shape_cast_fp16 = shape(x = var_5109_cast_fp16)[name = string("op_5128_shape_cast_fp16")]; + int32 gather_503 = const()[name = string("gather_503"), val = int32(1)]; + int32 gather_504 = const()[name = string("gather_504"), val = int32(5)]; + int32 gather_505_axis_0 = const()[name = string("gather_505_axis_0"), val = int32(0)]; + int32 gather_505_batch_dims_0 = const()[name = string("gather_505_batch_dims_0"), val = int32(0)]; + bool gather_505_validate_indices_0 = const()[name = string("gather_505_validate_indices_0"), val = bool(false)]; + string var_5128_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5128_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_505_to_uint16 = const()[name = string("select_505_to_uint16"), val = uint16(2)]; + tensor var_5128_shape_cast_fp16_to_uint16 = cast(dtype = var_5128_shape_cast_fp16_to_uint16_dtype_0, x = var_5128_shape_cast_fp16)[name = string("cast_36")]; + uint16 gather_505_cast_uint16 = gather(axis = gather_505_axis_0, batch_dims = gather_505_batch_dims_0, indices = select_505_to_uint16, validate_indices = gather_505_validate_indices_0, x = var_5128_shape_cast_fp16_to_uint16)[name = string("gather_505_cast_uint16")]; + string gather_505_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_505_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_506 = const()[name = string("gather_506"), val = int32(64)]; + tensor var_5135_axes_0 = const()[name = string("op_5135_axes_0"), val = tensor([2])]; + tensor var_5135_cast_fp16 = expand_dims(axes = var_5135_axes_0, x = var_5109_cast_fp16)[name = string("op_5135_cast_fp16")]; + tensor shape_562_cast_fp16 = shape(x = var_5135_cast_fp16)[name = string("shape_562_cast_fp16")]; + int32 concat_528_axis_0 = const()[name = string("concat_528_axis_0"), val = int32(0)]; + bool concat_528_interleave_0 = const()[name = string("concat_528_interleave_0"), val = bool(false)]; + int32 gather_505_cast_uint16_to_int32 = cast(dtype = gather_505_cast_uint16_to_int32_dtype_0, x = gather_505_cast_uint16)[name = string("cast_35")]; + tensor concat_528 = concat(axis = concat_528_axis_0, interleave = concat_528_interleave_0, values = (gather_503, gather_504, var_89, gather_505_cast_uint16_to_int32, gather_506))[name = string("concat_528")]; + tensor real_div_55 = real_div(x = concat_528, y = shape_562_cast_fp16)[name = string("real_div_55")]; + tensor hidden_states_825_cast_fp16 = tile(reps = real_div_55, x = var_5135_cast_fp16)[name = string("hidden_states_825_cast_fp16")]; + tensor concat_529x = const()[name = string("concat_529x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_111_cast_fp16 = reshape(shape = concat_529x, x = hidden_states_825_cast_fp16)[name = string("value_states_111_cast_fp16")]; + tensor var_5145_shape_cast_fp16 = shape(x = key_states_111_cast_fp16)[name = string("op_5145_shape_cast_fp16")]; + int32 gather_507_axis_0 = const()[name = string("gather_507_axis_0"), val = int32(0)]; + int32 gather_507_batch_dims_0 = const()[name = string("gather_507_batch_dims_0"), val = int32(0)]; + bool gather_507_validate_indices_0 = const()[name = string("gather_507_validate_indices_0"), val = bool(false)]; + string var_5145_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5145_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_507_to_uint16 = const()[name = string("select_507_to_uint16"), val = uint16(2)]; + tensor var_5145_shape_cast_fp16_to_uint16 = cast(dtype = var_5145_shape_cast_fp16_to_uint16_dtype_0, x = var_5145_shape_cast_fp16)[name = string("cast_34")]; + uint16 gather_507_cast_uint16 = gather(axis = gather_507_axis_0, batch_dims = gather_507_batch_dims_0, indices = select_507_to_uint16, validate_indices = gather_507_validate_indices_0, x = var_5145_shape_cast_fp16_to_uint16)[name = string("gather_507_cast_uint16")]; + string gather_507_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_507_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_530_values0_0 = const()[name = string("concat_530_values0_0"), val = int32(1)]; + int32 concat_530_values1_0 = const()[name = string("concat_530_values1_0"), val = int32(1)]; + int32 concat_530_values2_0 = const()[name = string("concat_530_values2_0"), val = int32(0)]; + int32 concat_530_axis_0 = const()[name = string("concat_530_axis_0"), val = int32(0)]; + bool concat_530_interleave_0 = const()[name = string("concat_530_interleave_0"), val = bool(false)]; + int32 gather_507_cast_uint16_to_int32 = cast(dtype = gather_507_cast_uint16_to_int32_dtype_0, x = gather_507_cast_uint16)[name = string("cast_33")]; + tensor concat_530 = concat(axis = concat_530_axis_0, interleave = concat_530_interleave_0, values = (concat_530_values0_0, concat_530_values1_0, concat_530_values2_0, gather_507_cast_uint16_to_int32))[name = string("concat_530")]; + tensor causal_mask_57_begin_0 = const()[name = string("causal_mask_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_57_end_mask_0 = const()[name = string("causal_mask_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_57_cast_fp16 = slice_by_index(begin = causal_mask_57_begin_0, end = concat_530, end_mask = causal_mask_57_end_mask_0, x = causal_mask)[name = string("causal_mask_57_cast_fp16")]; + tensor attn_output_109_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_57_cast_fp16, key = key_states_111_cast_fp16, query = query_states_111_cast_fp16, value = value_states_111_cast_fp16)[name = string("attn_output_109_cast_fp16")]; + tensor var_5151_perm_0 = const()[name = string("op_5151_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_531_axis_0 = const()[name = string("concat_531_axis_0"), val = int32(0)]; + bool concat_531_interleave_0 = const()[name = string("concat_531_interleave_0"), val = bool(false)]; + int32 gather_491_cast_uint16_to_int32 = cast(dtype = gather_491_cast_uint16_to_int32_dtype_0, x = gather_491_cast_uint16)[name = string("cast_32")]; + tensor concat_531 = concat(axis = concat_531_axis_0, interleave = concat_531_interleave_0, values = (gather_490, gather_491_cast_uint16_to_int32, var_85))[name = string("concat_531")]; + tensor var_5151_cast_fp16 = transpose(perm = var_5151_perm_0, x = attn_output_109_cast_fp16)[name = string("transpose_16")]; + tensor input_217_cast_fp16 = reshape(shape = concat_531, x = var_5151_cast_fp16)[name = string("input_217_cast_fp16")]; + tensor model_model_layers_27_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176847168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177308032))))[name = string("model_model_layers_27_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_192_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_27_self_attn_o_proj_weight_to_fp16_quantized, x = input_217_cast_fp16)[name = string("linear_192_cast_fp16")]; + tensor hidden_states_829_cast_fp16 = add(x = hidden_states_809_cast_fp16, y = linear_192_cast_fp16)[name = string("hidden_states_829_cast_fp16")]; + fp16 var_80_promoted_55_to_fp16 = const()[name = string("op_80_promoted_55_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5160_cast_fp16 = pow(x = hidden_states_829_cast_fp16, y = var_80_promoted_55_to_fp16)[name = string("op_5160_cast_fp16")]; + tensor variance_111_axes_0 = const()[name = string("variance_111_axes_0"), val = tensor([-1])]; + bool variance_111_keep_dims_0 = const()[name = string("variance_111_keep_dims_0"), val = bool(true)]; + tensor variance_111_cast_fp16 = reduce_mean(axes = variance_111_axes_0, keep_dims = variance_111_keep_dims_0, x = var_5160_cast_fp16)[name = string("variance_111_cast_fp16")]; + fp16 var_5163_to_fp16 = const()[name = string("op_5163_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5164_cast_fp16 = add(x = variance_111_cast_fp16, y = var_5163_to_fp16)[name = string("op_5164_cast_fp16")]; + fp32 var_5165_epsilon_0 = const()[name = string("op_5165_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5165_cast_fp16 = rsqrt(epsilon = var_5165_epsilon_0, x = var_5164_cast_fp16)[name = string("op_5165_cast_fp16")]; + tensor hidden_states_833_cast_fp16 = mul(x = hidden_states_829_cast_fp16, y = var_5165_cast_fp16)[name = string("hidden_states_833_cast_fp16")]; + tensor model_model_layers_27_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177365696)))]; + tensor input_219_cast_fp16 = mul(x = model_model_layers_27_post_attention_layernorm_weight_to_fp16, y = hidden_states_833_cast_fp16)[name = string("input_219_cast_fp16")]; + tensor model_model_layers_27_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177367680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178596544))))[name = string("model_model_layers_27_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_193_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_27_mlp_gate_proj_weight_to_fp16_quantized, x = input_219_cast_fp16)[name = string("linear_193_cast_fp16")]; + tensor var_5177_cast_fp16 = silu(x = linear_193_cast_fp16)[name = string("op_5177_cast_fp16")]; + tensor model_model_layers_27_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178750208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179979072))))[name = string("model_model_layers_27_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_194_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_27_mlp_up_proj_weight_to_fp16_quantized, x = input_219_cast_fp16)[name = string("linear_194_cast_fp16")]; + tensor input_223_cast_fp16 = mul(x = var_5177_cast_fp16, y = linear_194_cast_fp16)[name = string("input_223_cast_fp16")]; + tensor model_model_layers_27_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180132736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181361600))))[name = string("model_model_layers_27_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_195_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_27_mlp_down_proj_weight_to_fp16_quantized, x = input_223_cast_fp16)[name = string("linear_195_cast_fp16")]; + tensor hidden_states_839_cast_fp16 = add(x = hidden_states_829_cast_fp16, y = linear_195_cast_fp16)[name = string("hidden_states_839_cast_fp16")]; + fp16 var_80_promoted_56_to_fp16 = const()[name = string("op_80_promoted_56_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5190_cast_fp16 = pow(x = hidden_states_839_cast_fp16, y = var_80_promoted_56_to_fp16)[name = string("op_5190_cast_fp16")]; + tensor variance_113_axes_0 = const()[name = string("variance_113_axes_0"), val = tensor([-1])]; + bool variance_113_keep_dims_0 = const()[name = string("variance_113_keep_dims_0"), val = bool(true)]; + tensor variance_113_cast_fp16 = reduce_mean(axes = variance_113_axes_0, keep_dims = variance_113_keep_dims_0, x = var_5190_cast_fp16)[name = string("variance_113_cast_fp16")]; + fp16 var_5193_to_fp16 = const()[name = string("op_5193_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5194_cast_fp16 = add(x = variance_113_cast_fp16, y = var_5193_to_fp16)[name = string("op_5194_cast_fp16")]; + fp32 var_5195_epsilon_0 = const()[name = string("op_5195_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5195_cast_fp16 = rsqrt(epsilon = var_5195_epsilon_0, x = var_5194_cast_fp16)[name = string("op_5195_cast_fp16")]; + tensor hidden_states_843_cast_fp16 = mul(x = hidden_states_839_cast_fp16, y = var_5195_cast_fp16)[name = string("hidden_states_843_cast_fp16")]; + tensor model_model_layers_28_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_28_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181515264)))]; + tensor hidden_states_847_cast_fp16 = mul(x = model_model_layers_28_input_layernorm_weight_to_fp16, y = hidden_states_843_cast_fp16)[name = string("hidden_states_847_cast_fp16")]; + tensor var_5206_shape_cast_fp16 = shape(x = hidden_states_847_cast_fp16)[name = string("op_5206_shape_cast_fp16")]; + int32 gather_508 = const()[name = string("gather_508"), val = int32(1)]; + int32 gather_509_axis_0 = const()[name = string("gather_509_axis_0"), val = int32(0)]; + int32 gather_509_batch_dims_0 = const()[name = string("gather_509_batch_dims_0"), val = int32(0)]; + bool gather_509_validate_indices_0 = const()[name = string("gather_509_validate_indices_0"), val = bool(false)]; + string var_5206_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5206_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_509_to_uint16 = const()[name = string("select_509_to_uint16"), val = uint16(1)]; + tensor var_5206_shape_cast_fp16_to_uint16 = cast(dtype = var_5206_shape_cast_fp16_to_uint16_dtype_0, x = var_5206_shape_cast_fp16)[name = string("cast_31")]; + uint16 gather_509_cast_uint16 = gather(axis = gather_509_axis_0, batch_dims = gather_509_batch_dims_0, indices = select_509_to_uint16, validate_indices = gather_509_validate_indices_0, x = var_5206_shape_cast_fp16_to_uint16)[name = string("gather_509_cast_uint16")]; + string gather_509_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_509_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_28_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181517248))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181978112))))[name = string("model_model_layers_28_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_196_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_28_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_847_cast_fp16)[name = string("linear_196_cast_fp16")]; + tensor model_model_layers_28_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182035776))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182189440))))[name = string("model_model_layers_28_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_197_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_28_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_847_cast_fp16)[name = string("linear_197_cast_fp16")]; + tensor model_model_layers_28_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182208704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182362368))))[name = string("model_model_layers_28_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_198_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_28_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_847_cast_fp16)[name = string("linear_198_cast_fp16")]; + tensor concat_532x = const()[name = string("concat_532x"), val = tensor([1, -1, 15, 64])]; + tensor var_5215_cast_fp16 = reshape(shape = concat_532x, x = linear_196_cast_fp16)[name = string("op_5215_cast_fp16")]; + tensor q_57_perm_0 = const()[name = string("q_57_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_533x = const()[name = string("concat_533x"), val = tensor([1, -1, 5, 64])]; + tensor var_5218_cast_fp16 = reshape(shape = concat_533x, x = linear_197_cast_fp16)[name = string("op_5218_cast_fp16")]; + tensor k_57_perm_0 = const()[name = string("k_57_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_534x = const()[name = string("concat_534x"), val = tensor([1, -1, 5, 64])]; + tensor var_5221_cast_fp16 = reshape(shape = concat_534x, x = linear_198_cast_fp16)[name = string("op_5221_cast_fp16")]; + tensor v_state_57_perm_0 = const()[name = string("v_state_57_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_57_cast_fp16 = transpose(perm = q_57_perm_0, x = var_5215_cast_fp16)[name = string("transpose_15")]; + tensor var_5225_cast_fp16 = mul(x = q_57_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5225_cast_fp16")]; + tensor x1_113_begin_0 = const()[name = string("x1_113_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_113_end_0 = const()[name = string("x1_113_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_113_end_mask_0 = const()[name = string("x1_113_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_113_cast_fp16 = slice_by_index(begin = x1_113_begin_0, end = x1_113_end_0, end_mask = x1_113_end_mask_0, x = q_57_cast_fp16)[name = string("x1_113_cast_fp16")]; + tensor x2_113_begin_0 = const()[name = string("x2_113_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_113_end_0 = const()[name = string("x2_113_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_113_end_mask_0 = const()[name = string("x2_113_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_113_cast_fp16 = slice_by_index(begin = x2_113_begin_0, end = x2_113_end_0, end_mask = x2_113_end_mask_0, x = q_57_cast_fp16)[name = string("x2_113_cast_fp16")]; + fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5236_cast_fp16 = mul(x = x2_113_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_5236_cast_fp16")]; + bool var_5238_interleave_0 = const()[name = string("op_5238_interleave_0"), val = bool(false)]; + tensor var_5238_cast_fp16 = concat(axis = var_85, interleave = var_5238_interleave_0, values = (var_5236_cast_fp16, x1_113_cast_fp16))[name = string("op_5238_cast_fp16")]; + tensor var_5239_cast_fp16 = mul(x = var_5238_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5239_cast_fp16")]; + tensor query_states_115_cast_fp16 = add(x = var_5225_cast_fp16, y = var_5239_cast_fp16)[name = string("query_states_115_cast_fp16")]; + tensor k_57_cast_fp16 = transpose(perm = k_57_perm_0, x = var_5218_cast_fp16)[name = string("transpose_14")]; + tensor var_5241_cast_fp16 = mul(x = k_57_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5241_cast_fp16")]; + tensor x1_115_begin_0 = const()[name = string("x1_115_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_115_end_0 = const()[name = string("x1_115_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_115_end_mask_0 = const()[name = string("x1_115_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_115_cast_fp16 = slice_by_index(begin = x1_115_begin_0, end = x1_115_end_0, end_mask = x1_115_end_mask_0, x = k_57_cast_fp16)[name = string("x1_115_cast_fp16")]; + tensor x2_115_begin_0 = const()[name = string("x2_115_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_115_end_0 = const()[name = string("x2_115_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_115_end_mask_0 = const()[name = string("x2_115_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_115_cast_fp16 = slice_by_index(begin = x2_115_begin_0, end = x2_115_end_0, end_mask = x2_115_end_mask_0, x = k_57_cast_fp16)[name = string("x2_115_cast_fp16")]; + fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5252_cast_fp16 = mul(x = x2_115_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_5252_cast_fp16")]; + bool var_5254_interleave_0 = const()[name = string("op_5254_interleave_0"), val = bool(false)]; + tensor var_5254_cast_fp16 = concat(axis = var_85, interleave = var_5254_interleave_0, values = (var_5252_cast_fp16, x1_115_cast_fp16))[name = string("op_5254_cast_fp16")]; + tensor var_5255_cast_fp16 = mul(x = var_5254_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5255_cast_fp16")]; + tensor k_state_57_cast_fp16 = add(x = var_5241_cast_fp16, y = var_5255_cast_fp16)[name = string("k_state_57_cast_fp16")]; + tensor expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor([0])]; + tensor expand_dims_337 = const()[name = string("expand_dims_337"), val = tensor([0])]; + tensor expand_dims_339 = const()[name = string("expand_dims_339"), val = tensor([0])]; + tensor concat_537_values0_0 = const()[name = string("concat_537_values0_0"), val = tensor([28])]; + int32 concat_537_axis_0 = const()[name = string("concat_537_axis_0"), val = int32(0)]; + bool concat_537_interleave_0 = const()[name = string("concat_537_interleave_0"), val = bool(false)]; + tensor concat_537 = concat(axis = concat_537_axis_0, interleave = concat_537_interleave_0, values = (concat_537_values0_0, expand_dims_336, expand_dims_337, expand_dims_2, expand_dims_339))[name = string("concat_537")]; + tensor key_cache_internal_tensor_assign_29_stride_0 = const()[name = string("key_cache_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_29_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_29_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_537, begin_mask = key_cache_internal_tensor_assign_29_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_29_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_29_squeeze_mask_0, stride = key_cache_internal_tensor_assign_29_stride_0, update = k_state_57_cast_fp16, x = coreml_update_state_118)[name = string("key_cache_internal_tensor_assign_29_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_29_cast_fp16, input = key_cache)[name = string("coreml_update_state_120_write_state")]; + tensor coreml_update_state_120 = read_state(input = key_cache)[name = string("coreml_update_state_120")]; + tensor value_cache_internal_tensor_assign_29_stride_0 = const()[name = string("value_cache_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_29_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_29_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_57_cast_fp16 = transpose(perm = v_state_57_perm_0, x = var_5221_cast_fp16)[name = string("transpose_13")]; + tensor value_cache_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_537, begin_mask = value_cache_internal_tensor_assign_29_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_29_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_29_squeeze_mask_0, stride = value_cache_internal_tensor_assign_29_stride_0, update = v_state_57_cast_fp16, x = coreml_update_state_119)[name = string("value_cache_internal_tensor_assign_29_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_29_cast_fp16, input = value_cache)[name = string("coreml_update_state_121_write_state")]; + tensor coreml_update_state_121 = read_state(input = value_cache)[name = string("coreml_update_state_121")]; + tensor var_5278_begin_0 = const()[name = string("op_5278_begin_0"), val = tensor([28, 0, 0, 0, 0])]; + tensor var_5278_end_0 = const()[name = string("op_5278_end_0"), val = tensor([29, 1, 5, 2048, 64])]; + tensor var_5278_end_mask_0 = const()[name = string("op_5278_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5278_squeeze_mask_0 = const()[name = string("op_5278_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5278_cast_fp16 = slice_by_index(begin = var_5278_begin_0, end = var_5278_end_0, end_mask = var_5278_end_mask_0, squeeze_mask = var_5278_squeeze_mask_0, x = coreml_update_state_120)[name = string("op_5278_cast_fp16")]; + tensor var_5281_begin_0 = const()[name = string("op_5281_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5281_end_mask_0 = const()[name = string("op_5281_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5281_cast_fp16 = slice_by_index(begin = var_5281_begin_0, end = concat_11, end_mask = var_5281_end_mask_0, x = var_5278_cast_fp16)[name = string("op_5281_cast_fp16")]; + tensor var_5283_begin_0 = const()[name = string("op_5283_begin_0"), val = tensor([28, 0, 0, 0, 0])]; + tensor var_5283_end_0 = const()[name = string("op_5283_end_0"), val = tensor([29, 1, 5, 2048, 64])]; + tensor var_5283_end_mask_0 = const()[name = string("op_5283_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5283_squeeze_mask_0 = const()[name = string("op_5283_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5283_cast_fp16 = slice_by_index(begin = var_5283_begin_0, end = var_5283_end_0, end_mask = var_5283_end_mask_0, squeeze_mask = var_5283_squeeze_mask_0, x = coreml_update_state_121)[name = string("op_5283_cast_fp16")]; + tensor var_5286_begin_0 = const()[name = string("op_5286_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5286_end_mask_0 = const()[name = string("op_5286_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5286_cast_fp16 = slice_by_index(begin = var_5286_begin_0, end = concat_11, end_mask = var_5286_end_mask_0, x = var_5283_cast_fp16)[name = string("op_5286_cast_fp16")]; + tensor var_5288_shape_cast_fp16 = shape(x = var_5281_cast_fp16)[name = string("op_5288_shape_cast_fp16")]; + int32 gather_517 = const()[name = string("gather_517"), val = int32(1)]; + int32 gather_518 = const()[name = string("gather_518"), val = int32(5)]; + int32 gather_519_axis_0 = const()[name = string("gather_519_axis_0"), val = int32(0)]; + int32 gather_519_batch_dims_0 = const()[name = string("gather_519_batch_dims_0"), val = int32(0)]; + bool gather_519_validate_indices_0 = const()[name = string("gather_519_validate_indices_0"), val = bool(false)]; + string var_5288_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5288_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_519_to_uint16 = const()[name = string("select_519_to_uint16"), val = uint16(2)]; + tensor var_5288_shape_cast_fp16_to_uint16 = cast(dtype = var_5288_shape_cast_fp16_to_uint16_dtype_0, x = var_5288_shape_cast_fp16)[name = string("cast_30")]; + uint16 gather_519_cast_uint16 = gather(axis = gather_519_axis_0, batch_dims = gather_519_batch_dims_0, indices = select_519_to_uint16, validate_indices = gather_519_validate_indices_0, x = var_5288_shape_cast_fp16_to_uint16)[name = string("gather_519_cast_uint16")]; + string gather_519_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_519_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_520 = const()[name = string("gather_520"), val = int32(64)]; + tensor var_5295_axes_0 = const()[name = string("op_5295_axes_0"), val = tensor([2])]; + tensor var_5295_cast_fp16 = expand_dims(axes = var_5295_axes_0, x = var_5281_cast_fp16)[name = string("op_5295_cast_fp16")]; + tensor shape_577_cast_fp16 = shape(x = var_5295_cast_fp16)[name = string("shape_577_cast_fp16")]; + int32 concat_545_axis_0 = const()[name = string("concat_545_axis_0"), val = int32(0)]; + bool concat_545_interleave_0 = const()[name = string("concat_545_interleave_0"), val = bool(false)]; + int32 gather_519_cast_uint16_to_int32 = cast(dtype = gather_519_cast_uint16_to_int32_dtype_0, x = gather_519_cast_uint16)[name = string("cast_29")]; + tensor concat_545 = concat(axis = concat_545_axis_0, interleave = concat_545_interleave_0, values = (gather_517, gather_518, var_89, gather_519_cast_uint16_to_int32, gather_520))[name = string("concat_545")]; + tensor real_div_56 = real_div(x = concat_545, y = shape_577_cast_fp16)[name = string("real_div_56")]; + tensor hidden_states_851_cast_fp16 = tile(reps = real_div_56, x = var_5295_cast_fp16)[name = string("hidden_states_851_cast_fp16")]; + tensor concat_546x = const()[name = string("concat_546x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_115_cast_fp16 = reshape(shape = concat_546x, x = hidden_states_851_cast_fp16)[name = string("key_states_115_cast_fp16")]; + tensor var_5305_shape_cast_fp16 = shape(x = var_5286_cast_fp16)[name = string("op_5305_shape_cast_fp16")]; + int32 gather_521 = const()[name = string("gather_521"), val = int32(1)]; + int32 gather_522 = const()[name = string("gather_522"), val = int32(5)]; + int32 gather_523_axis_0 = const()[name = string("gather_523_axis_0"), val = int32(0)]; + int32 gather_523_batch_dims_0 = const()[name = string("gather_523_batch_dims_0"), val = int32(0)]; + bool gather_523_validate_indices_0 = const()[name = string("gather_523_validate_indices_0"), val = bool(false)]; + string var_5305_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5305_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_523_to_uint16 = const()[name = string("select_523_to_uint16"), val = uint16(2)]; + tensor var_5305_shape_cast_fp16_to_uint16 = cast(dtype = var_5305_shape_cast_fp16_to_uint16_dtype_0, x = var_5305_shape_cast_fp16)[name = string("cast_28")]; + uint16 gather_523_cast_uint16 = gather(axis = gather_523_axis_0, batch_dims = gather_523_batch_dims_0, indices = select_523_to_uint16, validate_indices = gather_523_validate_indices_0, x = var_5305_shape_cast_fp16_to_uint16)[name = string("gather_523_cast_uint16")]; + string gather_523_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_523_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_524 = const()[name = string("gather_524"), val = int32(64)]; + tensor var_5312_axes_0 = const()[name = string("op_5312_axes_0"), val = tensor([2])]; + tensor var_5312_cast_fp16 = expand_dims(axes = var_5312_axes_0, x = var_5286_cast_fp16)[name = string("op_5312_cast_fp16")]; + tensor shape_582_cast_fp16 = shape(x = var_5312_cast_fp16)[name = string("shape_582_cast_fp16")]; + int32 concat_547_axis_0 = const()[name = string("concat_547_axis_0"), val = int32(0)]; + bool concat_547_interleave_0 = const()[name = string("concat_547_interleave_0"), val = bool(false)]; + int32 gather_523_cast_uint16_to_int32 = cast(dtype = gather_523_cast_uint16_to_int32_dtype_0, x = gather_523_cast_uint16)[name = string("cast_27")]; + tensor concat_547 = concat(axis = concat_547_axis_0, interleave = concat_547_interleave_0, values = (gather_521, gather_522, var_89, gather_523_cast_uint16_to_int32, gather_524))[name = string("concat_547")]; + tensor real_div_57 = real_div(x = concat_547, y = shape_582_cast_fp16)[name = string("real_div_57")]; + tensor hidden_states_855_cast_fp16 = tile(reps = real_div_57, x = var_5312_cast_fp16)[name = string("hidden_states_855_cast_fp16")]; + tensor concat_548x = const()[name = string("concat_548x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_115_cast_fp16 = reshape(shape = concat_548x, x = hidden_states_855_cast_fp16)[name = string("value_states_115_cast_fp16")]; + tensor var_5322_shape_cast_fp16 = shape(x = key_states_115_cast_fp16)[name = string("op_5322_shape_cast_fp16")]; + int32 gather_525_axis_0 = const()[name = string("gather_525_axis_0"), val = int32(0)]; + int32 gather_525_batch_dims_0 = const()[name = string("gather_525_batch_dims_0"), val = int32(0)]; + bool gather_525_validate_indices_0 = const()[name = string("gather_525_validate_indices_0"), val = bool(false)]; + string var_5322_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5322_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_525_to_uint16 = const()[name = string("select_525_to_uint16"), val = uint16(2)]; + tensor var_5322_shape_cast_fp16_to_uint16 = cast(dtype = var_5322_shape_cast_fp16_to_uint16_dtype_0, x = var_5322_shape_cast_fp16)[name = string("cast_26")]; + uint16 gather_525_cast_uint16 = gather(axis = gather_525_axis_0, batch_dims = gather_525_batch_dims_0, indices = select_525_to_uint16, validate_indices = gather_525_validate_indices_0, x = var_5322_shape_cast_fp16_to_uint16)[name = string("gather_525_cast_uint16")]; + string gather_525_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_525_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_549_values0_0 = const()[name = string("concat_549_values0_0"), val = int32(1)]; + int32 concat_549_values1_0 = const()[name = string("concat_549_values1_0"), val = int32(1)]; + int32 concat_549_values2_0 = const()[name = string("concat_549_values2_0"), val = int32(0)]; + int32 concat_549_axis_0 = const()[name = string("concat_549_axis_0"), val = int32(0)]; + bool concat_549_interleave_0 = const()[name = string("concat_549_interleave_0"), val = bool(false)]; + int32 gather_525_cast_uint16_to_int32 = cast(dtype = gather_525_cast_uint16_to_int32_dtype_0, x = gather_525_cast_uint16)[name = string("cast_25")]; + tensor concat_549 = concat(axis = concat_549_axis_0, interleave = concat_549_interleave_0, values = (concat_549_values0_0, concat_549_values1_0, concat_549_values2_0, gather_525_cast_uint16_to_int32))[name = string("concat_549")]; + tensor causal_mask_59_begin_0 = const()[name = string("causal_mask_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_59_end_mask_0 = const()[name = string("causal_mask_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_59_cast_fp16 = slice_by_index(begin = causal_mask_59_begin_0, end = concat_549, end_mask = causal_mask_59_end_mask_0, x = causal_mask)[name = string("causal_mask_59_cast_fp16")]; + tensor attn_output_113_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_59_cast_fp16, key = key_states_115_cast_fp16, query = query_states_115_cast_fp16, value = value_states_115_cast_fp16)[name = string("attn_output_113_cast_fp16")]; + tensor var_5328_perm_0 = const()[name = string("op_5328_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_550_axis_0 = const()[name = string("concat_550_axis_0"), val = int32(0)]; + bool concat_550_interleave_0 = const()[name = string("concat_550_interleave_0"), val = bool(false)]; + int32 gather_509_cast_uint16_to_int32 = cast(dtype = gather_509_cast_uint16_to_int32_dtype_0, x = gather_509_cast_uint16)[name = string("cast_24")]; + tensor concat_550 = concat(axis = concat_550_axis_0, interleave = concat_550_interleave_0, values = (gather_508, gather_509_cast_uint16_to_int32, var_85))[name = string("concat_550")]; + tensor var_5328_cast_fp16 = transpose(perm = var_5328_perm_0, x = attn_output_113_cast_fp16)[name = string("transpose_12")]; + tensor input_225_cast_fp16 = reshape(shape = concat_550, x = var_5328_cast_fp16)[name = string("input_225_cast_fp16")]; + tensor model_model_layers_28_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182381632))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182842496))))[name = string("model_model_layers_28_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_199_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_28_self_attn_o_proj_weight_to_fp16_quantized, x = input_225_cast_fp16)[name = string("linear_199_cast_fp16")]; + tensor hidden_states_859_cast_fp16 = add(x = hidden_states_839_cast_fp16, y = linear_199_cast_fp16)[name = string("hidden_states_859_cast_fp16")]; + fp16 var_80_promoted_57_to_fp16 = const()[name = string("op_80_promoted_57_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5337_cast_fp16 = pow(x = hidden_states_859_cast_fp16, y = var_80_promoted_57_to_fp16)[name = string("op_5337_cast_fp16")]; + tensor variance_115_axes_0 = const()[name = string("variance_115_axes_0"), val = tensor([-1])]; + bool variance_115_keep_dims_0 = const()[name = string("variance_115_keep_dims_0"), val = bool(true)]; + tensor variance_115_cast_fp16 = reduce_mean(axes = variance_115_axes_0, keep_dims = variance_115_keep_dims_0, x = var_5337_cast_fp16)[name = string("variance_115_cast_fp16")]; + fp16 var_5340_to_fp16 = const()[name = string("op_5340_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5341_cast_fp16 = add(x = variance_115_cast_fp16, y = var_5340_to_fp16)[name = string("op_5341_cast_fp16")]; + fp32 var_5342_epsilon_0 = const()[name = string("op_5342_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5342_cast_fp16 = rsqrt(epsilon = var_5342_epsilon_0, x = var_5341_cast_fp16)[name = string("op_5342_cast_fp16")]; + tensor hidden_states_863_cast_fp16 = mul(x = hidden_states_859_cast_fp16, y = var_5342_cast_fp16)[name = string("hidden_states_863_cast_fp16")]; + tensor model_model_layers_28_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_28_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182900160)))]; + tensor input_227_cast_fp16 = mul(x = model_model_layers_28_post_attention_layernorm_weight_to_fp16, y = hidden_states_863_cast_fp16)[name = string("input_227_cast_fp16")]; + tensor model_model_layers_28_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182902144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184131008))))[name = string("model_model_layers_28_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_200_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_28_mlp_gate_proj_weight_to_fp16_quantized, x = input_227_cast_fp16)[name = string("linear_200_cast_fp16")]; + tensor var_5354_cast_fp16 = silu(x = linear_200_cast_fp16)[name = string("op_5354_cast_fp16")]; + tensor model_model_layers_28_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184284672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185513536))))[name = string("model_model_layers_28_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_201_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_28_mlp_up_proj_weight_to_fp16_quantized, x = input_227_cast_fp16)[name = string("linear_201_cast_fp16")]; + tensor input_231_cast_fp16 = mul(x = var_5354_cast_fp16, y = linear_201_cast_fp16)[name = string("input_231_cast_fp16")]; + tensor model_model_layers_28_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185667200))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186896064))))[name = string("model_model_layers_28_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_202_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_28_mlp_down_proj_weight_to_fp16_quantized, x = input_231_cast_fp16)[name = string("linear_202_cast_fp16")]; + tensor hidden_states_869_cast_fp16 = add(x = hidden_states_859_cast_fp16, y = linear_202_cast_fp16)[name = string("hidden_states_869_cast_fp16")]; + fp16 var_80_promoted_58_to_fp16 = const()[name = string("op_80_promoted_58_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5367_cast_fp16 = pow(x = hidden_states_869_cast_fp16, y = var_80_promoted_58_to_fp16)[name = string("op_5367_cast_fp16")]; + tensor variance_117_axes_0 = const()[name = string("variance_117_axes_0"), val = tensor([-1])]; + bool variance_117_keep_dims_0 = const()[name = string("variance_117_keep_dims_0"), val = bool(true)]; + tensor variance_117_cast_fp16 = reduce_mean(axes = variance_117_axes_0, keep_dims = variance_117_keep_dims_0, x = var_5367_cast_fp16)[name = string("variance_117_cast_fp16")]; + fp16 var_5370_to_fp16 = const()[name = string("op_5370_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5371_cast_fp16 = add(x = variance_117_cast_fp16, y = var_5370_to_fp16)[name = string("op_5371_cast_fp16")]; + fp32 var_5372_epsilon_0 = const()[name = string("op_5372_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5372_cast_fp16 = rsqrt(epsilon = var_5372_epsilon_0, x = var_5371_cast_fp16)[name = string("op_5372_cast_fp16")]; + tensor hidden_states_873_cast_fp16 = mul(x = hidden_states_869_cast_fp16, y = var_5372_cast_fp16)[name = string("hidden_states_873_cast_fp16")]; + tensor model_model_layers_29_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_29_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187049728)))]; + tensor hidden_states_877_cast_fp16 = mul(x = model_model_layers_29_input_layernorm_weight_to_fp16, y = hidden_states_873_cast_fp16)[name = string("hidden_states_877_cast_fp16")]; + tensor var_5383_shape_cast_fp16 = shape(x = hidden_states_877_cast_fp16)[name = string("op_5383_shape_cast_fp16")]; + int32 gather_526 = const()[name = string("gather_526"), val = int32(1)]; + int32 gather_527_axis_0 = const()[name = string("gather_527_axis_0"), val = int32(0)]; + int32 gather_527_batch_dims_0 = const()[name = string("gather_527_batch_dims_0"), val = int32(0)]; + bool gather_527_validate_indices_0 = const()[name = string("gather_527_validate_indices_0"), val = bool(false)]; + string var_5383_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5383_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_527_to_uint16 = const()[name = string("select_527_to_uint16"), val = uint16(1)]; + tensor var_5383_shape_cast_fp16_to_uint16 = cast(dtype = var_5383_shape_cast_fp16_to_uint16_dtype_0, x = var_5383_shape_cast_fp16)[name = string("cast_23")]; + uint16 gather_527_cast_uint16 = gather(axis = gather_527_axis_0, batch_dims = gather_527_batch_dims_0, indices = select_527_to_uint16, validate_indices = gather_527_validate_indices_0, x = var_5383_shape_cast_fp16_to_uint16)[name = string("gather_527_cast_uint16")]; + string gather_527_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_527_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_29_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187051712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187512576))))[name = string("model_model_layers_29_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_203_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_29_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_877_cast_fp16)[name = string("linear_203_cast_fp16")]; + tensor model_model_layers_29_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187570240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187723904))))[name = string("model_model_layers_29_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_204_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_29_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_877_cast_fp16)[name = string("linear_204_cast_fp16")]; + tensor model_model_layers_29_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187743168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187896832))))[name = string("model_model_layers_29_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_205_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_29_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_877_cast_fp16)[name = string("linear_205_cast_fp16")]; + tensor concat_551x = const()[name = string("concat_551x"), val = tensor([1, -1, 15, 64])]; + tensor var_5392_cast_fp16 = reshape(shape = concat_551x, x = linear_203_cast_fp16)[name = string("op_5392_cast_fp16")]; + tensor q_59_perm_0 = const()[name = string("q_59_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_552x = const()[name = string("concat_552x"), val = tensor([1, -1, 5, 64])]; + tensor var_5395_cast_fp16 = reshape(shape = concat_552x, x = linear_204_cast_fp16)[name = string("op_5395_cast_fp16")]; + tensor k_59_perm_0 = const()[name = string("k_59_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_553x = const()[name = string("concat_553x"), val = tensor([1, -1, 5, 64])]; + tensor var_5398_cast_fp16 = reshape(shape = concat_553x, x = linear_205_cast_fp16)[name = string("op_5398_cast_fp16")]; + tensor v_state_59_perm_0 = const()[name = string("v_state_59_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_59_cast_fp16 = transpose(perm = q_59_perm_0, x = var_5392_cast_fp16)[name = string("transpose_11")]; + tensor var_5402_cast_fp16 = mul(x = q_59_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5402_cast_fp16")]; + tensor x1_117_begin_0 = const()[name = string("x1_117_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_117_end_0 = const()[name = string("x1_117_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_117_end_mask_0 = const()[name = string("x1_117_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_117_cast_fp16 = slice_by_index(begin = x1_117_begin_0, end = x1_117_end_0, end_mask = x1_117_end_mask_0, x = q_59_cast_fp16)[name = string("x1_117_cast_fp16")]; + tensor x2_117_begin_0 = const()[name = string("x2_117_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_117_end_0 = const()[name = string("x2_117_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_117_end_mask_0 = const()[name = string("x2_117_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_117_cast_fp16 = slice_by_index(begin = x2_117_begin_0, end = x2_117_end_0, end_mask = x2_117_end_mask_0, x = q_59_cast_fp16)[name = string("x2_117_cast_fp16")]; + fp16 const_61_promoted_to_fp16 = const()[name = string("const_61_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5413_cast_fp16 = mul(x = x2_117_cast_fp16, y = const_61_promoted_to_fp16)[name = string("op_5413_cast_fp16")]; + bool var_5415_interleave_0 = const()[name = string("op_5415_interleave_0"), val = bool(false)]; + tensor var_5415_cast_fp16 = concat(axis = var_85, interleave = var_5415_interleave_0, values = (var_5413_cast_fp16, x1_117_cast_fp16))[name = string("op_5415_cast_fp16")]; + tensor var_5416_cast_fp16 = mul(x = var_5415_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5416_cast_fp16")]; + tensor query_states_119_cast_fp16 = add(x = var_5402_cast_fp16, y = var_5416_cast_fp16)[name = string("query_states_119_cast_fp16")]; + tensor k_59_cast_fp16 = transpose(perm = k_59_perm_0, x = var_5395_cast_fp16)[name = string("transpose_10")]; + tensor var_5418_cast_fp16 = mul(x = k_59_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5418_cast_fp16")]; + tensor x1_119_begin_0 = const()[name = string("x1_119_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_119_end_0 = const()[name = string("x1_119_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_119_end_mask_0 = const()[name = string("x1_119_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_119_cast_fp16 = slice_by_index(begin = x1_119_begin_0, end = x1_119_end_0, end_mask = x1_119_end_mask_0, x = k_59_cast_fp16)[name = string("x1_119_cast_fp16")]; + tensor x2_119_begin_0 = const()[name = string("x2_119_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_119_end_0 = const()[name = string("x2_119_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_119_end_mask_0 = const()[name = string("x2_119_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_119_cast_fp16 = slice_by_index(begin = x2_119_begin_0, end = x2_119_end_0, end_mask = x2_119_end_mask_0, x = k_59_cast_fp16)[name = string("x2_119_cast_fp16")]; + fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5429_cast_fp16 = mul(x = x2_119_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_5429_cast_fp16")]; + bool var_5431_interleave_0 = const()[name = string("op_5431_interleave_0"), val = bool(false)]; + tensor var_5431_cast_fp16 = concat(axis = var_85, interleave = var_5431_interleave_0, values = (var_5429_cast_fp16, x1_119_cast_fp16))[name = string("op_5431_cast_fp16")]; + tensor var_5432_cast_fp16 = mul(x = var_5431_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5432_cast_fp16")]; + tensor k_state_59_cast_fp16 = add(x = var_5418_cast_fp16, y = var_5432_cast_fp16)[name = string("k_state_59_cast_fp16")]; + tensor expand_dims_348 = const()[name = string("expand_dims_348"), val = tensor([0])]; + tensor expand_dims_349 = const()[name = string("expand_dims_349"), val = tensor([0])]; + tensor expand_dims_351 = const()[name = string("expand_dims_351"), val = tensor([0])]; + tensor concat_556_values0_0 = const()[name = string("concat_556_values0_0"), val = tensor([29])]; + int32 concat_556_axis_0 = const()[name = string("concat_556_axis_0"), val = int32(0)]; + bool concat_556_interleave_0 = const()[name = string("concat_556_interleave_0"), val = bool(false)]; + tensor concat_556 = concat(axis = concat_556_axis_0, interleave = concat_556_interleave_0, values = (concat_556_values0_0, expand_dims_348, expand_dims_349, expand_dims_2, expand_dims_351))[name = string("concat_556")]; + tensor key_cache_internal_tensor_assign_30_stride_0 = const()[name = string("key_cache_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_30_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_30_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_556, begin_mask = key_cache_internal_tensor_assign_30_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_30_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_30_squeeze_mask_0, stride = key_cache_internal_tensor_assign_30_stride_0, update = k_state_59_cast_fp16, x = coreml_update_state_120)[name = string("key_cache_internal_tensor_assign_30_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_30_cast_fp16, input = key_cache)[name = string("coreml_update_state_122_write_state")]; + tensor coreml_update_state_122 = read_state(input = key_cache)[name = string("coreml_update_state_122")]; + tensor value_cache_internal_tensor_assign_30_stride_0 = const()[name = string("value_cache_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_30_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_30_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_59_cast_fp16 = transpose(perm = v_state_59_perm_0, x = var_5398_cast_fp16)[name = string("transpose_9")]; + tensor value_cache_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_556, begin_mask = value_cache_internal_tensor_assign_30_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_30_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_30_squeeze_mask_0, stride = value_cache_internal_tensor_assign_30_stride_0, update = v_state_59_cast_fp16, x = coreml_update_state_121)[name = string("value_cache_internal_tensor_assign_30_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_30_cast_fp16, input = value_cache)[name = string("coreml_update_state_123_write_state")]; + tensor coreml_update_state_123 = read_state(input = value_cache)[name = string("coreml_update_state_123")]; + tensor var_5455_begin_0 = const()[name = string("op_5455_begin_0"), val = tensor([29, 0, 0, 0, 0])]; + tensor var_5455_end_0 = const()[name = string("op_5455_end_0"), val = tensor([30, 1, 5, 2048, 64])]; + tensor var_5455_end_mask_0 = const()[name = string("op_5455_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5455_squeeze_mask_0 = const()[name = string("op_5455_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5455_cast_fp16 = slice_by_index(begin = var_5455_begin_0, end = var_5455_end_0, end_mask = var_5455_end_mask_0, squeeze_mask = var_5455_squeeze_mask_0, x = coreml_update_state_122)[name = string("op_5455_cast_fp16")]; + tensor var_5458_begin_0 = const()[name = string("op_5458_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5458_end_mask_0 = const()[name = string("op_5458_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5458_cast_fp16 = slice_by_index(begin = var_5458_begin_0, end = concat_11, end_mask = var_5458_end_mask_0, x = var_5455_cast_fp16)[name = string("op_5458_cast_fp16")]; + tensor var_5460_begin_0 = const()[name = string("op_5460_begin_0"), val = tensor([29, 0, 0, 0, 0])]; + tensor var_5460_end_0 = const()[name = string("op_5460_end_0"), val = tensor([30, 1, 5, 2048, 64])]; + tensor var_5460_end_mask_0 = const()[name = string("op_5460_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5460_squeeze_mask_0 = const()[name = string("op_5460_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5460_cast_fp16 = slice_by_index(begin = var_5460_begin_0, end = var_5460_end_0, end_mask = var_5460_end_mask_0, squeeze_mask = var_5460_squeeze_mask_0, x = coreml_update_state_123)[name = string("op_5460_cast_fp16")]; + tensor var_5463_begin_0 = const()[name = string("op_5463_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5463_end_mask_0 = const()[name = string("op_5463_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5463_cast_fp16 = slice_by_index(begin = var_5463_begin_0, end = concat_11, end_mask = var_5463_end_mask_0, x = var_5460_cast_fp16)[name = string("op_5463_cast_fp16")]; + tensor var_5465_shape_cast_fp16 = shape(x = var_5458_cast_fp16)[name = string("op_5465_shape_cast_fp16")]; + int32 gather_535 = const()[name = string("gather_535"), val = int32(1)]; + int32 gather_536 = const()[name = string("gather_536"), val = int32(5)]; + int32 gather_537_axis_0 = const()[name = string("gather_537_axis_0"), val = int32(0)]; + int32 gather_537_batch_dims_0 = const()[name = string("gather_537_batch_dims_0"), val = int32(0)]; + bool gather_537_validate_indices_0 = const()[name = string("gather_537_validate_indices_0"), val = bool(false)]; + string var_5465_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5465_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_537_to_uint16 = const()[name = string("select_537_to_uint16"), val = uint16(2)]; + tensor var_5465_shape_cast_fp16_to_uint16 = cast(dtype = var_5465_shape_cast_fp16_to_uint16_dtype_0, x = var_5465_shape_cast_fp16)[name = string("cast_22")]; + uint16 gather_537_cast_uint16 = gather(axis = gather_537_axis_0, batch_dims = gather_537_batch_dims_0, indices = select_537_to_uint16, validate_indices = gather_537_validate_indices_0, x = var_5465_shape_cast_fp16_to_uint16)[name = string("gather_537_cast_uint16")]; + string gather_537_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_537_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_538 = const()[name = string("gather_538"), val = int32(64)]; + tensor var_5472_axes_0 = const()[name = string("op_5472_axes_0"), val = tensor([2])]; + tensor var_5472_cast_fp16 = expand_dims(axes = var_5472_axes_0, x = var_5458_cast_fp16)[name = string("op_5472_cast_fp16")]; + tensor shape_597_cast_fp16 = shape(x = var_5472_cast_fp16)[name = string("shape_597_cast_fp16")]; + int32 concat_564_axis_0 = const()[name = string("concat_564_axis_0"), val = int32(0)]; + bool concat_564_interleave_0 = const()[name = string("concat_564_interleave_0"), val = bool(false)]; + int32 gather_537_cast_uint16_to_int32 = cast(dtype = gather_537_cast_uint16_to_int32_dtype_0, x = gather_537_cast_uint16)[name = string("cast_21")]; + tensor concat_564 = concat(axis = concat_564_axis_0, interleave = concat_564_interleave_0, values = (gather_535, gather_536, var_89, gather_537_cast_uint16_to_int32, gather_538))[name = string("concat_564")]; + tensor real_div_58 = real_div(x = concat_564, y = shape_597_cast_fp16)[name = string("real_div_58")]; + tensor hidden_states_881_cast_fp16 = tile(reps = real_div_58, x = var_5472_cast_fp16)[name = string("hidden_states_881_cast_fp16")]; + tensor concat_565x = const()[name = string("concat_565x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_119_cast_fp16 = reshape(shape = concat_565x, x = hidden_states_881_cast_fp16)[name = string("key_states_119_cast_fp16")]; + tensor var_5482_shape_cast_fp16 = shape(x = var_5463_cast_fp16)[name = string("op_5482_shape_cast_fp16")]; + int32 gather_539 = const()[name = string("gather_539"), val = int32(1)]; + int32 gather_540 = const()[name = string("gather_540"), val = int32(5)]; + int32 gather_541_axis_0 = const()[name = string("gather_541_axis_0"), val = int32(0)]; + int32 gather_541_batch_dims_0 = const()[name = string("gather_541_batch_dims_0"), val = int32(0)]; + bool gather_541_validate_indices_0 = const()[name = string("gather_541_validate_indices_0"), val = bool(false)]; + string var_5482_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5482_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_541_to_uint16 = const()[name = string("select_541_to_uint16"), val = uint16(2)]; + tensor var_5482_shape_cast_fp16_to_uint16 = cast(dtype = var_5482_shape_cast_fp16_to_uint16_dtype_0, x = var_5482_shape_cast_fp16)[name = string("cast_20")]; + uint16 gather_541_cast_uint16 = gather(axis = gather_541_axis_0, batch_dims = gather_541_batch_dims_0, indices = select_541_to_uint16, validate_indices = gather_541_validate_indices_0, x = var_5482_shape_cast_fp16_to_uint16)[name = string("gather_541_cast_uint16")]; + string gather_541_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_541_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_542 = const()[name = string("gather_542"), val = int32(64)]; + tensor var_5489_axes_0 = const()[name = string("op_5489_axes_0"), val = tensor([2])]; + tensor var_5489_cast_fp16 = expand_dims(axes = var_5489_axes_0, x = var_5463_cast_fp16)[name = string("op_5489_cast_fp16")]; + tensor shape_602_cast_fp16 = shape(x = var_5489_cast_fp16)[name = string("shape_602_cast_fp16")]; + int32 concat_566_axis_0 = const()[name = string("concat_566_axis_0"), val = int32(0)]; + bool concat_566_interleave_0 = const()[name = string("concat_566_interleave_0"), val = bool(false)]; + int32 gather_541_cast_uint16_to_int32 = cast(dtype = gather_541_cast_uint16_to_int32_dtype_0, x = gather_541_cast_uint16)[name = string("cast_19")]; + tensor concat_566 = concat(axis = concat_566_axis_0, interleave = concat_566_interleave_0, values = (gather_539, gather_540, var_89, gather_541_cast_uint16_to_int32, gather_542))[name = string("concat_566")]; + tensor real_div_59 = real_div(x = concat_566, y = shape_602_cast_fp16)[name = string("real_div_59")]; + tensor hidden_states_885_cast_fp16 = tile(reps = real_div_59, x = var_5489_cast_fp16)[name = string("hidden_states_885_cast_fp16")]; + tensor concat_567x = const()[name = string("concat_567x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_119_cast_fp16 = reshape(shape = concat_567x, x = hidden_states_885_cast_fp16)[name = string("value_states_119_cast_fp16")]; + tensor var_5499_shape_cast_fp16 = shape(x = key_states_119_cast_fp16)[name = string("op_5499_shape_cast_fp16")]; + int32 gather_543_axis_0 = const()[name = string("gather_543_axis_0"), val = int32(0)]; + int32 gather_543_batch_dims_0 = const()[name = string("gather_543_batch_dims_0"), val = int32(0)]; + bool gather_543_validate_indices_0 = const()[name = string("gather_543_validate_indices_0"), val = bool(false)]; + string var_5499_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5499_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_543_to_uint16 = const()[name = string("select_543_to_uint16"), val = uint16(2)]; + tensor var_5499_shape_cast_fp16_to_uint16 = cast(dtype = var_5499_shape_cast_fp16_to_uint16_dtype_0, x = var_5499_shape_cast_fp16)[name = string("cast_18")]; + uint16 gather_543_cast_uint16 = gather(axis = gather_543_axis_0, batch_dims = gather_543_batch_dims_0, indices = select_543_to_uint16, validate_indices = gather_543_validate_indices_0, x = var_5499_shape_cast_fp16_to_uint16)[name = string("gather_543_cast_uint16")]; + string gather_543_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_543_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_568_values0_0 = const()[name = string("concat_568_values0_0"), val = int32(1)]; + int32 concat_568_values1_0 = const()[name = string("concat_568_values1_0"), val = int32(1)]; + int32 concat_568_values2_0 = const()[name = string("concat_568_values2_0"), val = int32(0)]; + int32 concat_568_axis_0 = const()[name = string("concat_568_axis_0"), val = int32(0)]; + bool concat_568_interleave_0 = const()[name = string("concat_568_interleave_0"), val = bool(false)]; + int32 gather_543_cast_uint16_to_int32 = cast(dtype = gather_543_cast_uint16_to_int32_dtype_0, x = gather_543_cast_uint16)[name = string("cast_17")]; + tensor concat_568 = concat(axis = concat_568_axis_0, interleave = concat_568_interleave_0, values = (concat_568_values0_0, concat_568_values1_0, concat_568_values2_0, gather_543_cast_uint16_to_int32))[name = string("concat_568")]; + tensor causal_mask_61_begin_0 = const()[name = string("causal_mask_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_61_end_mask_0 = const()[name = string("causal_mask_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_61_cast_fp16 = slice_by_index(begin = causal_mask_61_begin_0, end = concat_568, end_mask = causal_mask_61_end_mask_0, x = causal_mask)[name = string("causal_mask_61_cast_fp16")]; + tensor attn_output_117_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_61_cast_fp16, key = key_states_119_cast_fp16, query = query_states_119_cast_fp16, value = value_states_119_cast_fp16)[name = string("attn_output_117_cast_fp16")]; + tensor var_5505_perm_0 = const()[name = string("op_5505_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_569_axis_0 = const()[name = string("concat_569_axis_0"), val = int32(0)]; + bool concat_569_interleave_0 = const()[name = string("concat_569_interleave_0"), val = bool(false)]; + int32 gather_527_cast_uint16_to_int32 = cast(dtype = gather_527_cast_uint16_to_int32_dtype_0, x = gather_527_cast_uint16)[name = string("cast_16")]; + tensor concat_569 = concat(axis = concat_569_axis_0, interleave = concat_569_interleave_0, values = (gather_526, gather_527_cast_uint16_to_int32, var_85))[name = string("concat_569")]; + tensor var_5505_cast_fp16 = transpose(perm = var_5505_perm_0, x = attn_output_117_cast_fp16)[name = string("transpose_8")]; + tensor input_233_cast_fp16 = reshape(shape = concat_569, x = var_5505_cast_fp16)[name = string("input_233_cast_fp16")]; + tensor model_model_layers_29_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187916096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188376960))))[name = string("model_model_layers_29_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_206_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_29_self_attn_o_proj_weight_to_fp16_quantized, x = input_233_cast_fp16)[name = string("linear_206_cast_fp16")]; + tensor hidden_states_889_cast_fp16 = add(x = hidden_states_869_cast_fp16, y = linear_206_cast_fp16)[name = string("hidden_states_889_cast_fp16")]; + fp16 var_80_promoted_59_to_fp16 = const()[name = string("op_80_promoted_59_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5514_cast_fp16 = pow(x = hidden_states_889_cast_fp16, y = var_80_promoted_59_to_fp16)[name = string("op_5514_cast_fp16")]; + tensor variance_119_axes_0 = const()[name = string("variance_119_axes_0"), val = tensor([-1])]; + bool variance_119_keep_dims_0 = const()[name = string("variance_119_keep_dims_0"), val = bool(true)]; + tensor variance_119_cast_fp16 = reduce_mean(axes = variance_119_axes_0, keep_dims = variance_119_keep_dims_0, x = var_5514_cast_fp16)[name = string("variance_119_cast_fp16")]; + fp16 var_5517_to_fp16 = const()[name = string("op_5517_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5518_cast_fp16 = add(x = variance_119_cast_fp16, y = var_5517_to_fp16)[name = string("op_5518_cast_fp16")]; + fp32 var_5519_epsilon_0 = const()[name = string("op_5519_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5519_cast_fp16 = rsqrt(epsilon = var_5519_epsilon_0, x = var_5518_cast_fp16)[name = string("op_5519_cast_fp16")]; + tensor hidden_states_893_cast_fp16 = mul(x = hidden_states_889_cast_fp16, y = var_5519_cast_fp16)[name = string("hidden_states_893_cast_fp16")]; + tensor model_model_layers_29_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_29_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188434624)))]; + tensor input_235_cast_fp16 = mul(x = model_model_layers_29_post_attention_layernorm_weight_to_fp16, y = hidden_states_893_cast_fp16)[name = string("input_235_cast_fp16")]; + tensor model_model_layers_29_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188436608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189665472))))[name = string("model_model_layers_29_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_207_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_29_mlp_gate_proj_weight_to_fp16_quantized, x = input_235_cast_fp16)[name = string("linear_207_cast_fp16")]; + tensor var_5531_cast_fp16 = silu(x = linear_207_cast_fp16)[name = string("op_5531_cast_fp16")]; + tensor model_model_layers_29_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189819136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191048000))))[name = string("model_model_layers_29_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_208_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_29_mlp_up_proj_weight_to_fp16_quantized, x = input_235_cast_fp16)[name = string("linear_208_cast_fp16")]; + tensor input_239_cast_fp16 = mul(x = var_5531_cast_fp16, y = linear_208_cast_fp16)[name = string("input_239_cast_fp16")]; + tensor model_model_layers_29_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191201664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192430528))))[name = string("model_model_layers_29_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_209_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_29_mlp_down_proj_weight_to_fp16_quantized, x = input_239_cast_fp16)[name = string("linear_209_cast_fp16")]; + tensor hidden_states_899_cast_fp16 = add(x = hidden_states_889_cast_fp16, y = linear_209_cast_fp16)[name = string("hidden_states_899_cast_fp16")]; + fp16 var_80_promoted_60_to_fp16 = const()[name = string("op_80_promoted_60_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5544_cast_fp16 = pow(x = hidden_states_899_cast_fp16, y = var_80_promoted_60_to_fp16)[name = string("op_5544_cast_fp16")]; + tensor variance_121_axes_0 = const()[name = string("variance_121_axes_0"), val = tensor([-1])]; + bool variance_121_keep_dims_0 = const()[name = string("variance_121_keep_dims_0"), val = bool(true)]; + tensor variance_121_cast_fp16 = reduce_mean(axes = variance_121_axes_0, keep_dims = variance_121_keep_dims_0, x = var_5544_cast_fp16)[name = string("variance_121_cast_fp16")]; + fp16 var_5547_to_fp16 = const()[name = string("op_5547_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5548_cast_fp16 = add(x = variance_121_cast_fp16, y = var_5547_to_fp16)[name = string("op_5548_cast_fp16")]; + fp32 var_5549_epsilon_0 = const()[name = string("op_5549_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5549_cast_fp16 = rsqrt(epsilon = var_5549_epsilon_0, x = var_5548_cast_fp16)[name = string("op_5549_cast_fp16")]; + tensor hidden_states_903_cast_fp16 = mul(x = hidden_states_899_cast_fp16, y = var_5549_cast_fp16)[name = string("hidden_states_903_cast_fp16")]; + tensor model_model_layers_30_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_30_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192584192)))]; + tensor hidden_states_907_cast_fp16 = mul(x = model_model_layers_30_input_layernorm_weight_to_fp16, y = hidden_states_903_cast_fp16)[name = string("hidden_states_907_cast_fp16")]; + tensor var_5560_shape_cast_fp16 = shape(x = hidden_states_907_cast_fp16)[name = string("op_5560_shape_cast_fp16")]; + int32 gather_544 = const()[name = string("gather_544"), val = int32(1)]; + int32 gather_545_axis_0 = const()[name = string("gather_545_axis_0"), val = int32(0)]; + int32 gather_545_batch_dims_0 = const()[name = string("gather_545_batch_dims_0"), val = int32(0)]; + bool gather_545_validate_indices_0 = const()[name = string("gather_545_validate_indices_0"), val = bool(false)]; + string var_5560_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5560_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_545_to_uint16 = const()[name = string("select_545_to_uint16"), val = uint16(1)]; + tensor var_5560_shape_cast_fp16_to_uint16 = cast(dtype = var_5560_shape_cast_fp16_to_uint16_dtype_0, x = var_5560_shape_cast_fp16)[name = string("cast_15")]; + uint16 gather_545_cast_uint16 = gather(axis = gather_545_axis_0, batch_dims = gather_545_batch_dims_0, indices = select_545_to_uint16, validate_indices = gather_545_validate_indices_0, x = var_5560_shape_cast_fp16_to_uint16)[name = string("gather_545_cast_uint16")]; + string gather_545_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_545_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_30_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192586176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193047040))))[name = string("model_model_layers_30_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_210_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_30_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_907_cast_fp16)[name = string("linear_210_cast_fp16")]; + tensor model_model_layers_30_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193104704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193258368))))[name = string("model_model_layers_30_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_211_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_30_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_907_cast_fp16)[name = string("linear_211_cast_fp16")]; + tensor model_model_layers_30_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193277632))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193431296))))[name = string("model_model_layers_30_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_212_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_30_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_907_cast_fp16)[name = string("linear_212_cast_fp16")]; + tensor concat_570x = const()[name = string("concat_570x"), val = tensor([1, -1, 15, 64])]; + tensor var_5569_cast_fp16 = reshape(shape = concat_570x, x = linear_210_cast_fp16)[name = string("op_5569_cast_fp16")]; + tensor q_61_perm_0 = const()[name = string("q_61_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_571x = const()[name = string("concat_571x"), val = tensor([1, -1, 5, 64])]; + tensor var_5572_cast_fp16 = reshape(shape = concat_571x, x = linear_211_cast_fp16)[name = string("op_5572_cast_fp16")]; + tensor k_61_perm_0 = const()[name = string("k_61_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_572x = const()[name = string("concat_572x"), val = tensor([1, -1, 5, 64])]; + tensor var_5575_cast_fp16 = reshape(shape = concat_572x, x = linear_212_cast_fp16)[name = string("op_5575_cast_fp16")]; + tensor v_state_61_perm_0 = const()[name = string("v_state_61_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_61_cast_fp16 = transpose(perm = q_61_perm_0, x = var_5569_cast_fp16)[name = string("transpose_7")]; + tensor var_5579_cast_fp16 = mul(x = q_61_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5579_cast_fp16")]; + tensor x1_121_begin_0 = const()[name = string("x1_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_121_end_0 = const()[name = string("x1_121_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_121_end_mask_0 = const()[name = string("x1_121_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_121_cast_fp16 = slice_by_index(begin = x1_121_begin_0, end = x1_121_end_0, end_mask = x1_121_end_mask_0, x = q_61_cast_fp16)[name = string("x1_121_cast_fp16")]; + tensor x2_121_begin_0 = const()[name = string("x2_121_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_121_end_0 = const()[name = string("x2_121_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_121_end_mask_0 = const()[name = string("x2_121_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_121_cast_fp16 = slice_by_index(begin = x2_121_begin_0, end = x2_121_end_0, end_mask = x2_121_end_mask_0, x = q_61_cast_fp16)[name = string("x2_121_cast_fp16")]; + fp16 const_63_promoted_to_fp16 = const()[name = string("const_63_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5590_cast_fp16 = mul(x = x2_121_cast_fp16, y = const_63_promoted_to_fp16)[name = string("op_5590_cast_fp16")]; + bool var_5592_interleave_0 = const()[name = string("op_5592_interleave_0"), val = bool(false)]; + tensor var_5592_cast_fp16 = concat(axis = var_85, interleave = var_5592_interleave_0, values = (var_5590_cast_fp16, x1_121_cast_fp16))[name = string("op_5592_cast_fp16")]; + tensor var_5593_cast_fp16 = mul(x = var_5592_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5593_cast_fp16")]; + tensor query_states_123_cast_fp16 = add(x = var_5579_cast_fp16, y = var_5593_cast_fp16)[name = string("query_states_123_cast_fp16")]; + tensor k_61_cast_fp16 = transpose(perm = k_61_perm_0, x = var_5572_cast_fp16)[name = string("transpose_6")]; + tensor var_5595_cast_fp16 = mul(x = k_61_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5595_cast_fp16")]; + tensor x1_123_begin_0 = const()[name = string("x1_123_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_123_end_0 = const()[name = string("x1_123_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_123_end_mask_0 = const()[name = string("x1_123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_123_cast_fp16 = slice_by_index(begin = x1_123_begin_0, end = x1_123_end_0, end_mask = x1_123_end_mask_0, x = k_61_cast_fp16)[name = string("x1_123_cast_fp16")]; + tensor x2_123_begin_0 = const()[name = string("x2_123_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_123_end_0 = const()[name = string("x2_123_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_123_end_mask_0 = const()[name = string("x2_123_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_123_cast_fp16 = slice_by_index(begin = x2_123_begin_0, end = x2_123_end_0, end_mask = x2_123_end_mask_0, x = k_61_cast_fp16)[name = string("x2_123_cast_fp16")]; + fp16 const_64_promoted_to_fp16 = const()[name = string("const_64_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5606_cast_fp16 = mul(x = x2_123_cast_fp16, y = const_64_promoted_to_fp16)[name = string("op_5606_cast_fp16")]; + bool var_5608_interleave_0 = const()[name = string("op_5608_interleave_0"), val = bool(false)]; + tensor var_5608_cast_fp16 = concat(axis = var_85, interleave = var_5608_interleave_0, values = (var_5606_cast_fp16, x1_123_cast_fp16))[name = string("op_5608_cast_fp16")]; + tensor var_5609_cast_fp16 = mul(x = var_5608_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5609_cast_fp16")]; + tensor k_state_61_cast_fp16 = add(x = var_5595_cast_fp16, y = var_5609_cast_fp16)[name = string("k_state_61_cast_fp16")]; + tensor expand_dims_360 = const()[name = string("expand_dims_360"), val = tensor([0])]; + tensor expand_dims_361 = const()[name = string("expand_dims_361"), val = tensor([0])]; + tensor expand_dims_363 = const()[name = string("expand_dims_363"), val = tensor([0])]; + tensor concat_575_values0_0 = const()[name = string("concat_575_values0_0"), val = tensor([30])]; + int32 concat_575_axis_0 = const()[name = string("concat_575_axis_0"), val = int32(0)]; + bool concat_575_interleave_0 = const()[name = string("concat_575_interleave_0"), val = bool(false)]; + tensor concat_575 = concat(axis = concat_575_axis_0, interleave = concat_575_interleave_0, values = (concat_575_values0_0, expand_dims_360, expand_dims_361, expand_dims_2, expand_dims_363))[name = string("concat_575")]; + tensor key_cache_internal_tensor_assign_31_stride_0 = const()[name = string("key_cache_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_31_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_31_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_575, begin_mask = key_cache_internal_tensor_assign_31_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_31_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_31_squeeze_mask_0, stride = key_cache_internal_tensor_assign_31_stride_0, update = k_state_61_cast_fp16, x = coreml_update_state_122)[name = string("key_cache_internal_tensor_assign_31_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_31_cast_fp16, input = key_cache)[name = string("coreml_update_state_124_write_state")]; + tensor coreml_update_state_124 = read_state(input = key_cache)[name = string("coreml_update_state_124")]; + tensor value_cache_internal_tensor_assign_31_stride_0 = const()[name = string("value_cache_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_31_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_31_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_61_cast_fp16 = transpose(perm = v_state_61_perm_0, x = var_5575_cast_fp16)[name = string("transpose_5")]; + tensor value_cache_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_575, begin_mask = value_cache_internal_tensor_assign_31_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_31_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_31_squeeze_mask_0, stride = value_cache_internal_tensor_assign_31_stride_0, update = v_state_61_cast_fp16, x = coreml_update_state_123)[name = string("value_cache_internal_tensor_assign_31_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_31_cast_fp16, input = value_cache)[name = string("coreml_update_state_125_write_state")]; + tensor coreml_update_state_125 = read_state(input = value_cache)[name = string("coreml_update_state_125")]; + tensor var_5632_begin_0 = const()[name = string("op_5632_begin_0"), val = tensor([30, 0, 0, 0, 0])]; + tensor var_5632_end_0 = const()[name = string("op_5632_end_0"), val = tensor([31, 1, 5, 2048, 64])]; + tensor var_5632_end_mask_0 = const()[name = string("op_5632_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5632_squeeze_mask_0 = const()[name = string("op_5632_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5632_cast_fp16 = slice_by_index(begin = var_5632_begin_0, end = var_5632_end_0, end_mask = var_5632_end_mask_0, squeeze_mask = var_5632_squeeze_mask_0, x = coreml_update_state_124)[name = string("op_5632_cast_fp16")]; + tensor var_5635_begin_0 = const()[name = string("op_5635_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5635_end_mask_0 = const()[name = string("op_5635_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5635_cast_fp16 = slice_by_index(begin = var_5635_begin_0, end = concat_11, end_mask = var_5635_end_mask_0, x = var_5632_cast_fp16)[name = string("op_5635_cast_fp16")]; + tensor var_5637_begin_0 = const()[name = string("op_5637_begin_0"), val = tensor([30, 0, 0, 0, 0])]; + tensor var_5637_end_0 = const()[name = string("op_5637_end_0"), val = tensor([31, 1, 5, 2048, 64])]; + tensor var_5637_end_mask_0 = const()[name = string("op_5637_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5637_squeeze_mask_0 = const()[name = string("op_5637_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5637_cast_fp16 = slice_by_index(begin = var_5637_begin_0, end = var_5637_end_0, end_mask = var_5637_end_mask_0, squeeze_mask = var_5637_squeeze_mask_0, x = coreml_update_state_125)[name = string("op_5637_cast_fp16")]; + tensor var_5640_begin_0 = const()[name = string("op_5640_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5640_end_mask_0 = const()[name = string("op_5640_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5640_cast_fp16 = slice_by_index(begin = var_5640_begin_0, end = concat_11, end_mask = var_5640_end_mask_0, x = var_5637_cast_fp16)[name = string("op_5640_cast_fp16")]; + tensor var_5642_shape_cast_fp16 = shape(x = var_5635_cast_fp16)[name = string("op_5642_shape_cast_fp16")]; + int32 gather_553 = const()[name = string("gather_553"), val = int32(1)]; + int32 gather_554 = const()[name = string("gather_554"), val = int32(5)]; + int32 gather_555_axis_0 = const()[name = string("gather_555_axis_0"), val = int32(0)]; + int32 gather_555_batch_dims_0 = const()[name = string("gather_555_batch_dims_0"), val = int32(0)]; + bool gather_555_validate_indices_0 = const()[name = string("gather_555_validate_indices_0"), val = bool(false)]; + string var_5642_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5642_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_555_to_uint16 = const()[name = string("select_555_to_uint16"), val = uint16(2)]; + tensor var_5642_shape_cast_fp16_to_uint16 = cast(dtype = var_5642_shape_cast_fp16_to_uint16_dtype_0, x = var_5642_shape_cast_fp16)[name = string("cast_14")]; + uint16 gather_555_cast_uint16 = gather(axis = gather_555_axis_0, batch_dims = gather_555_batch_dims_0, indices = select_555_to_uint16, validate_indices = gather_555_validate_indices_0, x = var_5642_shape_cast_fp16_to_uint16)[name = string("gather_555_cast_uint16")]; + string gather_555_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_555_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_556 = const()[name = string("gather_556"), val = int32(64)]; + tensor var_5649_axes_0 = const()[name = string("op_5649_axes_0"), val = tensor([2])]; + tensor var_5649_cast_fp16 = expand_dims(axes = var_5649_axes_0, x = var_5635_cast_fp16)[name = string("op_5649_cast_fp16")]; + tensor shape_617_cast_fp16 = shape(x = var_5649_cast_fp16)[name = string("shape_617_cast_fp16")]; + int32 concat_583_axis_0 = const()[name = string("concat_583_axis_0"), val = int32(0)]; + bool concat_583_interleave_0 = const()[name = string("concat_583_interleave_0"), val = bool(false)]; + int32 gather_555_cast_uint16_to_int32 = cast(dtype = gather_555_cast_uint16_to_int32_dtype_0, x = gather_555_cast_uint16)[name = string("cast_13")]; + tensor concat_583 = concat(axis = concat_583_axis_0, interleave = concat_583_interleave_0, values = (gather_553, gather_554, var_89, gather_555_cast_uint16_to_int32, gather_556))[name = string("concat_583")]; + tensor real_div_60 = real_div(x = concat_583, y = shape_617_cast_fp16)[name = string("real_div_60")]; + tensor hidden_states_911_cast_fp16 = tile(reps = real_div_60, x = var_5649_cast_fp16)[name = string("hidden_states_911_cast_fp16")]; + tensor concat_584x = const()[name = string("concat_584x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_123_cast_fp16 = reshape(shape = concat_584x, x = hidden_states_911_cast_fp16)[name = string("key_states_123_cast_fp16")]; + tensor var_5659_shape_cast_fp16 = shape(x = var_5640_cast_fp16)[name = string("op_5659_shape_cast_fp16")]; + int32 gather_557 = const()[name = string("gather_557"), val = int32(1)]; + int32 gather_558 = const()[name = string("gather_558"), val = int32(5)]; + int32 gather_559_axis_0 = const()[name = string("gather_559_axis_0"), val = int32(0)]; + int32 gather_559_batch_dims_0 = const()[name = string("gather_559_batch_dims_0"), val = int32(0)]; + bool gather_559_validate_indices_0 = const()[name = string("gather_559_validate_indices_0"), val = bool(false)]; + string var_5659_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5659_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_559_to_uint16 = const()[name = string("select_559_to_uint16"), val = uint16(2)]; + tensor var_5659_shape_cast_fp16_to_uint16 = cast(dtype = var_5659_shape_cast_fp16_to_uint16_dtype_0, x = var_5659_shape_cast_fp16)[name = string("cast_12")]; + uint16 gather_559_cast_uint16 = gather(axis = gather_559_axis_0, batch_dims = gather_559_batch_dims_0, indices = select_559_to_uint16, validate_indices = gather_559_validate_indices_0, x = var_5659_shape_cast_fp16_to_uint16)[name = string("gather_559_cast_uint16")]; + string gather_559_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_559_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_560 = const()[name = string("gather_560"), val = int32(64)]; + tensor var_5666_axes_0 = const()[name = string("op_5666_axes_0"), val = tensor([2])]; + tensor var_5666_cast_fp16 = expand_dims(axes = var_5666_axes_0, x = var_5640_cast_fp16)[name = string("op_5666_cast_fp16")]; + tensor shape_622_cast_fp16 = shape(x = var_5666_cast_fp16)[name = string("shape_622_cast_fp16")]; + int32 concat_585_axis_0 = const()[name = string("concat_585_axis_0"), val = int32(0)]; + bool concat_585_interleave_0 = const()[name = string("concat_585_interleave_0"), val = bool(false)]; + int32 gather_559_cast_uint16_to_int32 = cast(dtype = gather_559_cast_uint16_to_int32_dtype_0, x = gather_559_cast_uint16)[name = string("cast_11")]; + tensor concat_585 = concat(axis = concat_585_axis_0, interleave = concat_585_interleave_0, values = (gather_557, gather_558, var_89, gather_559_cast_uint16_to_int32, gather_560))[name = string("concat_585")]; + tensor real_div_61 = real_div(x = concat_585, y = shape_622_cast_fp16)[name = string("real_div_61")]; + tensor hidden_states_915_cast_fp16 = tile(reps = real_div_61, x = var_5666_cast_fp16)[name = string("hidden_states_915_cast_fp16")]; + tensor concat_586x = const()[name = string("concat_586x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_123_cast_fp16 = reshape(shape = concat_586x, x = hidden_states_915_cast_fp16)[name = string("value_states_123_cast_fp16")]; + tensor var_5676_shape_cast_fp16 = shape(x = key_states_123_cast_fp16)[name = string("op_5676_shape_cast_fp16")]; + int32 gather_561_axis_0 = const()[name = string("gather_561_axis_0"), val = int32(0)]; + int32 gather_561_batch_dims_0 = const()[name = string("gather_561_batch_dims_0"), val = int32(0)]; + bool gather_561_validate_indices_0 = const()[name = string("gather_561_validate_indices_0"), val = bool(false)]; + string var_5676_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5676_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_561_to_uint16 = const()[name = string("select_561_to_uint16"), val = uint16(2)]; + tensor var_5676_shape_cast_fp16_to_uint16 = cast(dtype = var_5676_shape_cast_fp16_to_uint16_dtype_0, x = var_5676_shape_cast_fp16)[name = string("cast_10")]; + uint16 gather_561_cast_uint16 = gather(axis = gather_561_axis_0, batch_dims = gather_561_batch_dims_0, indices = select_561_to_uint16, validate_indices = gather_561_validate_indices_0, x = var_5676_shape_cast_fp16_to_uint16)[name = string("gather_561_cast_uint16")]; + string gather_561_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_561_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_587_values0_0 = const()[name = string("concat_587_values0_0"), val = int32(1)]; + int32 concat_587_values1_0 = const()[name = string("concat_587_values1_0"), val = int32(1)]; + int32 concat_587_values2_0 = const()[name = string("concat_587_values2_0"), val = int32(0)]; + int32 concat_587_axis_0 = const()[name = string("concat_587_axis_0"), val = int32(0)]; + bool concat_587_interleave_0 = const()[name = string("concat_587_interleave_0"), val = bool(false)]; + int32 gather_561_cast_uint16_to_int32 = cast(dtype = gather_561_cast_uint16_to_int32_dtype_0, x = gather_561_cast_uint16)[name = string("cast_9")]; + tensor concat_587 = concat(axis = concat_587_axis_0, interleave = concat_587_interleave_0, values = (concat_587_values0_0, concat_587_values1_0, concat_587_values2_0, gather_561_cast_uint16_to_int32))[name = string("concat_587")]; + tensor causal_mask_63_begin_0 = const()[name = string("causal_mask_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_63_end_mask_0 = const()[name = string("causal_mask_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_63_cast_fp16 = slice_by_index(begin = causal_mask_63_begin_0, end = concat_587, end_mask = causal_mask_63_end_mask_0, x = causal_mask)[name = string("causal_mask_63_cast_fp16")]; + tensor attn_output_121_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_63_cast_fp16, key = key_states_123_cast_fp16, query = query_states_123_cast_fp16, value = value_states_123_cast_fp16)[name = string("attn_output_121_cast_fp16")]; + tensor var_5682_perm_0 = const()[name = string("op_5682_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_588_axis_0 = const()[name = string("concat_588_axis_0"), val = int32(0)]; + bool concat_588_interleave_0 = const()[name = string("concat_588_interleave_0"), val = bool(false)]; + int32 gather_545_cast_uint16_to_int32 = cast(dtype = gather_545_cast_uint16_to_int32_dtype_0, x = gather_545_cast_uint16)[name = string("cast_8")]; + tensor concat_588 = concat(axis = concat_588_axis_0, interleave = concat_588_interleave_0, values = (gather_544, gather_545_cast_uint16_to_int32, var_85))[name = string("concat_588")]; + tensor var_5682_cast_fp16 = transpose(perm = var_5682_perm_0, x = attn_output_121_cast_fp16)[name = string("transpose_4")]; + tensor input_241_cast_fp16 = reshape(shape = concat_588, x = var_5682_cast_fp16)[name = string("input_241_cast_fp16")]; + tensor model_model_layers_30_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193450560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193911424))))[name = string("model_model_layers_30_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_213_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_30_self_attn_o_proj_weight_to_fp16_quantized, x = input_241_cast_fp16)[name = string("linear_213_cast_fp16")]; + tensor hidden_states_919_cast_fp16 = add(x = hidden_states_899_cast_fp16, y = linear_213_cast_fp16)[name = string("hidden_states_919_cast_fp16")]; + fp16 var_80_promoted_61_to_fp16 = const()[name = string("op_80_promoted_61_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5691_cast_fp16 = pow(x = hidden_states_919_cast_fp16, y = var_80_promoted_61_to_fp16)[name = string("op_5691_cast_fp16")]; + tensor variance_123_axes_0 = const()[name = string("variance_123_axes_0"), val = tensor([-1])]; + bool variance_123_keep_dims_0 = const()[name = string("variance_123_keep_dims_0"), val = bool(true)]; + tensor variance_123_cast_fp16 = reduce_mean(axes = variance_123_axes_0, keep_dims = variance_123_keep_dims_0, x = var_5691_cast_fp16)[name = string("variance_123_cast_fp16")]; + fp16 var_5694_to_fp16 = const()[name = string("op_5694_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5695_cast_fp16 = add(x = variance_123_cast_fp16, y = var_5694_to_fp16)[name = string("op_5695_cast_fp16")]; + fp32 var_5696_epsilon_0 = const()[name = string("op_5696_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5696_cast_fp16 = rsqrt(epsilon = var_5696_epsilon_0, x = var_5695_cast_fp16)[name = string("op_5696_cast_fp16")]; + tensor hidden_states_923_cast_fp16 = mul(x = hidden_states_919_cast_fp16, y = var_5696_cast_fp16)[name = string("hidden_states_923_cast_fp16")]; + tensor model_model_layers_30_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_30_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193969088)))]; + tensor input_243_cast_fp16 = mul(x = model_model_layers_30_post_attention_layernorm_weight_to_fp16, y = hidden_states_923_cast_fp16)[name = string("input_243_cast_fp16")]; + tensor model_model_layers_30_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193971072))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195199936))))[name = string("model_model_layers_30_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_214_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_30_mlp_gate_proj_weight_to_fp16_quantized, x = input_243_cast_fp16)[name = string("linear_214_cast_fp16")]; + tensor var_5708_cast_fp16 = silu(x = linear_214_cast_fp16)[name = string("op_5708_cast_fp16")]; + tensor model_model_layers_30_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195353600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196582464))))[name = string("model_model_layers_30_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_215_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_30_mlp_up_proj_weight_to_fp16_quantized, x = input_243_cast_fp16)[name = string("linear_215_cast_fp16")]; + tensor input_247_cast_fp16 = mul(x = var_5708_cast_fp16, y = linear_215_cast_fp16)[name = string("input_247_cast_fp16")]; + tensor model_model_layers_30_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196736128))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197964992))))[name = string("model_model_layers_30_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_216_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_30_mlp_down_proj_weight_to_fp16_quantized, x = input_247_cast_fp16)[name = string("linear_216_cast_fp16")]; + tensor hidden_states_929_cast_fp16 = add(x = hidden_states_919_cast_fp16, y = linear_216_cast_fp16)[name = string("hidden_states_929_cast_fp16")]; + fp16 var_80_promoted_62_to_fp16 = const()[name = string("op_80_promoted_62_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5721_cast_fp16 = pow(x = hidden_states_929_cast_fp16, y = var_80_promoted_62_to_fp16)[name = string("op_5721_cast_fp16")]; + tensor variance_125_axes_0 = const()[name = string("variance_125_axes_0"), val = tensor([-1])]; + bool variance_125_keep_dims_0 = const()[name = string("variance_125_keep_dims_0"), val = bool(true)]; + tensor variance_125_cast_fp16 = reduce_mean(axes = variance_125_axes_0, keep_dims = variance_125_keep_dims_0, x = var_5721_cast_fp16)[name = string("variance_125_cast_fp16")]; + fp16 var_5724_to_fp16 = const()[name = string("op_5724_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5725_cast_fp16 = add(x = variance_125_cast_fp16, y = var_5724_to_fp16)[name = string("op_5725_cast_fp16")]; + fp32 var_5726_epsilon_0 = const()[name = string("op_5726_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5726_cast_fp16 = rsqrt(epsilon = var_5726_epsilon_0, x = var_5725_cast_fp16)[name = string("op_5726_cast_fp16")]; + tensor hidden_states_933_cast_fp16 = mul(x = hidden_states_929_cast_fp16, y = var_5726_cast_fp16)[name = string("hidden_states_933_cast_fp16")]; + tensor model_model_layers_31_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_31_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198118656)))]; + tensor hidden_states_937_cast_fp16 = mul(x = model_model_layers_31_input_layernorm_weight_to_fp16, y = hidden_states_933_cast_fp16)[name = string("hidden_states_937_cast_fp16")]; + tensor var_5737_shape_cast_fp16 = shape(x = hidden_states_937_cast_fp16)[name = string("op_5737_shape_cast_fp16")]; + int32 gather_562 = const()[name = string("gather_562"), val = int32(1)]; + int32 gather_563_axis_0 = const()[name = string("gather_563_axis_0"), val = int32(0)]; + int32 gather_563_batch_dims_0 = const()[name = string("gather_563_batch_dims_0"), val = int32(0)]; + bool gather_563_validate_indices_0 = const()[name = string("gather_563_validate_indices_0"), val = bool(false)]; + string var_5737_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5737_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_563_to_uint16 = const()[name = string("select_563_to_uint16"), val = uint16(1)]; + tensor var_5737_shape_cast_fp16_to_uint16 = cast(dtype = var_5737_shape_cast_fp16_to_uint16_dtype_0, x = var_5737_shape_cast_fp16)[name = string("cast_7")]; + uint16 gather_563_cast_uint16 = gather(axis = gather_563_axis_0, batch_dims = gather_563_batch_dims_0, indices = select_563_to_uint16, validate_indices = gather_563_validate_indices_0, x = var_5737_shape_cast_fp16_to_uint16)[name = string("gather_563_cast_uint16")]; + string gather_563_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_563_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_31_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198120640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198581504))))[name = string("model_model_layers_31_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_217_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_31_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_937_cast_fp16)[name = string("linear_217_cast_fp16")]; + tensor model_model_layers_31_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198639168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198792832))))[name = string("model_model_layers_31_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_218_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_31_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_937_cast_fp16)[name = string("linear_218_cast_fp16")]; + tensor model_model_layers_31_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198812096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198965760))))[name = string("model_model_layers_31_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_219_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_31_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_937_cast_fp16)[name = string("linear_219_cast_fp16")]; + tensor concat_589x = const()[name = string("concat_589x"), val = tensor([1, -1, 15, 64])]; + tensor var_5746_cast_fp16 = reshape(shape = concat_589x, x = linear_217_cast_fp16)[name = string("op_5746_cast_fp16")]; + tensor q_perm_0 = const()[name = string("q_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_590x = const()[name = string("concat_590x"), val = tensor([1, -1, 5, 64])]; + tensor var_5749_cast_fp16 = reshape(shape = concat_590x, x = linear_218_cast_fp16)[name = string("op_5749_cast_fp16")]; + tensor k_perm_0 = const()[name = string("k_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_591x = const()[name = string("concat_591x"), val = tensor([1, -1, 5, 64])]; + tensor var_5752_cast_fp16 = reshape(shape = concat_591x, x = linear_219_cast_fp16)[name = string("op_5752_cast_fp16")]; + tensor v_state_perm_0 = const()[name = string("v_state_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_cast_fp16 = transpose(perm = q_perm_0, x = var_5746_cast_fp16)[name = string("transpose_3")]; + tensor var_5756_cast_fp16 = mul(x = q_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5756_cast_fp16")]; + tensor x1_125_begin_0 = const()[name = string("x1_125_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_125_end_0 = const()[name = string("x1_125_end_0"), val = tensor([1, 15, 0, 32])]; + tensor x1_125_end_mask_0 = const()[name = string("x1_125_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_125_cast_fp16 = slice_by_index(begin = x1_125_begin_0, end = x1_125_end_0, end_mask = x1_125_end_mask_0, x = q_cast_fp16)[name = string("x1_125_cast_fp16")]; + tensor x2_125_begin_0 = const()[name = string("x2_125_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_125_end_0 = const()[name = string("x2_125_end_0"), val = tensor([1, 15, 0, 64])]; + tensor x2_125_end_mask_0 = const()[name = string("x2_125_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_125_cast_fp16 = slice_by_index(begin = x2_125_begin_0, end = x2_125_end_0, end_mask = x2_125_end_mask_0, x = q_cast_fp16)[name = string("x2_125_cast_fp16")]; + fp16 const_65_promoted_to_fp16 = const()[name = string("const_65_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5767_cast_fp16 = mul(x = x2_125_cast_fp16, y = const_65_promoted_to_fp16)[name = string("op_5767_cast_fp16")]; + bool var_5769_interleave_0 = const()[name = string("op_5769_interleave_0"), val = bool(false)]; + tensor var_5769_cast_fp16 = concat(axis = var_85, interleave = var_5769_interleave_0, values = (var_5767_cast_fp16, x1_125_cast_fp16))[name = string("op_5769_cast_fp16")]; + tensor var_5770_cast_fp16 = mul(x = var_5769_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5770_cast_fp16")]; + tensor query_states_cast_fp16 = add(x = var_5756_cast_fp16, y = var_5770_cast_fp16)[name = string("query_states_cast_fp16")]; + tensor k_cast_fp16 = transpose(perm = k_perm_0, x = var_5749_cast_fp16)[name = string("transpose_2")]; + tensor var_5772_cast_fp16 = mul(x = k_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5772_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 5, 0, 32])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 5, 0, 64])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; + fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5783_cast_fp16 = mul(x = x2_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_5783_cast_fp16")]; + bool var_5785_interleave_0 = const()[name = string("op_5785_interleave_0"), val = bool(false)]; + tensor var_5785_cast_fp16 = concat(axis = var_85, interleave = var_5785_interleave_0, values = (var_5783_cast_fp16, x1_cast_fp16))[name = string("op_5785_cast_fp16")]; + tensor var_5786_cast_fp16 = mul(x = var_5785_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5786_cast_fp16")]; + tensor k_state_cast_fp16 = add(x = var_5772_cast_fp16, y = var_5786_cast_fp16)[name = string("k_state_cast_fp16")]; + tensor expand_dims_372 = const()[name = string("expand_dims_372"), val = tensor([0])]; + tensor expand_dims_373 = const()[name = string("expand_dims_373"), val = tensor([0])]; + tensor expand_dims_375 = const()[name = string("expand_dims_375"), val = tensor([0])]; + tensor concat_594_values0_0 = const()[name = string("concat_594_values0_0"), val = tensor([31])]; + int32 concat_594_axis_0 = const()[name = string("concat_594_axis_0"), val = int32(0)]; + bool concat_594_interleave_0 = const()[name = string("concat_594_interleave_0"), val = bool(false)]; + tensor concat_594 = concat(axis = concat_594_axis_0, interleave = concat_594_interleave_0, values = (concat_594_values0_0, expand_dims_372, expand_dims_373, expand_dims_2, expand_dims_375))[name = string("concat_594")]; + tensor key_cache_internal_tensor_assign_32_stride_0 = const()[name = string("key_cache_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_32_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_32_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_594, begin_mask = key_cache_internal_tensor_assign_32_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_32_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_32_squeeze_mask_0, stride = key_cache_internal_tensor_assign_32_stride_0, update = k_state_cast_fp16, x = coreml_update_state_124)[name = string("key_cache_internal_tensor_assign_32_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_32_cast_fp16, input = key_cache)[name = string("coreml_update_state_126_write_state")]; + tensor coreml_update_state_126 = read_state(input = key_cache)[name = string("coreml_update_state_126")]; + tensor value_cache_internal_tensor_assign_32_stride_0 = const()[name = string("value_cache_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_32_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_32_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_cast_fp16 = transpose(perm = v_state_perm_0, x = var_5752_cast_fp16)[name = string("transpose_1")]; + tensor value_cache_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_594, begin_mask = value_cache_internal_tensor_assign_32_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_32_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_32_squeeze_mask_0, stride = value_cache_internal_tensor_assign_32_stride_0, update = v_state_cast_fp16, x = coreml_update_state_125)[name = string("value_cache_internal_tensor_assign_32_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_32_cast_fp16, input = value_cache)[name = string("coreml_update_state_127_write_state")]; + tensor coreml_update_state_127 = read_state(input = value_cache)[name = string("coreml_update_state_127")]; + tensor var_5809_begin_0 = const()[name = string("op_5809_begin_0"), val = tensor([31, 0, 0, 0, 0])]; + tensor var_5809_end_0 = const()[name = string("op_5809_end_0"), val = tensor([32, 1, 5, 2048, 64])]; + tensor var_5809_end_mask_0 = const()[name = string("op_5809_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5809_squeeze_mask_0 = const()[name = string("op_5809_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5809_cast_fp16 = slice_by_index(begin = var_5809_begin_0, end = var_5809_end_0, end_mask = var_5809_end_mask_0, squeeze_mask = var_5809_squeeze_mask_0, x = coreml_update_state_126)[name = string("op_5809_cast_fp16")]; + tensor var_5812_begin_0 = const()[name = string("op_5812_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5812_end_mask_0 = const()[name = string("op_5812_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5812_cast_fp16 = slice_by_index(begin = var_5812_begin_0, end = concat_11, end_mask = var_5812_end_mask_0, x = var_5809_cast_fp16)[name = string("op_5812_cast_fp16")]; + tensor var_5814_begin_0 = const()[name = string("op_5814_begin_0"), val = tensor([31, 0, 0, 0, 0])]; + tensor var_5814_end_0 = const()[name = string("op_5814_end_0"), val = tensor([32, 1, 5, 2048, 64])]; + tensor var_5814_end_mask_0 = const()[name = string("op_5814_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5814_squeeze_mask_0 = const()[name = string("op_5814_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5814_cast_fp16 = slice_by_index(begin = var_5814_begin_0, end = var_5814_end_0, end_mask = var_5814_end_mask_0, squeeze_mask = var_5814_squeeze_mask_0, x = coreml_update_state_127)[name = string("op_5814_cast_fp16")]; + tensor var_5817_begin_0 = const()[name = string("op_5817_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5817_end_mask_0 = const()[name = string("op_5817_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5817_cast_fp16 = slice_by_index(begin = var_5817_begin_0, end = concat_11, end_mask = var_5817_end_mask_0, x = var_5814_cast_fp16)[name = string("op_5817_cast_fp16")]; + tensor var_5819_shape_cast_fp16 = shape(x = var_5812_cast_fp16)[name = string("op_5819_shape_cast_fp16")]; + int32 gather_571 = const()[name = string("gather_571"), val = int32(1)]; + int32 gather_572 = const()[name = string("gather_572"), val = int32(5)]; + int32 gather_573_axis_0 = const()[name = string("gather_573_axis_0"), val = int32(0)]; + int32 gather_573_batch_dims_0 = const()[name = string("gather_573_batch_dims_0"), val = int32(0)]; + bool gather_573_validate_indices_0 = const()[name = string("gather_573_validate_indices_0"), val = bool(false)]; + string var_5819_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5819_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_573_to_uint16 = const()[name = string("select_573_to_uint16"), val = uint16(2)]; + tensor var_5819_shape_cast_fp16_to_uint16 = cast(dtype = var_5819_shape_cast_fp16_to_uint16_dtype_0, x = var_5819_shape_cast_fp16)[name = string("cast_6")]; + uint16 gather_573_cast_uint16 = gather(axis = gather_573_axis_0, batch_dims = gather_573_batch_dims_0, indices = select_573_to_uint16, validate_indices = gather_573_validate_indices_0, x = var_5819_shape_cast_fp16_to_uint16)[name = string("gather_573_cast_uint16")]; + string gather_573_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_573_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_574 = const()[name = string("gather_574"), val = int32(64)]; + tensor var_5826_axes_0 = const()[name = string("op_5826_axes_0"), val = tensor([2])]; + tensor var_5826_cast_fp16 = expand_dims(axes = var_5826_axes_0, x = var_5812_cast_fp16)[name = string("op_5826_cast_fp16")]; + tensor shape_637_cast_fp16 = shape(x = var_5826_cast_fp16)[name = string("shape_637_cast_fp16")]; + int32 concat_602_axis_0 = const()[name = string("concat_602_axis_0"), val = int32(0)]; + bool concat_602_interleave_0 = const()[name = string("concat_602_interleave_0"), val = bool(false)]; + int32 gather_573_cast_uint16_to_int32 = cast(dtype = gather_573_cast_uint16_to_int32_dtype_0, x = gather_573_cast_uint16)[name = string("cast_5")]; + tensor concat_602 = concat(axis = concat_602_axis_0, interleave = concat_602_interleave_0, values = (gather_571, gather_572, var_89, gather_573_cast_uint16_to_int32, gather_574))[name = string("concat_602")]; + tensor real_div_62 = real_div(x = concat_602, y = shape_637_cast_fp16)[name = string("real_div_62")]; + tensor hidden_states_941_cast_fp16 = tile(reps = real_div_62, x = var_5826_cast_fp16)[name = string("hidden_states_941_cast_fp16")]; + tensor concat_603x = const()[name = string("concat_603x"), val = tensor([1, 15, -1, 64])]; + tensor key_states_cast_fp16 = reshape(shape = concat_603x, x = hidden_states_941_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor var_5836_shape_cast_fp16 = shape(x = var_5817_cast_fp16)[name = string("op_5836_shape_cast_fp16")]; + int32 gather_575 = const()[name = string("gather_575"), val = int32(1)]; + int32 gather_576 = const()[name = string("gather_576"), val = int32(5)]; + int32 gather_577_axis_0 = const()[name = string("gather_577_axis_0"), val = int32(0)]; + int32 gather_577_batch_dims_0 = const()[name = string("gather_577_batch_dims_0"), val = int32(0)]; + bool gather_577_validate_indices_0 = const()[name = string("gather_577_validate_indices_0"), val = bool(false)]; + string var_5836_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5836_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_577_to_uint16 = const()[name = string("select_577_to_uint16"), val = uint16(2)]; + tensor var_5836_shape_cast_fp16_to_uint16 = cast(dtype = var_5836_shape_cast_fp16_to_uint16_dtype_0, x = var_5836_shape_cast_fp16)[name = string("cast_4")]; + uint16 gather_577_cast_uint16 = gather(axis = gather_577_axis_0, batch_dims = gather_577_batch_dims_0, indices = select_577_to_uint16, validate_indices = gather_577_validate_indices_0, x = var_5836_shape_cast_fp16_to_uint16)[name = string("gather_577_cast_uint16")]; + string gather_577_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_577_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_578 = const()[name = string("gather_578"), val = int32(64)]; + tensor var_5843_axes_0 = const()[name = string("op_5843_axes_0"), val = tensor([2])]; + tensor var_5843_cast_fp16 = expand_dims(axes = var_5843_axes_0, x = var_5817_cast_fp16)[name = string("op_5843_cast_fp16")]; + tensor shape_642_cast_fp16 = shape(x = var_5843_cast_fp16)[name = string("shape_642_cast_fp16")]; + int32 concat_604_axis_0 = const()[name = string("concat_604_axis_0"), val = int32(0)]; + bool concat_604_interleave_0 = const()[name = string("concat_604_interleave_0"), val = bool(false)]; + int32 gather_577_cast_uint16_to_int32 = cast(dtype = gather_577_cast_uint16_to_int32_dtype_0, x = gather_577_cast_uint16)[name = string("cast_3")]; + tensor concat_604 = concat(axis = concat_604_axis_0, interleave = concat_604_interleave_0, values = (gather_575, gather_576, var_89, gather_577_cast_uint16_to_int32, gather_578))[name = string("concat_604")]; + tensor real_div_63 = real_div(x = concat_604, y = shape_642_cast_fp16)[name = string("real_div_63")]; + tensor hidden_states_945_cast_fp16 = tile(reps = real_div_63, x = var_5843_cast_fp16)[name = string("hidden_states_945_cast_fp16")]; + tensor concat_605x = const()[name = string("concat_605x"), val = tensor([1, 15, -1, 64])]; + tensor value_states_cast_fp16 = reshape(shape = concat_605x, x = hidden_states_945_cast_fp16)[name = string("value_states_cast_fp16")]; + tensor var_5853_shape_cast_fp16 = shape(x = key_states_cast_fp16)[name = string("op_5853_shape_cast_fp16")]; + int32 gather_579_axis_0 = const()[name = string("gather_579_axis_0"), val = int32(0)]; + int32 gather_579_batch_dims_0 = const()[name = string("gather_579_batch_dims_0"), val = int32(0)]; + bool gather_579_validate_indices_0 = const()[name = string("gather_579_validate_indices_0"), val = bool(false)]; + string var_5853_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5853_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_579_to_uint16 = const()[name = string("select_579_to_uint16"), val = uint16(2)]; + tensor var_5853_shape_cast_fp16_to_uint16 = cast(dtype = var_5853_shape_cast_fp16_to_uint16_dtype_0, x = var_5853_shape_cast_fp16)[name = string("cast_2")]; + uint16 gather_579_cast_uint16 = gather(axis = gather_579_axis_0, batch_dims = gather_579_batch_dims_0, indices = select_579_to_uint16, validate_indices = gather_579_validate_indices_0, x = var_5853_shape_cast_fp16_to_uint16)[name = string("gather_579_cast_uint16")]; + string gather_579_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_579_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_606_values0_0 = const()[name = string("concat_606_values0_0"), val = int32(1)]; + int32 concat_606_values1_0 = const()[name = string("concat_606_values1_0"), val = int32(1)]; + int32 concat_606_values2_0 = const()[name = string("concat_606_values2_0"), val = int32(0)]; + int32 concat_606_axis_0 = const()[name = string("concat_606_axis_0"), val = int32(0)]; + bool concat_606_interleave_0 = const()[name = string("concat_606_interleave_0"), val = bool(false)]; + int32 gather_579_cast_uint16_to_int32 = cast(dtype = gather_579_cast_uint16_to_int32_dtype_0, x = gather_579_cast_uint16)[name = string("cast_1")]; + tensor concat_606 = concat(axis = concat_606_axis_0, interleave = concat_606_interleave_0, values = (concat_606_values0_0, concat_606_values1_0, concat_606_values2_0, gather_579_cast_uint16_to_int32))[name = string("concat_606")]; + tensor causal_mask_begin_0 = const()[name = string("causal_mask_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_end_mask_0 = const()[name = string("causal_mask_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_cast_fp16 = slice_by_index(begin = causal_mask_begin_0, end = concat_606, end_mask = causal_mask_end_mask_0, x = causal_mask)[name = string("causal_mask_cast_fp16")]; + tensor attn_output_125_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_cast_fp16, key = key_states_cast_fp16, query = query_states_cast_fp16, value = value_states_cast_fp16)[name = string("attn_output_125_cast_fp16")]; + tensor var_5859_perm_0 = const()[name = string("op_5859_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_607_axis_0 = const()[name = string("concat_607_axis_0"), val = int32(0)]; + bool concat_607_interleave_0 = const()[name = string("concat_607_interleave_0"), val = bool(false)]; + int32 gather_563_cast_uint16_to_int32 = cast(dtype = gather_563_cast_uint16_to_int32_dtype_0, x = gather_563_cast_uint16)[name = string("cast_0")]; + tensor concat_607 = concat(axis = concat_607_axis_0, interleave = concat_607_interleave_0, values = (gather_562, gather_563_cast_uint16_to_int32, var_85))[name = string("concat_607")]; + tensor var_5859_cast_fp16 = transpose(perm = var_5859_perm_0, x = attn_output_125_cast_fp16)[name = string("transpose_0")]; + tensor input_249_cast_fp16 = reshape(shape = concat_607, x = var_5859_cast_fp16)[name = string("input_249_cast_fp16")]; + tensor model_model_layers_31_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198985024))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199445888))))[name = string("model_model_layers_31_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_220_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_31_self_attn_o_proj_weight_to_fp16_quantized, x = input_249_cast_fp16)[name = string("linear_220_cast_fp16")]; + tensor hidden_states_949_cast_fp16 = add(x = hidden_states_929_cast_fp16, y = linear_220_cast_fp16)[name = string("hidden_states_949_cast_fp16")]; + fp16 var_80_promoted_63_to_fp16 = const()[name = string("op_80_promoted_63_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5868_cast_fp16 = pow(x = hidden_states_949_cast_fp16, y = var_80_promoted_63_to_fp16)[name = string("op_5868_cast_fp16")]; + tensor variance_127_axes_0 = const()[name = string("variance_127_axes_0"), val = tensor([-1])]; + bool variance_127_keep_dims_0 = const()[name = string("variance_127_keep_dims_0"), val = bool(true)]; + tensor variance_127_cast_fp16 = reduce_mean(axes = variance_127_axes_0, keep_dims = variance_127_keep_dims_0, x = var_5868_cast_fp16)[name = string("variance_127_cast_fp16")]; + fp16 var_5871_to_fp16 = const()[name = string("op_5871_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5872_cast_fp16 = add(x = variance_127_cast_fp16, y = var_5871_to_fp16)[name = string("op_5872_cast_fp16")]; + fp32 var_5873_epsilon_0 = const()[name = string("op_5873_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5873_cast_fp16 = rsqrt(epsilon = var_5873_epsilon_0, x = var_5872_cast_fp16)[name = string("op_5873_cast_fp16")]; + tensor hidden_states_953_cast_fp16 = mul(x = hidden_states_949_cast_fp16, y = var_5873_cast_fp16)[name = string("hidden_states_953_cast_fp16")]; + tensor model_model_layers_31_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_31_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199503552)))]; + tensor input_251_cast_fp16 = mul(x = model_model_layers_31_post_attention_layernorm_weight_to_fp16, y = hidden_states_953_cast_fp16)[name = string("input_251_cast_fp16")]; + tensor model_model_layers_31_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199505536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200734400))))[name = string("model_model_layers_31_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_221_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_31_mlp_gate_proj_weight_to_fp16_quantized, x = input_251_cast_fp16)[name = string("linear_221_cast_fp16")]; + tensor var_5885_cast_fp16 = silu(x = linear_221_cast_fp16)[name = string("op_5885_cast_fp16")]; + tensor model_model_layers_31_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200888064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202116928))))[name = string("model_model_layers_31_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_222_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_31_mlp_up_proj_weight_to_fp16_quantized, x = input_251_cast_fp16)[name = string("linear_222_cast_fp16")]; + tensor input_255_cast_fp16 = mul(x = var_5885_cast_fp16, y = linear_222_cast_fp16)[name = string("input_255_cast_fp16")]; + tensor model_model_layers_31_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202270592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203499456))))[name = string("model_model_layers_31_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_223_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_31_mlp_down_proj_weight_to_fp16_quantized, x = input_255_cast_fp16)[name = string("linear_223_cast_fp16")]; + tensor hidden_states_959_cast_fp16 = add(x = hidden_states_949_cast_fp16, y = linear_223_cast_fp16)[name = string("hidden_states_959_cast_fp16")]; + fp16 var_80_promoted_64_to_fp16 = const()[name = string("op_80_promoted_64_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5894_cast_fp16 = pow(x = hidden_states_959_cast_fp16, y = var_80_promoted_64_to_fp16)[name = string("op_5894_cast_fp16")]; + tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; + bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; + tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_5894_cast_fp16)[name = string("variance_cast_fp16")]; + fp16 var_5897_to_fp16 = const()[name = string("op_5897_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5898_cast_fp16 = add(x = variance_cast_fp16, y = var_5897_to_fp16)[name = string("op_5898_cast_fp16")]; + fp32 var_5899_epsilon_0 = const()[name = string("op_5899_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5899_cast_fp16 = rsqrt(epsilon = var_5899_epsilon_0, x = var_5898_cast_fp16)[name = string("op_5899_cast_fp16")]; + tensor hidden_states_963_cast_fp16 = mul(x = hidden_states_959_cast_fp16, y = var_5899_cast_fp16)[name = string("hidden_states_963_cast_fp16")]; + tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203653120)))]; + tensor hidden_states_cast_fp16 = mul(x = model_model_norm_weight_to_fp16, y = hidden_states_963_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor linear_224_bias_0_to_fp16 = const()[name = string("linear_224_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203655104)))]; + tensor logits = linear(bias = linear_224_bias_0_to_fp16, weight = model_model_embed_tokens_weight_to_fp16_quantized, x = hidden_states_cast_fp16)[name = string("linear_224_cast_fp16")]; + } -> (logits); +} \ No newline at end of file